In [12]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
import glob
import datetime
import xml.etree.cElementTree as et
from datetime import datetime
from pandas.core.tools.datetimes import to_datetime

from datetime import timedelta
from RNN import RNN
from Transformer import Transformer

#from utils import series_to_supervised


In [13]:
def read_xml_data(filename, selected_items):
  tree=et.parse(filename)
  root=tree.getroot()
  #extract selected items
  for child in root:
    if child.tag in selected_items:
      df = pd.DataFrame()
      for elem in child:      
        df1 = pd.DataFrame(elem.attrib, index=[0])
        #df = df.append(df1)
        df = pd.concat([df, df1])
      #First column is the timestamp (dayfirst)      
      #df.iloc[:,0] = pd.to_datetime(df.iloc[:,0], dayfirst=True)
      #write to csv file using the timestamp as index      
      df.to_csv(child.tag+'.csv', index=0)

In [14]:
def read_ts_file(filename):
  #reads csv file where the first column is a timestamp and the index column
  df = pd.read_csv (filename, parse_dates=[0], dayfirst=True, index_col=0)
  return df

In [15]:
#selected_items = ['glucose_level','bolus','meal']
read_xml_data(filename='c://aadm/584-ws-training.xml', selected_items=['glucose_level','bolus','meal'])

In [16]:
glucose_train = read_ts_file('glucose_level.csv')
glucose_train.rename(columns={"ts": "timestamp", "value": "glucose"}, inplace=True)
glucose_train.describe()

Unnamed: 0,glucose
count,12150.0
mean,192.484444
std,65.442789
min,40.0
25%,145.0
50%,183.0
75%,230.0
max,400.0


In [17]:
def timedf(df):
  #creates a 5 minute interval timeseries dataframe based in index of df
  # df must have a timestamp index
  #time_df: result dataframe with timestamp index
  timestamp = pd.date_range(start=df.index[0], end=df.index[-1]  + timedelta(minutes=4), freq='5T')
  time_df = pd.DataFrame({'timestamp':timestamp})
  time_df.set_index('timestamp', inplace=True)
  return time_df

In [18]:
def find_gaps(df, greaterthan=5, units='m'):
  # find gaps relative to index, index must be a datetime field
  # greaterthan is the number of time units to be considered a gap
  # units 'm'=minutes, 'h'=hours
  i = 0
  gaps_df = pd.DataFrame()
  while i < len(df) - 1:
    ts = df.index[i]
    next_ts = df.index[i+1]
    duration = next_ts - ts
    if duration > np.timedelta64(greaterthan, units): 
      begin_gap = ts
      end_gap = next_ts
      gaps_df = gaps_df.append({'From': begin_gap, 'To': end_gap, 'Duration': duration}, ignore_index=True)
    i = i + 1
  gaps_df.sort_values(by=['Duration'], ascending=False, inplace=True)
  return gaps_df

In [19]:
# convert time series into supervised learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars=1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [20]:
#Finding the length of the complete time series
time_df = timedf(glucose_train)
print('Missing intervals: ', len(time_df) - len(glucose_train) )

Missing intervals:  1098


In [21]:
#Adding NA in the whole range of cgm-training
glucose_train=glucose_train.resample('5T').mean()
glucose_train.head()

Unnamed: 0_level_0,glucose
ts,Unnamed: 1_level_1
2025-05-14 00:00:00,48.0
2025-05-14 00:05:00,48.0
2025-05-14 00:10:00,53.0
2025-05-14 00:15:00,63.0
2025-05-14 00:20:00,69.0


In [22]:
#selected_items = ['glucose_level',"bolus','meal']
read_xml_data(filename='c://aadm/584-ws-testing.xml', selected_items=['glucose_level','bolus','meal'])

In [23]:
glucose_test = read_ts_file('glucose_level.csv')
glucose_test.rename(columns={"ts": "timestamp", "value": "glucose"}, inplace=True)
max=glucose_test.max()
min=glucose_test.min()

In [24]:
#Finding the length of the complete time series
time_df = timedf(glucose_test)
print('Missing intervals: ', len(time_df) - len(glucose_test) )

Missing intervals:  331


In [25]:
#Adding NA in the whole range of cgm-testing
glucose_test=glucose_test.resample('5T').mean()
glucose_test.head()

Unnamed: 0_level_0,glucose
ts,Unnamed: 1_level_1
2025-06-29 00:00:00,243.0
2025-06-29 00:05:00,253.0
2025-06-29 00:10:00,262.0
2025-06-29 00:15:00,269.0
2025-06-29 00:20:00,269.0


In [26]:
#Normalizing the data
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
datatrain = np.array(glucose_train.values.astype('float32'))
datatest = np.array(glucose_test.values.astype('float32'))
scaler = MinMaxScaler(feature_range=(0, 1))
datatrain = scaler.fit_transform(datatrain).flatten()
datatest = scaler.fit_transform(datatest).flatten()
#n = len(data)
train_data=pd.DataFrame(datatrain)
test_data=pd.DataFrame(datatest)
print(max,min)

glucose    400
dtype: int64 glucose    41
dtype: int64


In [27]:
data1=series_to_supervised(train_data, n_in=12, n_out=6, dropnan=True)
data2=series_to_supervised(test_data, n_in=12, n_out=6, dropnan=True)
train=data1.values
X_train,y_train=train[:, 0:12],train[:, 12:]
test=data2.values
X_test,y_test=test[:, 0:12],test[:, 12:]
ytest=y_test
print(y_train.shape)
print("test shape:",X_test.shape)
# reshape input to be 3D [samples, timesteps, features]
X_train_reshaped = X_train.reshape((-1,12,1))
X_test_reshaped = X_test.reshape((-1,12,1))
print(X_train_reshaped.shape,X_test_reshaped.shape)
y_train_reshaped = y_train
print(y_train_reshaped.shape)
y_test_reshaped = y_test

(11150, 6)
test shape: (2393, 12)
(11150, 12, 1) (2393, 12, 1)
(11150, 6)


In [28]:
# Testing the RNN-LSTM
#rnn = RNN()
#rnn.train(X_train_reshaped,y_train_reshaped)
#_, rmse_result, mae_result, smape_result, r2_result = rnn.evaluate(X_test_reshaped,y_test_reshaped,max,min)
#print('Result \n RMSE = %.2f  \n MAE = %.2f \n R2 = %.1f [%%]' % (rmse_result,
#                                                                            mae_result,                                                                          
#                                                                            r2_result*100))

In [29]:
## Testing the Transformer
import time
start_time = time.time()
look_back = 12
tr = Transformer()
tr.train(X_train_reshaped,y_train_reshaped)
a=tr.evaluate(X_test_reshaped,y_test_reshaped)

print(a)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 12, 1)]      0           []                               
                                                                                                  
 layer_normalization (LayerNorm  (None, 12, 1)       2           ['input_1[0][0]']                
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 12, 1)       7169        ['layer_normalization[0][0]',    
 dAttention)                                                      'layer_normalization[0][0]']    
                                                                                              

 mbda)                                                            'tf.__operators__.add_5[0][0]'] 
                                                                                                  
 layer_normalization_7 (LayerNo  (None, 12, 1)       2           ['tf.__operators__.add_6[0][0]'] 
 rmalization)                                                                                     
                                                                                                  
 conv1d_6 (Conv1D)              (None, 12, 4)        8           ['layer_normalization_7[0][0]']  
                                                                                                  
 dropout_7 (Dropout)            (None, 12, 4)        0           ['conv1d_6[0][0]']               
                                                                                                  
 conv1d_7 (Conv1D)              (None, 12, 1)        5           ['dropout_7[0][0]']              
          

Epoch 11/50

Epoch 11: loss improved from 0.00890 to 0.00835, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 12/50

Epoch 12: loss improved from 0.00835 to 0.00801, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 13/50

Epoch 13: loss improved from 0.00801 to 0.00758, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 14/50

Epoch 14: loss improved from 0.00758 to 0.00733, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 15/50

Epoch 15: loss improved from 0.00733 to 0.00716, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 16/50

Epoch 16: loss improved from 0.00716 to 0.00687, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 17/50

Epoch 17: loss improved from 0.00687 to 0.00673, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 18/50

Epoch 18: loss improved from 0.0067

Epoch 25/50

Epoch 25: loss improved from 0.00569 to 0.00544, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 26/50

Epoch 26: loss improved from 0.00544 to 0.00526, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 27/50

Epoch 27: loss did not improve from 0.00526
Epoch 28/50

Epoch 28: loss improved from 0.00526 to 0.00513, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 29/50

Epoch 29: loss did not improve from 0.00513
Epoch 30/50

Epoch 30: loss improved from 0.00513 to 0.00497, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 31/50

Epoch 31: loss improved from 0.00497 to 0.00481, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 32/50

Epoch 32: loss improved from 0.00481 to 0.00476, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 33/50

Epoch 33: loss improved from 0.00476 to 0.00464, saving m

Epoch 40/50

Epoch 40: loss improved from 0.00425 to 0.00419, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 41/50

Epoch 41: loss improved from 0.00419 to 0.00417, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 42/50

Epoch 42: loss improved from 0.00417 to 0.00408, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 43/50

Epoch 43: loss improved from 0.00408 to 0.00405, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 44/50

Epoch 44: loss improved from 0.00405 to 0.00402, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 45/50

Epoch 45: loss improved from 0.00402 to 0.00387, saving model to checkpoint\Transformer.test Wed Dec 07 16 58 36 2022.hdf5
Epoch 46/50

Epoch 46: loss did not improve from 0.00387
Epoch 47/50

Epoch 47: loss did not improve from 0.00387
Epoch 48/50

Epoch 48: loss did not improve from 0.00387
Epoch 49/50



In [30]:
max=np.array(max)
min=np.array(min)

In [31]:
#Denormalizing prediction
forec1=a*(max-min)+min
print(forec1)

[[300.43575495 301.68135786 300.86161929 301.1726402  300.08938444
  299.58092356]
 [300.77923673 303.41505069 301.5956158  300.81208277 300.66328061
  299.90189457]
 [309.32237238 309.65241617 309.77395719 308.17977977 308.48226285
  307.87355202]
 ...
 [ 98.99993265 104.4005909  109.34978737 115.18479429 118.07400416
  127.3346063 ]
 [ 99.62955473 104.6331023  110.61130509 112.59460637 119.54055816
  126.62189017]
 [ 94.48358698 103.9676438  103.64404082 108.38626984 115.18181461
  119.5047271 ]]


In [32]:
#Denomalizing the actual values
actual=ytest*(max-min)+min
print(actual)

[[299.99998331 308.99998909 311.99999815 312.99999404 311.99999815
  311.00000226]
 [308.99998909 311.99999815 312.99999404 311.99999815 311.00000226
  313.99998993]
 [311.99999815 312.99999404 311.99999815 311.00000226 313.99998993
  317.99999487]
 ...
 [ 87.0000034   85.99999681  76.99999905  70.99999698  74.00000069
   71.99999822]
 [ 85.99999681  76.99999905  70.99999698  74.00000069  71.99999822
   78.00000029]
 [ 76.99999905  70.99999698  74.00000069  71.99999822  78.00000029
   78.99999618]]


In [33]:
#Computing the RMSE
forec1=np.array(forec1)
actual=np.array(actual)
diff=actual-forec1
#print(diff.shape)
#np.sqrt(np.mean((diff)**2,axis=0))
print("RMSE:",np.sqrt(np.mean((diff)**2,axis=0)).mean())

RMSE: 15.091343108003548


In [34]:
print("--- %s seconds ---" % (time.time() - start_time))

--- 1125.688708782196 seconds ---
