In [1]:
import os

In [2]:
import pandas as pd
from numpy import nan
from numpy import isnan
import numpy as np
from math import sqrt
from numpy import split
from numpy import array
import sys
from scipy.stats import randint
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv), data manipulation as in SQL
import matplotlib.pyplot as plt # this is used for the plot the graph 
import seaborn as sns # used for plot interactive graph. 
from sklearn.model_selection import train_test_split, KFold # to split the data into two parts
from sklearn.preprocessing import StandardScaler # for normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline # pipeline making
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics # for the check the error and accuracy of the model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


#Deep learning specific

import keras
from keras import regularizers
import itertools
from keras.layers import Dense, Dropout, LSTM, Bidirectional, RepeatVector, TimeDistributed, Flatten, BatchNormalization
from keras.models import Sequential, load_model
from keras.utils import to_categorical
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import ConvLSTM2D

Using TensorFlow backend.


In [3]:
plt.rcParams['figure.figsize'] = [90, 45]
plt.rcParams['font.size'] = 40
verbose, epochs, batch_size = 0, 100, 40
cols_to_keep = ['gl_value', 'fs_value', 'basel_value', 'basis_gsr', 'basis_skin_temp', 'bolus_dose']

In [4]:
def read_csv_file(filepath):
    df = pd.read_csv(filepath, sep = ',', parse_dates = {'dt' : ['time']}, infer_datetime_format=True, 
                 low_memory = False, na_values=['nan'], index_col = 'dt')
    df.drop(['gl_predict'], inplace = True, axis = 1)
    return df

In [5]:
csv_dir_path = os.path.abspath('../../5_min_complete_dataset/')
csv_file_path = [os.path.join(csv_dir_path, item) for item in os.listdir(csv_dir_path)]

In [6]:
def split_dataset(data, count):
    train = data[0:count]
    train = array(split(train, len(train)/12))
    return train

In [7]:
def to_supervised(train, n_input, n_out = 12):
    data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
    X = []
    y = []
    in_start = 0
    # step over the entire history one time step at a time
    for _ in range(len(data)):
        # define the end of the input sequence
        in_end = in_start + n_input
        out_end = in_end + n_out
        # ensure we have enough data for this instance
        if out_end <= len(data):
            X.append(data[in_start:in_end, :])
            y.append(data[in_end:out_end, 0])
        # move along one time step
        in_start += 1
    return array(X), array(y)

In [8]:
file_name = csv_file_path[0].split('/')[-1].split('-')[0]
df_567 = read_csv_file(csv_file_path[0])
print(df_567.columns)
df_567 = df_567[cols_to_keep]
display(df_567.head())
display(df_567.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_Lunch', 'meal_type_Snack',
       'sleep_quality', 'basis_gsr', 'basis_skin_temp', 'basis_sleep_quality',
       'basis_sleep_type', 'acceleration_value'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2026-12-28 00:00:00,72.0,106.0,0.85,0.021829,86.27,0.0
2026-12-28 00:05:00,71.0,106.0,0.85,0.021829,86.27,0.0
2026-12-28 00:10:00,71.0,106.0,0.85,0.021829,86.27,0.0
2026-12-28 00:15:00,71.0,106.0,0.85,0.021829,86.27,0.0
2026-12-28 00:20:00,71.0,106.0,0.85,0.021829,86.27,0.0


(13535, 6)

In [9]:
df_567_values = df_567.values
train_count = 13524
n_input = 6
train = split_dataset(df_567_values, train_count)

In [10]:
train.shape

(1127, 12, 6)

In [11]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13507, 6, 6)
Train Y : (13507, 12)


In [12]:
model = Sequential()
model.add(Bidirectional(LSTM(100, activation='relu', input_shape=(n_timesteps, n_features))))
model.add(RepeatVector(n_outputs))
model.add(Bidirectional(LSTM(100, activation = 'relu', return_sequences=True)))
model.add(TimeDistributed(Dense(150, activation='relu')))
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mse', optimizer = 'adam')
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)
model.save('567.h5')

In [13]:
file_name = csv_file_path[1].split('/')[-1].split('-')[0]
df_563 = read_csv_file(csv_file_path[1])
print(df_563.columns)
df_563 = df_563[cols_to_keep]
display(df_563.head())
display(df_563.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose', 'bwz_carb_input',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_HypoCorrection', 'meal_type_Lunch',
       'meal_type_Snack', 'sleep_quality', 'work_intensity',
       'exercise_intensity', 'exercise_duration', 'exercise_type',
       'basis_heart_rate', 'basis_gsr', 'basis_skin_temp', 'basis_air_temp',
       'basis_steps', 'basis_sleep_quality', 'basis_sleep_type'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-13 12:30:00,219.0,169.0,0.7,5.6e-05,87.26,0.0
2021-09-13 12:35:00,229.0,169.0,0.7,5.6e-05,87.26,0.0
2021-09-13 12:40:00,224.0,169.0,0.7,5.6e-05,87.26,0.0
2021-09-13 12:45:00,221.0,169.0,0.7,5.6e-05,87.26,0.0
2021-09-13 12:50:00,215.0,169.0,0.7,5.6e-05,87.26,0.0


(13097, 6)

In [14]:
df_563_values = df_563.values
train_count = 13092
n_input = 6
train = split_dataset(df_563_values, train_count)

In [15]:
train.shape

(1091, 12, 6)

In [16]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13075, 6, 6)
Train Y : (13075, 12)


In [17]:
model = load_model('567.h5')

In [18]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0dc4734d30>

In [19]:
model.save('563.h5')

In [20]:
file_name = csv_file_path[2].split('/')[-1].split('-')[0]
df_540 = read_csv_file(csv_file_path[2])
print(df_540.columns)
df_540 = df_540[cols_to_keep]
display(df_540.head())
display(df_540.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose',
       'bolus_type_normal', 'bolus_type_normal_dual', 'bolus_type_square_dual',
       'meal_carbs', 'meal_type_Breakfast', 'meal_type_Dinner',
       'meal_type_Lunch', 'meal_type_Snack', 'basis_gsr', 'basis_skin_temp',
       'basis_sleep_quality', 'basis_sleep_type', 'acceleration_value'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-05-19 11:30:00,76.0,80.0,1.9,0.296412,87.698,0.0
2027-05-19 11:35:00,76.0,80.0,1.9,0.329391,87.0,0.0
2027-05-19 11:40:00,72.0,80.0,1.9,0.329795,86.39,0.0
2027-05-19 11:45:00,68.0,80.0,1.9,0.326392,86.536,0.0
2027-05-19 11:50:00,65.0,80.0,1.9,0.355854,87.274,0.0


(13104, 6)

In [21]:
df_540_values = df_540.values
train_count = 13104
n_input = 6
train = split_dataset(df_540_values, train_count)

In [22]:
train.shape

(1092, 12, 6)

In [23]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13087, 6, 6)
Train Y : (13087, 12)


In [24]:
model = load_model('563.h5')

In [25]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0d2c012cc0>

In [26]:
model.save('540.h5')

In [27]:
file_name = csv_file_path[3].split('/')[-1].split('-')[0]
df_570 = read_csv_file(csv_file_path[3])
print(df_570.columns)
df_570 = df_570[cols_to_keep]
display(df_570.head())
display(df_570.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose', 'bwz_carb_input',
       'bolus_type_normal', 'bolus_type_normal_dual', 'bolus_type_square_dual',
       'meal_carbs', 'meal_type_Breakfast', 'meal_type_Dinner',
       'meal_type_HypoCorrection', 'meal_type_Lunch', 'meal_type_Snack',
       'sleep_quality', 'work_intensity', 'exercise_intensity',
       'exercise_duration', 'exercise_type', 'basis_heart_rate', 'basis_gsr',
       'basis_skin_temp', 'basis_air_temp', 'basis_steps',
       'basis_sleep_quality', 'basis_sleep_type'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-12-07 16:25:00,101.0,95.0,0.9,4.9e-05,82.58,0.0
2021-12-07 16:30:00,100.0,95.0,0.9,4.9e-05,82.58,0.0
2021-12-07 16:35:00,100.0,95.0,0.9,4.9e-05,82.58,0.0
2021-12-07 16:40:00,99.0,95.0,0.9,4.9e-05,82.58,0.0
2021-12-07 16:45:00,98.0,95.0,0.9,4.9e-05,82.58,0.0


(11610, 6)

In [28]:
df_570_values = df_570.values
train_count = 11604
n_input = 6
train = split_dataset(df_570_values, train_count)

In [29]:
train.shape

(967, 12, 6)

In [30]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (11587, 6, 6)
Train Y : (11587, 12)


In [31]:
model = load_model('540.h5')

In [32]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0cc0217a58>

In [33]:
model.save('570.h5')

In [34]:
file_name = csv_file_path[4].split('/')[-1].split('-')[0]
df_559 = read_csv_file(csv_file_path[4])
print(df_559.columns)
df_559 = df_559[cols_to_keep]
display(df_559.head())
display(df_559.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose', 'bwz_carb_input',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_HypoCorrection', 'meal_type_Lunch',
       'meal_type_Snack', 'sleep_quality', 'work_intensity',
       'exercise_intensity', 'exercise_duration', 'exercise_type',
       'basis_heart_rate', 'basis_gsr', 'basis_skin_temp', 'basis_air_temp',
       'basis_steps', 'basis_sleep_quality', 'basis_sleep_type'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-12-07 01:15:00,101.0,191.0,0.73,8.7e-05,84.92,0.0
2021-12-07 01:20:00,98.0,191.0,0.73,8.7e-05,84.92,0.0
2021-12-07 01:25:00,104.0,191.0,0.73,8.7e-05,84.92,0.0
2021-12-07 01:30:00,112.0,191.0,0.73,8.7e-05,84.92,0.0
2021-12-07 01:35:00,120.0,191.0,0.73,8.7e-05,84.92,0.0


(12080, 6)

In [35]:
df_559_values = df_559.values
train_count = 12072
n_input = 6
train = split_dataset(df_559_values, train_count)

In [36]:
train.shape

(1006, 12, 6)

In [37]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (12055, 6, 6)
Train Y : (12055, 12)


In [38]:
model = load_model('570.h5')

In [39]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0cc83e32b0>

In [40]:
model.save('559.h5')

In [41]:
file_name = csv_file_path[5].split('/')[-1].split('-')[0]
df_575 = read_csv_file(csv_file_path[5])
print(df_575.columns)
df_575 = df_575[cols_to_keep]
display(df_575.head())
display(df_575.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose', 'bwz_carb_input',
       'bolus_type_normal', 'bolus_type_normal_dual', 'bolus_type_square',
       'bolus_type_square_dual', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_HypoCorrection', 'meal_type_Lunch',
       'meal_type_Snack', 'sleep_quality', 'work_intensity',
       'exercise_intensity', 'exercise_duration', 'exercise_type',
       'basis_heart_rate', 'basis_gsr', 'basis_skin_temp', 'basis_air_temp',
       'basis_steps', 'basis_sleep_quality', 'basis_sleep_type'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-11-17 12:00:00,128.0,104.0,0.5,9.8e-05,86.0,0.0
2021-11-17 12:05:00,123.0,104.0,0.5,0.000103,86.18,0.0
2021-11-17 12:10:00,120.0,104.0,0.5,9.6e-05,86.9,0.0
2021-11-17 12:15:00,124.0,104.0,0.5,9.4e-05,85.64,0.0
2021-11-17 12:20:00,121.0,104.0,0.5,9.6e-05,85.82,0.0


(13103, 6)

In [42]:
df_575_values = df_575.values
train_count = 13092
n_input = 6
train = split_dataset(df_575_values, train_count)

In [43]:
train.shape

(1091, 12, 6)

In [44]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13075, 6, 6)
Train Y : (13075, 12)


In [45]:
model = load_model('559.h5')

In [46]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c643cd400>

In [47]:
model.save('575.h5')

In [48]:
file_name = csv_file_path[6].split('/')[-1].split('-')[0]
df_544 = read_csv_file(csv_file_path[6])
print(df_544.columns)
df_544 = df_544[cols_to_keep]
display(df_544.head())
display(df_544.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose',
       'bolus_type_normal', 'bolus_type_square', 'meal_carbs',
       'meal_type_Breakfast', 'meal_type_Dinner', 'meal_type_HypoCorrection',
       'meal_type_Lunch', 'meal_type_Snack', 'sleep_quality', 'work_intensity',
       'exercise_intensity', 'exercise_duration', 'exercise_type', 'basis_gsr',
       'basis_skin_temp', 'basis_sleep_quality', 'basis_sleep_type',
       'acceleration_value'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-05-19 11:30:00,175.0,124.0,1.8,0.361021,87.938,0.0
2027-05-19 11:35:00,176.0,124.0,1.8,0.3624,87.4,0.0
2027-05-19 11:40:00,157.0,124.0,1.8,0.378307,87.32,0.0
2027-05-19 11:45:00,156.0,124.0,1.8,0.373754,87.432,0.0
2027-05-19 11:50:00,155.0,124.0,1.8,0.39101,88.11,0.0


(13104, 6)

In [49]:
df_544_values = df_544.values
train_count = 13104
n_input = 6
train = split_dataset(df_544_values, train_count)

In [50]:
train.shape

(1092, 12, 6)

In [51]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13087, 6, 6)
Train Y : (13087, 12)


In [52]:
model = load_model('575.h5')

In [53]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c643cd7f0>

In [54]:
model.save('544.h5')

In [55]:
file_name = csv_file_path[7].split('/')[-1].split('-')[0]
df_596 = read_csv_file(csv_file_path[7])
print(df_596.columns)
df_596 = df_596[cols_to_keep]
display(df_596.head())
display(df_596.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_HypoCorrection', 'meal_type_Lunch',
       'meal_type_Snack', 'sleep_quality', 'exercise_intensity',
       'exercise_duration', 'exercise_type', 'basis_gsr', 'basis_skin_temp',
       'basis_sleep_quality', 'basis_sleep_type', 'acceleration_value'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-04-09 16:15:00,142.0,113.0,0.4,0.019279,82.664,0.0
2027-04-09 16:20:00,142.0,113.0,0.4,0.019279,82.664,0.0
2027-04-09 16:25:00,142.0,113.0,0.4,0.019279,82.664,0.0
2027-04-09 16:30:00,141.0,113.0,0.4,0.019279,82.664,0.0
2027-04-09 16:35:00,139.0,113.0,0.4,0.019279,82.664,0.0


(13628, 6)

In [56]:
df_596_values = df_596.values
train_count = 13620
n_input = 6
train = split_dataset(df_596_values, train_count)

In [57]:
train.shape

(1135, 12, 6)

In [58]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13603, 6, 6)
Train Y : (13603, 12)


In [59]:
model = load_model('544.h5')

In [60]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c4c2b64a8>

In [61]:
model.save('596.h5')

In [62]:
file_name = csv_file_path[8].split('/')[-1].split('-')[0]
df_591 = read_csv_file(csv_file_path[8])
print(df_591.columns)
df_591 = df_591[cols_to_keep]
display(df_591.head())
display(df_591.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose', 'bwz_carb_input',
       'bolus_type_normal', 'bolus_type_normal_dual', 'bolus_type_square_dual',
       'meal_carbs', 'meal_type_Breakfast', 'meal_type_Dinner',
       'meal_type_HypoCorrection', 'meal_type_Lunch', 'meal_type_Snack',
       'sleep_quality', 'exercise_intensity', 'exercise_duration',
       'exercise_type', 'basis_heart_rate', 'basis_gsr', 'basis_skin_temp',
       'basis_air_temp', 'basis_steps', 'basis_sleep_quality',
       'basis_sleep_type'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-11-30 17:05:00,160.0,263.0,0.95,0.000395,89.06,0.0
2021-11-30 17:10:00,158.0,263.0,0.95,0.000405,90.32,0.0
2021-11-30 17:15:00,160.0,263.0,0.95,0.000366,91.04,0.0
2021-11-30 17:20:00,166.0,263.0,0.95,0.000239,89.6,0.0
2021-11-30 17:25:00,175.0,263.0,0.95,0.000255,91.4,0.0


(12754, 6)

In [63]:
df_591_values = df_591.values
train_count = 12744
n_input = 6
train = split_dataset(df_591_values, train_count)

In [64]:
train.shape

(1062, 12, 6)

In [65]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (12727, 6, 6)
Train Y : (12727, 12)


In [66]:
model = load_model('596.h5')

In [67]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c3f607b00>

In [68]:
model.save('591.h5')

In [69]:
file_name = csv_file_path[9].split('/')[-1].split('-')[0]
df_588 = read_csv_file(csv_file_path[9])
print(df_588.columns)
df_588 = df_588[cols_to_keep]
display(df_588.head())
display(df_588.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose', 'bwz_carb_input',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_HypoCorrection', 'meal_type_Lunch',
       'meal_type_Snack', 'sleep_quality', 'work_intensity',
       'exercise_intensity', 'exercise_duration', 'exercise_type',
       'basis_heart_rate', 'basis_gsr', 'basis_skin_temp', 'basis_air_temp',
       'basis_steps', 'basis_sleep_quality', 'basis_sleep_type'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-08-30 11:50:00,116.0,114.0,1.25,6.8e-05,91.58,0.0
2021-08-30 11:55:00,117.0,114.0,1.25,6.7e-05,92.3,0.0
2021-08-30 12:00:00,119.0,114.0,1.25,6.6e-05,91.4,5.1
2021-08-30 12:05:00,116.0,114.0,1.25,6.6e-05,91.4,0.0
2021-08-30 12:10:00,111.0,114.0,1.25,6.6e-05,91.94,0.0


(13105, 6)

In [70]:
df_588_values = df_588.values
train_count = 13104
n_input = 6
train = split_dataset(df_588_values, train_count)

In [71]:
train.shape

(1092, 12, 6)

In [72]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13087, 6, 6)
Train Y : (13087, 12)


In [73]:
model = load_model('591.h5')

In [74]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c6403fcc0>

In [75]:
model.save('588.h5')

In [76]:
file_name = csv_file_path[10].split('/')[-1].split('-')[0]
df_584 = read_csv_file(csv_file_path[10])
print(df_584.columns)
df_584 = df_584[cols_to_keep]
display(df_584.head())
display(df_584.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_Lunch', 'meal_type_Snack',
       'sleep_quality', 'work_intensity', 'exercise_intensity',
       'exercise_duration', 'exercise_type', 'basis_gsr', 'basis_skin_temp',
       'basis_sleep_quality', 'basis_sleep_type', 'acceleration_value'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-05-14 00:00:00,48.0,197.0,1.75,0.134848,84.555,0.0
2025-05-14 00:05:00,48.0,197.0,1.75,0.134848,84.555,0.0
2025-05-14 00:10:00,53.0,197.0,1.75,0.134848,84.555,0.0
2025-05-14 00:15:00,63.0,197.0,1.75,0.134848,84.555,0.0
2025-05-14 00:20:00,69.0,197.0,1.75,0.134848,84.555,0.0


(13247, 6)

In [77]:
df_584_values = df_584.values
train_count = 13236
n_input = 6
train = split_dataset(df_584_values, train_count)

In [78]:
train.shape

(1103, 12, 6)

In [79]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (13219, 6, 6)
Train Y : (13219, 12)


In [80]:
model = load_model('588.h5')

In [81]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c3ddb6dd8>

In [82]:
model.save('584.h5')

In [83]:
file_name = csv_file_path[11].split('/')[-1].split('-')[0]
df_552 = read_csv_file(csv_file_path[11])
print(df_552.columns)
df_552 = df_552[cols_to_keep]
display(df_552.head())
display(df_552.shape)

Index(['gl_value', 'fs_value', 'basel_value', 'bolus_dose',
       'bolus_type_normal', 'meal_carbs', 'meal_type_Breakfast',
       'meal_type_Dinner', 'meal_type_HypoCorrection', 'meal_type_Lunch',
       'meal_type_Snack', 'sleep_quality', 'work_intensity',
       'exercise_intensity', 'exercise_duration', 'exercise_type', 'basis_gsr',
       'basis_skin_temp', 'basis_sleep_quality', 'basis_sleep_type',
       'acceleration_value'],
      dtype='object')


Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-04-16 11:15:00,95.0,102.0,1.4,0.015633,78.69,0.0
2025-04-16 11:20:00,86.0,85.0,1.4,0.015633,78.69,0.0
2025-04-16 11:25:00,81.0,85.0,1.4,0.015633,78.69,0.0
2025-04-16 11:30:00,81.0,85.0,1.4,0.015633,78.69,0.0
2025-04-16 11:35:00,82.0,85.0,1.4,0.015633,78.69,0.0


(11096, 6)

In [84]:
df_552_values = df_552.values
train_count = 11088
n_input = 6
train = split_dataset(df_552_values, train_count)

In [85]:
train.shape

(924, 12, 6)

In [86]:
train_x, train_y = to_supervised(train, n_input)
print("Train X :", train_x.shape)
print("Train Y :", train_y.shape)
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

Train X : (11071, 6, 6)
Train Y : (11071, 12)


In [87]:
model = load_model('584.h5')

In [88]:
model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, verbose = verbose)

<keras.callbacks.callbacks.History at 0x7f0c3e02b9e8>

In [89]:
model.save('552.h5')

In [90]:
def evaluate_forecasts(actual, predicted):
    print("Actual Shape : {}".format(actual.shape))
    print("Predicted Shape : {}".format(predicted.shape))
    scores = list()
    maes = list()
    for i in range(actual.shape[1]):
        mse = mean_squared_error(actual[:, i], predicted[:, i])
        mae = mean_absolute_error(actual[:, i], predicted[:, i])
        rmse = sqrt(mse)
        maes.append(mae)
        scores.append(rmse)
    s = 0
    for row in range(actual.shape[0]):
        for col in range(actual.shape[1]):
            s += (actual[row, col] - predicted[row, col])**2
    score = sqrt(s / (actual.shape[0] * actual.shape[1]))
    return score, scores, maes

In [91]:
def summarize_scores(name, score, scores, maes):
    rmse_05_min = scores[0]
    rmse_30_min = scores[5]
    rmse_60_min = scores[11]
    mae_05_min = maes[0]
    mae_30_min = maes[5]
    mae_60_min = maes[11]
    print("==========================")
    print("RMSE : ")
    print("5 Minutes : {0:0.1f}".format(rmse_05_min))
    print("30 Minutes : {0:0.1f}".format(rmse_30_min))
    print("60 Minutes : {0:0.1f}".format(rmse_60_min))
    print("==========================")
    print("==========================")
    print("MAE : ")
    print("5 Minutes : {0:0.1f}".format(mae_05_min))
    print("30 Minutes : {0:0.1f}".format(mae_30_min))
    print("60 Minutes : {0:0.1f}".format(mae_60_min))
    print("===========================")

In [92]:
def forecast(model, history, n_input):
    data = array(history)
    data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
    # retrieve last observations for input data
    input_x = data[-n_input:, :]
    # reshape into [1, n_input, n]
    input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))
    # forecast the next 30 mins
    yhat = model.predict(input_x, verbose=0)
    # we only want the vector forecast
    yhat = yhat[0]
    return yhat

In [93]:
def evaluate_model(model, train, test, n_input):
    history = [x for x in train]
    # walk-forward validation over each week
    predictions = []
    for i in range(len(test)):
        # predict the 30 mins
        yhat_sequence = forecast(model, history, n_input)
        # store the predictions
        predictions.append(yhat_sequence)
        # get real observation and add to history for predicting the next 30 mins
        history.append(test[i, :])
    # evaluate predictions days for each 30 mins
    predictions = array(predictions)
    score, scores, maes = evaluate_forecasts(test[:, :, 0], predictions)

    return score, scores, maes

In [94]:
csv_test_dir_path = os.path.abspath('../../5_min_complete_dataset_testing/')
csv_test_file_path = [os.path.join(csv_test_dir_path, item) for item in os.listdir(csv_test_dir_path)]
csv_test_file_path

['/KDH2020/KDH2020_BGLP_Challenge/practice/final_submission/5_min_complete_dataset_testing/584-ws-testing.csv',
 '/KDH2020/KDH2020_BGLP_Challenge/practice/final_submission/5_min_complete_dataset_testing/540-ws-testing.csv',
 '/KDH2020/KDH2020_BGLP_Challenge/practice/final_submission/5_min_complete_dataset_testing/544-ws-testing.csv',
 '/KDH2020/KDH2020_BGLP_Challenge/practice/final_submission/5_min_complete_dataset_testing/552-ws-testing.csv',
 '/KDH2020/KDH2020_BGLP_Challenge/practice/final_submission/5_min_complete_dataset_testing/596-ws-testing.csv',
 '/KDH2020/KDH2020_BGLP_Challenge/practice/final_submission/5_min_complete_dataset_testing/567-ws-testing.csv']

In [95]:
test_df_567 = read_csv_file(csv_test_file_path[5])
test_df_567 = test_df_567[cols_to_keep]
display(test_df_567.head())
display(test_df_567.shape)

Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-02-13 00:00:00,296.0,106.0,0.85,0.15768,95.768,0.0
2027-02-13 00:05:00,290.0,106.0,0.85,0.111388,95.854,0.0
2027-02-13 00:10:00,284.0,106.0,0.85,0.019517,95.138,0.0
2027-02-13 00:15:00,279.0,106.0,0.85,0.128944,94.778,0.0
2027-02-13 00:20:00,272.0,106.0,0.85,0.46483,95.264,0.0


(2870, 6)

In [96]:
test_values_567 = test_df_567.values
test_count = 2868
test = split_dataset(test_values_567, test_count)
test.shape

(239, 12, 6)

In [97]:
model = load_model('552.h5')

In [98]:
score, scores, maes = evaluate_model(model, train, test, n_input)

Actual Shape : (239, 12)
Predicted Shape : (239, 12, 1)


In [99]:
summarize_scores('seq2seq_lstm', score, scores, maes)
# time_steps = ['5', '10', '15', '20', '25', '30']
# plt.plot(time_steps, scores, marker = 'o', label = 'lstm')
# plt.show()

RMSE : 
5 Minutes : 22.9
30 Minutes : 30.8
60 Minutes : 53.9
MAE : 
5 Minutes : 10.9
30 Minutes : 20.5
60 Minutes : 37.1


In [100]:
#df_567.columns

In [101]:
#test_df_567.columns

In [102]:
test_df_540 = read_csv_file(csv_test_file_path[1])
test_df_540 = test_df_540[cols_to_keep]

In [103]:
display(test_df_540.head())
display(test_df_540.shape)

Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-07-04 00:00:00,254.0,204.0,2.4,0.006219,84.47,0.0
2027-07-04 00:05:00,250.0,204.0,2.4,0.006219,84.47,0.0
2027-07-04 00:10:00,249.0,204.0,2.4,0.006219,84.47,0.0
2027-07-04 00:15:00,247.0,204.0,2.4,0.006219,84.47,0.0
2027-07-04 00:20:00,242.0,204.0,2.4,0.006219,84.47,0.0


(3065, 6)

In [104]:
test_values_540 = test_df_540.values
test_count = 3060
test = split_dataset(test_values_540, test_count)
test.shape

(255, 12, 6)

In [105]:
score, scores, maes = evaluate_model(model, train, test, n_input)
summarize_scores('seq2seq_lstm', score, scores, maes)
# time_steps = ['5', '10', '15', '20', '25', '30']
# plt.plot(time_steps, scores, marker = 'o', label = 'lstm')
# plt.show()

Actual Shape : (255, 12)
Predicted Shape : (255, 12, 1)
RMSE : 
5 Minutes : 15.0
30 Minutes : 32.7
60 Minutes : 55.1
MAE : 
5 Minutes : 10.4
30 Minutes : 22.9
60 Minutes : 36.9


In [106]:
test_df_544 = read_csv_file(csv_test_file_path[2])
test_df_544 = test_df_544[cols_to_keep]
display(test_df_544.head())
display(test_df_544.shape)
test_values_544 = test_df_544.values
test_count = 3132
test = split_dataset(test_values_544, test_count)
test.shape

Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-06-24 00:00:00,161.0,66.0,1.7,0.025451,88.29,0.0
2027-06-24 00:05:00,155.0,66.0,1.7,0.055464,88.888,0.0
2027-06-24 00:10:00,150.0,66.0,1.7,0.075235,90.964,0.0
2027-06-24 00:15:00,147.0,66.0,1.7,0.10142,92.132,0.0
2027-06-24 00:20:00,144.0,66.0,1.7,0.106214,92.68,0.0


(3135, 6)

(261, 12, 6)

In [107]:
score, scores, maes = evaluate_model(model, train, test, n_input)
summarize_scores('seq2seq_lstm', score, scores, maes)
# time_steps = ['5', '10', '15', '20', '25', '30']
# plt.plot(time_steps, scores, marker = 'o', label = 'lstm')
# plt.show()

Actual Shape : (261, 12)
Predicted Shape : (261, 12, 1)
RMSE : 
5 Minutes : 23.5
30 Minutes : 31.5
60 Minutes : 64.3
MAE : 
5 Minutes : 11.8
30 Minutes : 19.9
60 Minutes : 36.8


In [108]:
test_df_596 = read_csv_file(csv_test_file_path[4])
test_df_596 = test_df_596[cols_to_keep]
display(test_df_596.head())
display(test_df_596.shape)
test_values_596 = test_df_596.values
test_count = 3000
test = split_dataset(test_values_596, test_count)
test.shape

Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2027-05-27 00:00:00,174.0,183.0,0.4,0.011845,84.49,0.0
2027-05-27 00:05:00,173.0,183.0,0.4,0.011845,84.49,0.0
2027-05-27 00:10:00,173.0,183.0,0.4,0.011845,84.49,0.0
2027-05-27 00:15:00,172.0,183.0,0.4,0.011845,84.49,0.0
2027-05-27 00:20:00,170.0,183.0,0.4,0.011845,84.49,0.0


(3002, 6)

(250, 12, 6)

In [109]:
score, scores, maes = evaluate_model(model, train, test, n_input)
summarize_scores('seq2seq_lstm', score, scores, maes)
# time_steps = ['5', '10', '15', '20', '25', '30']
# plt.plot(time_steps, scores, marker = 'o', label = 'lstm')
# plt.show()

Actual Shape : (250, 12)
Predicted Shape : (250, 12, 1)
RMSE : 
5 Minutes : 8.2
30 Minutes : 18.0
60 Minutes : 29.9
MAE : 
5 Minutes : 5.7
30 Minutes : 12.8
60 Minutes : 22.2


In [110]:
test_df_584 = read_csv_file(csv_test_file_path[0])
test_df_584 = test_df_584[cols_to_keep]
display(test_df_584.head())
display(test_df_584.shape)
test_values_584 = test_df_584.values
test_count = 2988
test = split_dataset(test_values_584, test_count)
test.shape

Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-06-29 00:00:00,243.0,262.0,1.75,0.141934,85.9125,0.0
2025-06-29 00:05:00,253.0,262.0,1.75,0.130756,85.854,0.0
2025-06-29 00:10:00,262.0,262.0,1.75,0.123876,85.822,0.0
2025-06-29 00:15:00,269.0,262.0,1.75,0.122054,85.838,0.0
2025-06-29 00:20:00,269.0,262.0,1.75,0.135822,85.876,0.0


(2994, 6)

(249, 12, 6)

In [111]:
score, scores, maes = evaluate_model(model, train, test, n_input)
summarize_scores('seq2seq_lstm', score, scores, maes)
# time_steps = ['5', '10', '15', '20', '25', '30']
# plt.plot(time_steps, scores, marker = 'o', label = 'lstm')
# plt.show()

Actual Shape : (249, 12)
Predicted Shape : (249, 12, 1)
RMSE : 
5 Minutes : 48.0
30 Minutes : 37.3
60 Minutes : 58.5
MAE : 
5 Minutes : 22.2
30 Minutes : 23.9
60 Minutes : 38.2


In [112]:
test_df_552 = read_csv_file(csv_test_file_path[3])
test_df_552 = test_df_552[cols_to_keep]
display(test_df_552.head())
display(test_df_552.shape)
test_values_552 = test_df_552.values
test_count = 3948
test = split_dataset(test_values_552, test_count)
test.shape

Unnamed: 0_level_0,gl_value,fs_value,basel_value,basis_gsr,basis_skin_temp,bolus_dose
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-05-25 00:00:00,181.0,112.0,1.1,0.006013,85.94,2.025
2025-05-25 00:05:00,180.0,112.0,1.1,0.006013,85.94,0.0
2025-05-25 00:10:00,178.0,112.0,1.1,0.006013,85.94,0.0
2025-05-25 00:15:00,178.0,112.0,1.1,0.006013,85.94,0.0
2025-05-25 00:20:00,176.0,112.0,1.1,0.006013,85.94,0.0


(3949, 6)

(329, 12, 6)

In [113]:
score, scores, maes = evaluate_model(model, train, test, n_input)
summarize_scores('seq2seq_lstm', score, scores, maes)
# time_steps = ['5', '10', '15', '20', '25', '30']
# plt.plot(time_steps, scores, marker = 'o', label = 'lstm')
# plt.show()

Actual Shape : (329, 12)
Predicted Shape : (329, 12, 1)
RMSE : 
5 Minutes : 7.1
30 Minutes : 14.3
60 Minutes : 25.0
MAE : 
5 Minutes : 5.2
30 Minutes : 9.5
60 Minutes : 18.2
