In [1]:
%load_ext autotime

import os, math

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm, trange

In [2]:
from sklearn.preprocessing import MinMaxScaler
from tcn import TCN
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Input, Model
from sklearn.metrics import mean_squared_error, mean_absolute_error

def model(test_data, train_data, epochs = 10):
    """Train a TCN regressor on ``train_data`` and evaluate it on ``test_data``.

    Both frames must contain a ``time`` column (dropped before modelling) and
    are expected to contain the target column ``gl_predict``; every other
    column is used as an input feature. A column present in only one of the
    two frames is added to the other one filled with zeros, and both frames
    are then put into the *same* column order so the fitted scaler and the
    network see each feature at the same position for training and testing.

    The input frames are not modified; all work is done on copies.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Evaluation rows. Its ``time`` column becomes the ``timestamp`` column
        of the returned frame.
    train_data : pandas.DataFrame
        Training rows.
    epochs : int, optional
        Number of training epochs passed to ``fit`` (default 10).

    Returns
    -------
    tuple
        ``(final_output, rmse, mae)`` where ``final_output`` is a DataFrame
        with columns ``timestamp`` and ``predicted_BGL_value`` and ``rmse`` /
        ``mae`` compare the predictions against ``test_data['gl_predict']``
        on the original (unscaled) target scale.

    Raises
    ------
    KeyError
        If ``time`` is missing from either frame, or ``gl_predict`` is
        missing from both.
    """
    # Normalise the two column names containing a space. The same mapping is
    # applied to both frames so a feature cannot end up under two names.
    rename_map = {'bolus_type_normal dual':'bolus_type_normal_dual', 'bolus_type_square dual':'bolus_type_square_dual'}

    test_time = test_data['time']

    # rename()/drop() without inplace return new frames, leaving the caller's data intact.
    train_frame = train_data.rename(columns = rename_map).drop(columns = ['time'])
    test_frame = test_data.rename(columns = rename_map).drop(columns = ['time'])

    # One shared column list: training order first, then test-only columns.
    # Reindexing both frames on it zero-fills missing columns AND guarantees
    # identical column order (position-based inserts could leave the two
    # frames ordered differently).
    test_only_columns = [col for col in test_frame.columns if col not in train_frame.columns]
    all_columns = list(train_frame.columns) + test_only_columns
    train_frame = train_frame.reindex(columns = all_columns, fill_value = 0)
    test_frame = test_frame.reindex(columns = all_columns, fill_value = 0)

    X_train = train_frame.drop(columns = ['gl_predict'])
    y_train = train_frame[['gl_predict']]
    X_test = test_frame.drop(columns = ['gl_predict'])
    y_test = test_frame[['gl_predict']]
    input_dim = X_train.shape[1]

    # Scalers are fitted on the training data only and reused for the test data.
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(X_train)
    scaler_y.fit(y_train)

    # The network consumes (samples, timesteps, features) with a single timestep.
    scaled_X_train = scaler_x.transform(X_train)[:, np.newaxis, :]
    scaled_y_train = scaler_y.transform(y_train)

    batch_size, timesteps = 1, 1
    inputs = Input(batch_shape = (batch_size, timesteps, input_dim))
    hidden = TCN(return_sequences = False)(inputs)
    outputs = Dense(1)(hidden)

    # Named `network` so it does not shadow this function's own name.
    network = Model(inputs = [inputs], outputs = [outputs])
    network.compile(optimizer = 'adam', loss = 'mse')
    network.fit(scaled_X_train, scaled_y_train, epochs = epochs)

    scaled_X_test = scaler_x.transform(X_test)[:, np.newaxis, :]
    scaled_prediction = network.predict(scaled_X_test, batch_size = batch_size)

    # Undo the target scaling and flatten (n, 1) -> (n,) for column assignment and metrics.
    predicted_bgl = scaler_y.inverse_transform(scaled_prediction).ravel()
    true_bgl = y_test.to_numpy().ravel()

    final_output = pd.DataFrame({'timestamp': test_time, 'predicted_BGL_value': predicted_bgl})

    # scikit-learn metric signature is (y_true, y_pred).
    rmse = math.sqrt(mean_squared_error(true_bgl, predicted_bgl))
    mae = mean_absolute_error(true_bgl, predicted_bgl)
    return (final_output, rmse, mae)

time: 4.69 s


In [3]:
# Training files to evaluate; each has a matching '<prefix>testing.csv' file.
test_data_list = ['540-ws-training.csv', '544-ws-training.csv', '552-ws-training.csv', '567-ws-training.csv', '584-ws-training.csv', '596-ws-training.csv']

train_dir = '../data/derived/5_min_complete_dataset/'
test_dir = '../data/derived_test/5_min_complete_dataset/'
output_dir = '../data/output/tcn/'

# Create the output folder up front so a missing directory cannot make
# to_csv() fail after a full training run has already completed.
os.makedirs(output_dir, exist_ok = True)

# Per-file error metrics, keyed by training-file name (dicts despite the names).
rmse_list = {}
mae_list = {}
for file in os.listdir(train_dir):
    if file not in test_data_list:
        continue

    # First 7 characters of the file name, e.g. '540-ws-'.
    prefix = file[0:7]
    train_data = pd.read_csv(train_dir + file)
    test_data = pd.read_csv(test_dir + prefix + 'testing.csv')

    final_output, rmse_error, mae_error = model(test_data, train_data)
    rmse_list[file] = rmse_error
    mae_list[file] = mae_error

    # First line is the RMSE, second line is the MAE.
    print ('{} - {}'.format(file, rmse_error))
    print ('{} - {}'.format(file, mae_error))
    final_output.to_csv(output_dir + prefix + 'result_5min.csv')

Train on 13247 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
584-ws-training.csv - 8.900699452532583
584-ws-training.csv - 5.2534118147794615
Train on 13535 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
567-ws-training.csv - 7.594278494327072
567-ws-training.csv - 3.7528450503997273
Train on 13628 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
596-ws-training.csv - 5.992518677717183
596-ws-training.csv - 4.2468742673036495
Train on 11096 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
552-ws-training.csv - 4.146463734305543
552-ws-training.csv - 3.0411161682097996
Train on 13104 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
544-ws

In [4]:
# Display the RMSE recorded for each training file by the evaluation loop.
rmse_list

{'584-ws-training.csv': 8.900699452532583,
 '567-ws-training.csv': 7.594278494327072,
 '596-ws-training.csv': 5.992518677717183,
 '552-ws-training.csv': 4.146463734305543,
 '544-ws-training.csv': 5.718122160673547,
 '540-ws-training.csv': 7.187775968080308}

time: 2.26 ms


In [5]:
# Display the MAE recorded for each training file by the evaluation loop.
mae_list

{'584-ws-training.csv': 5.2534118147794615,
 '567-ws-training.csv': 3.7528450503997273,
 '596-ws-training.csv': 4.2468742673036495,
 '552-ws-training.csv': 3.0411161682097996,
 '544-ws-training.csv': 4.042229633240038,
 '540-ws-training.csv': 5.247398446162412}

time: 8.23 ms


In [6]:
from statistics import pstdev, mean

# Views over the per-file scores collected in the two result dicts.
rmse_values = rmse_list.values()
mae_values = mae_list.values()

# (output template, statistic, values) for each summary line, in print order.
# pstdev is the population standard deviation.
summary_rows = (
    ('Mean - RMSE : {}', mean, rmse_values),
    ('Standard Deviation - RMSE : {}', pstdev, rmse_values),
    ('Mean - MAE : {}', mean, mae_values),
    ('Standard Deviation - MAE : {}', pstdev, mae_values),
)

for template, statistic, values in summary_rows:
    print (template.format(statistic(values)))

Mean - RMSE : 6.589976414606039
Standard Deviation - RMSE : 1.5162051168255761
Mean - MAE : 4.263979230015848
Standard Deviation - MAE : 0.7908021112979168
time: 7.38 ms
