In [None]:
import itertools
import json
import os
import pickle
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utils import *

from sensor_params import *

from models.training import *

# Single step forecasting models and functions 
from models.single_step.unet1D_regressor import *
from models.single_step.unet1D_regressor import get_model as get_unet1D_single_step

from models.single_step.unet1D_DIL_regressor import *
from models.single_step.unet1D_DIL_regressor import get_model as get_DIL_unet1D_single_step

from models.single_step.LSTMVanilla import *
from models.single_step.LSTMVanilla import get_model as get_LSTM_single_step

from models.single_step.unet1D_LSTM_regressor import *
from models.single_step.unet1D_LSTM_regressor import get_model as get_unet1DLSTM_single_step

from models.single_step.unet1D_nonCompres_regressor import *
from models.single_step.unet1D_nonCompres_regressor import get_model as get_unet1D_nonCompres_single_step

from models.single_step.StackedLSTM import *
from models.single_step.StackedLSTM import get_model as get_StackedLSTM_single_step

from evaluation.single_step.evaluation import model_evaluation as single_step_model_evaluation

# Multi step forecasting models and functions
from models.multi_step.unet1D_regressor import *
from models.multi_step.unet1D_regressor import get_model as get_unet1D_multi_step

# from models.multi_step.unet1D_DIL_regressor import *
# from models.multi_step.unet1D_DIL_regressor import get_model as get_DIL_unet1D_multi_step

from models.multi_step.LSTMVanilla import *
from models.multi_step.LSTMVanilla import get_model as get_LSTM_multi_step

# from models.multi_step.unet1D_LSTM_regressor import *
# from models.multi_step.unet1D_LSTM_regressor import get_model as get_unet1DLSTM_multi_step

# from models.multi_step.unet1D_nonCompres_regressor import *
# from models.multi_step.unet1D_nonCompres_regressor import get_model as get_unet1D_nonCompres_multi_step

from models.multi_step.StackedLSTM import *
from models.multi_step.StackedLSTM import get_model as get_StackedLSTM_multi_step

from evaluation.multi_step.evaluation import model_evaluation as multi_step_model_evaluation

# Dictionary with the configurations to train all models
from training_config import training_config 

# Output location: every experiment directory is created under
# parent_directory + experiments_folder (plain string concatenation, hence the
# leading backslash in experiments_folder).
parent_directory = r"C:\Users\aralmeida\Downloads"
experiments_folder = r"\T1DM_pred_experiments"

# Input location: folder that holds the dataset, and the name of its JSON export
# (the export is only referenced by the commented-out extract_Mikael_data call).
DATABASE_PATH = r"C:\Users\aralmeida\OneDrive - Universidad de Las Palmas de Gran Canaria\Doctorado\Bases de datos\WARIFA\Mikael T1DM"
filename = "MIKAEL_data.json"

In [None]:
# Load the CGM readings from the pickle cache stored in the dataset folder.
# To read the raw JSON export instead, uncomment the following line:
# data , basal_data_dict, blood_pressure_dict, bolus_data_dict, exercise_data_dict, carbohydrates_data_dict, pump_events_data_dict, sgv_data_dict, sleep_data_dict, smgb_data_dict, steps_data_dict, weight_dict  = extract_Mikael_data(DATABASE_PATH, filename, ONLY_CGM = True)
#
# The path is built with os.path.join: the previous literal '\CGM.pk1' relied on
# '\C' being an unrecognised escape sequence, which newer Python versions warn about.
# SECURITY: pickle.load can execute arbitrary code stored in the file; only load
# cache files that you generated yourself.
with open(os.path.join(DATABASE_PATH, 'CGM.pk1'), 'rb') as f:
    sgv_data_dict = pickle.load(f)

# Work from the directory under which the experiment folders are created
os.chdir(parent_directory)

In [None]:
# Display the data-partition options listed in the training configuration
training_config['partition']

In [4]:
# Running index of the experiment being executed (reported in the progress message)
i = 0

# Total number of experiments: product of the number of options configured
# for each of the nine swept parameters
total_exps = 1
for option_name in ('sensor', 'N', 'step', 'PH', 'single_multi_step',
                    'partition', 'normalization', 'under_over_sampling', 'model'):
    total_exps = total_exps * len(training_config[option_name])

# Dictionary that collects the results of every experiment.
# NOTE(review): presumably loads an existing dictionary from the experiments
# folder or creates a new one — confirm in create_results_dictionary.
results_dictionary = create_results_dictionary(parent_directory, experiments_folder)

############## Parameters for training
# These values do not depend on the experiment configuration, so they are set
# once before the sweep instead of on every iteration. An earlier setting
# (batch_size = 2, epochs = 10, labelled "Best hyperparameters for the model
# N 48") was overwritten before being used and has been dropped.
lr = 0.0001
batch_size = 256
epochs = 2

# Convolution layers hyperparameters
kernel_size = 3
tau = 1
###############################

# Every combination of the configured options. itertools.product varies the
# last option fastest, i.e. the same visiting order as nine nested for-loops.
experiment_grid = itertools.product(
    training_config['sensor'],
    training_config['N'],
    training_config['step'],
    training_config['PH'],
    training_config['single_multi_step'],
    training_config['partition'],
    training_config['normalization'],
    training_config['under_over_sampling'],
    training_config['model'],
)

for i, (sensor, N, step, PH, single_multi_step, data_partition,
        normalization, under_over_sampling, model_name) in enumerate(experiment_grid, start=1):

    # Key under which the results of this configuration are stored
    key = get_dictionary_key(sensor, single_multi_step, N, step, PH, data_partition, normalization, under_over_sampling, model_name)

    # If not created the directory correspondant with this configuration, create it
    subdirectory = r"\{}\N{}\step{}\PH{}\{}\{}\norm_{}\{}_sampling\{}".format(sensor["NAME"], N, step, PH, single_multi_step, data_partition, normalization, under_over_sampling, model_name)
    experiment_dir = parent_directory+experiments_folder+subdirectory
    os.makedirs(experiment_dir, exist_ok=True)

    # Go to subdirectory (the helpers called below are run from inside it)
    os.chdir(experiment_dir)

    print("~~~~~~~~~~~~~~~~~~~~~~~~~~\nRunning experiment %d/%d:\n" % (i, total_exps))

    print("Configuration:\nsensor = %s\nN = %d\nstep = %d\nPH = %d\nsingle/multi step = %s\npartition = %s\nnorm = %s\nunder-over = %s\nmodel : %s\n"
        % (sensor["NAME"], N, step, PH, single_multi_step, data_partition, normalization, under_over_sampling, model_name))

    # Generate X and Y
    if single_multi_step == 'single':
        X, Y, X_times, Y_times = get_CGM_X_Y(sgv_data_dict, sensor, N, step, PH, experiments_folder, plot=False, verbose = 0)
    elif single_multi_step == 'multi':
        X, Y, X_times, Y_times = get_CGM_X_Y_multistep(sgv_data_dict, sensor, N, step, PH, experiments_folder, plot=False, verbose = 0)
    else:
        raise ValueError("'single' or 'multi' step forecasting must be specified in 'training_config.py'")

    # Data normalization. Both X and Y are scaled with the min/max of X.
    if normalization == 'min-max':
        x_min, x_max = np.min(X), np.max(X)
        X_norm = (X - x_min)/(x_max - x_min)
        Y_norm = (Y - x_min)/(x_max - x_min)
    elif normalization is None:
        X_norm = X
        Y_norm = Y
    else:
        raise ValueError("Not valid normalization: only 'min-max' or None are currently supported")

    # Data partition
    if data_partition == 'june-21':

        # Simple partition (imitating Himar work for comparison): X_train until 30/05/2021 and X_test from 31/05/2021
        # Instances that include two days are removed.
        # A single row selection per split is applied to X and Y alike. Filtering
        # them with independent conditions (as done previously) can keep an input
        # window while dropping its target, or vice versa, which breaks the
        # row-wise X/Y correspondence.
        # NOTE(review): assumes row k of X_times and Y_times describes the same instance — confirm.
        last_input_time = X_times[:,N-1]
        first_output_time = Y_times[:,0]

        train_rows = np.where((last_input_time <= pd.to_datetime('2021-05-31 00:00:00'))
                              & (first_output_time < pd.to_datetime('2021-05-30 23:59:59')))[0]
        test_rows = np.where((last_input_time > pd.to_datetime('2021-06-01 00:00:00'))
                             & (first_output_time > pd.to_datetime('2021-06-01 00:00:00')))[0]

        X_train = X_norm[train_rows]
        Y_train = Y_norm[train_rows]
        X_test = X_norm[test_rows]
        Y_test = Y_norm[test_rows] # Taken from Y_norm, i.e. normalized whenever normalization is enabled

        print("X_train shape: ",str(X_train.shape))
        print("Y_train shape: ",str(Y_train.shape))
        print("X_test shape: ",str(X_test.shape))
        print("Y_test shape: ",str(Y_test.shape), "\n")

    elif data_partition == 'month-wise-4-folds':
        # Fail loudly: silently skipping the split would train and evaluate on
        # the partitions left over from the previous experiment (or on undefined
        # names) and store the outcome under this configuration's key.
        raise NotImplementedError("The 'month-wise-4-folds' partition is not implemented yet")
    else:
        raise ValueError("Partition name not valid")

    # Apply (or not) undersampling in training. Oversampling is not implemented.
    if under_over_sampling == 'under':
        # multi_step follows the forecasting type of the current experiment
        # (it used to be hard-coded to False, also for multi-step runs).
        X_train, Y_train  = undersample_normal_range_outputs(X, X_train, Y_train,
                                                            multi_step=(single_multi_step == 'multi'),
                                                            normalization = normalization,
                                                            undersampling_factor = 2)
    elif under_over_sampling is None:
        pass
    else:
        # Otherwise results would be filed under a sampling strategy that was never applied
        raise ValueError("Not valid under/over sampling option: only 'under' or None are currently supported")

    # Get model instance depending on the model name and the model type (single or multi step)
    if single_multi_step == 'single':
        if model_name == '1D-UNET':
            model =  get_unet1D_single_step(N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS,
                            tau=tau,
                            kernel_size=kernel_size)

        elif model_name == '1D-UNET-non-compres':
            model =  get_unet1D_nonCompres_single_step(N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS,
                            tau=tau,
                            kernel_size=kernel_size)

        elif model_name == 'DIL-1D-UNET':
            model =  get_DIL_unet1D_single_step(N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS,
                            tau=tau,
                            kernel_size=kernel_size,
                            dilation_rate=1)

        elif model_name == 'LSTM':
            model =  get_LSTM_single_step(N=int(N),
                            input_features = NUMBER_OF_INPUT_SIGNALS)

        elif model_name == '1D-UNET-LSTM':
            model =  get_unet1DLSTM_single_step(N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS)

        elif model_name == 'StackedLSTM':
            model = get_StackedLSTM_single_step(N=int(N),
                            input_features = NUMBER_OF_INPUT_SIGNALS)

        else:
            raise ValueError("Model name not valid")

    elif single_multi_step == 'multi':
        # NOTE(review): the imports of get_unet1D_nonCompres_multi_step,
        # get_DIL_unet1D_multi_step and get_unet1DLSTM_multi_step are commented
        # out in the first cell; selecting those models raises NameError until
        # the imports are restored. The argument lists below also differ between
        # factories (sensor / PH are not passed to all of them) — confirm
        # against each get_model signature.
        if model_name == '1D-UNET':
            model =  get_unet1D_multi_step(sensor,
                            N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS,
                            tau=tau,
                            kernel_size=kernel_size)

        elif model_name == '1D-UNET-non-compres':
            model =  get_unet1D_nonCompres_multi_step(sensor, N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS,
                            tau=tau,
                            kernel_size=kernel_size,
                            PH=PH)

        elif model_name == 'DIL-1D-UNET':
            model =  get_DIL_unet1D_multi_step(N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS,
                            tau=tau,
                            kernel_size=kernel_size,
                            dilation_rate=1,
                            PH=PH)

        elif model_name == 'LSTM':
            model =  get_LSTM_multi_step(sensor, N=int(N),
                            input_features = NUMBER_OF_INPUT_SIGNALS, PH=PH)

        elif model_name == '1D-UNET-LSTM':
            model =  get_unet1DLSTM_multi_step(sensor, N=N,
                            input_features = NUMBER_OF_INPUT_SIGNALS, PH=PH)

        elif model_name == 'StackedLSTM':
            model = get_StackedLSTM_multi_step(sensor, N=int(N),
                            input_features = NUMBER_OF_INPUT_SIGNALS, PH=PH)

        else:
            raise ValueError("Model name not valid")

    # Number of predicting points depends on if its single step or multi-step
    if single_multi_step == 'single':
        predicted_points = 1
    elif single_multi_step == 'multi':
        # A count of points: converted to int because true division returns a float
        predicted_points = int(PH/sensor['SAMPLE_PERIOD'])

    # Model training
    train_model(sensor,
            model,
            X = X_train,
            Y = Y_train,
            N = N,
            predicted_points = predicted_points,
            epochs = epochs,
            batch_size = batch_size,
            lr = lr,
            fold = model_name,
            loss_function = 'root_mean_squared_error',
            verbose = 1
            )

    # Model evaluation depending on the forecast type: single or multi step.
    # The un-normalized X is passed along with the test partition.
    if single_multi_step == 'single':
        results = single_step_model_evaluation(N, PH, model_name, normalization, X_test, Y_test, X)
    elif single_multi_step == 'multi':
        results = multi_step_model_evaluation(N, PH, model_name, normalization, X_test, Y_test, X)

    # Update dictionary with the results
    results_dictionary[key] = results

# Go to experiment folder
os.chdir(parent_directory+experiments_folder)

# Save updated dictionary as 'results_dictionary.json' in the experiment folder.
# The json module is a dependency of this cell and must be imported explicitly;
# it is not guaranteed to be re-exported by the project's star imports.
with open('results_dictionary.json', 'w') as fp:
    json.dump(results_dictionary, fp)





In [None]:
# Configuration of the single experiment run step by step in the cells below
N = 144
step = 1
PH = 5
pred_steps = 1
normalization = 'min-max'
undersampling = True
data_partition = 'june-21'
name = '1DUNET-non-compres_himar-rep'

# Parameters for training.
# Only the values that actually take effect are kept. An earlier setting
# (batch_size = 2, epochs = 10, labelled "Best hyperparameters for the model
# N 48") was overwritten a few lines later and never used.
lr = 0.0001
batch_size = 256
epochs = 2

# Convolution layers hyperparameters
kernel_size = 3
tau = 1

In [None]:
# Build the model inputs/outputs and their timestamps from the CGM readings,
# using the window length (N), step and prediction horizon (PH) set above
cgm_instances = get_CGM_X_Y(
    sgv_data_dict,
    sensor_Mikael,
    N,
    step,
    PH,
    experiments_folder,
    plot=False,
    verbose=0,
)
X, Y, X_times, Y_times = cgm_instances

In [None]:
# Min-max normalization. Both X and Y are scaled with the minimum and maximum
# of X, so inputs and targets share the same scale.
if normalization == 'min-max':
    x_min, x_max = np.min(X), np.max(X)
    X_norm = (X - x_min)/(x_max - x_min)
    Y_norm = (Y - x_min)/(x_max - x_min)
elif normalization is None:
    X_norm = X
    Y_norm = Y
else:
    # Without this branch an invalid option would leave X_norm / Y_norm
    # undefined (or stale from a previous run of the cell)
    raise ValueError("Not valid normalization: only 'min-max' or None are currently supported")

In [None]:
if data_partition == 'june-21':

    # Simple partition (imitating Himar work for comparison): X_train until 30/05/2021 and X_test from 31/05/2021
    # Instances that include two days are removed.
    # A single row selection per split is applied to X and Y alike. Filtering
    # them with independent conditions (as done previously) can keep an input
    # window while dropping its target, or vice versa, which breaks the
    # row-wise X/Y correspondence.
    # NOTE(review): assumes row k of X_times and Y_times describes the same instance — confirm.
    last_input_time = X_times[:,N-1]
    first_output_time = Y_times[:,0]

    train_rows = np.where((last_input_time <= pd.to_datetime('2021-05-31 00:00:00'))
                          & (first_output_time < pd.to_datetime('2021-05-30 23:59:59')))[0]
    test_rows = np.where((last_input_time > pd.to_datetime('2021-06-01 00:00:00'))
                         & (first_output_time > pd.to_datetime('2021-06-01 00:00:00')))[0]

    X_train = X_norm[train_rows]
    Y_train = Y_norm[train_rows]
    X_test = X_norm[test_rows]
    Y_test = Y_norm[test_rows] # Taken from Y_norm, i.e. normalized whenever normalization is enabled

    print("X_train shape: ",str(X_train.shape))
    print("Y_train shape: ",str(Y_train.shape))
    print("X_test shape: ",str(X_test.shape))
    print("Y_test shape: ",str(Y_test.shape))

elif data_partition == 'month-wise-4-folds':
    # Fail loudly instead of silently leaving the train/test arrays undefined or stale
    raise NotImplementedError("The 'month-wise-4-folds' partition is not implemented yet")
else:
    raise ValueError("Partition name not valid")

In [None]:
# Optionally undersample the training set.
# The result is written back to X_train / Y_train, the names the training cell
# reads. It used to be stored in X_train_ / Y_train_, which nothing consumed,
# so the undersampling never reached the model.
# NOTE: this makes the cell non-idempotent; re-run the partition cell before
# running it again, otherwise the already reduced set is undersampled once more.
# NOTE(review): judging by its name, the helper thins out instances whose output
# lies in the normal range — confirm in undersample_normal_range_outputs.
if undersampling:
    X_train, Y_train = undersample_normal_range_outputs(X, X_train, Y_train, multi_step=False, normalization = normalization, undersampling_factor = 2)

In [None]:
# Get model instance depending on the model name.
# The factories are referenced through the single-step aliases bound in the
# import cell (get_*_single_step). The un-suffixed names used before
# (get_unet1D, get_LSTM, ...) are not bound by any import visible in this
# notebook. Single-step factories are used because this experiment predicts
# one point (predicted_points = 1 in the training cell).
if name == '1DUNET_himar-rep':
    model =  get_unet1D_single_step(N=N,
                    input_features = NUMBER_OF_INPUT_SIGNALS,
                    tau=tau,
                    kernel_size=kernel_size)

elif name == '1DUNET-non-compres_himar-rep':
    model =  get_unet1D_nonCompres_single_step(N=N,
                    input_features = NUMBER_OF_INPUT_SIGNALS,
                    tau=tau,
                    kernel_size=kernel_size)

elif name == '1DUNET-DIL_himar-rep':
    model =  get_DIL_unet1D_single_step(N=N,
                    input_features = NUMBER_OF_INPUT_SIGNALS,
                    tau=tau,
                    kernel_size=kernel_size,
                    dilation_rate=1)

elif name == 'LSTM_himar-rep':
    model =  get_LSTM_single_step(N=int(N),
                    input_features = NUMBER_OF_INPUT_SIGNALS)

elif name == '1DUNET-LSTM_himar-rep':
    model =  get_unet1DLSTM_single_step(N=N,
                    input_features = NUMBER_OF_INPUT_SIGNALS)
else:
    raise ValueError("Model name not valid")

In [None]:
# Fit the selected model on the training partition. The keyword options are
# gathered first so the call itself stays short.
# NOTE(review): pred_steps is defined in the parameters cell, but the number of
# predicted points is passed here as a literal — confirm whether they should be linked.
fit_options = dict(
    X=X_train,
    Y=Y_train,
    N=N,
    predicted_points=1,
    epochs=epochs,
    batch_size=batch_size,
    lr=lr,
    verbose=1,
    fold=name,
)
train_model(sensor_Mikael, model, **fit_options)

In [None]:
# Evaluate the trained model on the test partition.
# The single-step evaluation routine is referenced through the alias bound in
# the import cell; the bare name model_evaluation is not bound by any import
# visible in this notebook. The un-normalized X is passed with the test data.
single_step_model_evaluation(N, PH, name, normalization, X_test, Y_test, X)