In [1]:
import os 
import numpy as np 
import pandas as pd
import sys
from typing import Dict 
import matplotlib.pyplot as plt 
import openpyxl
import pickle

sys.path.append("..")

from libreview_utils import *
from models.training import *
from sensor_params import *
from utils import get_LibreView_CGM_X_Y_multistep

from models.multi_step.StackedLSTM import get_model as get_StackedLSTM_multi_step

from evaluation.multi_step.evaluation import model_evaluation as multi_step_model_evaluation


In [2]:
# Dataset path: root folder with the raw LibreView data.
# It can be overridden without editing the notebook by setting the
# LIBREVIEW_DATASET_PATH environment variable; when the variable is unset
# (or empty) the original local path is used.
DATASET_PATH = os.environ.get("LIBREVIEW_DATASET_PATH") or r"C:\Users\aralmeida\Downloads\LibreViewRawData"

In [3]:
# Experiment configuration (every tunable value of the notebook lives here)

# --- Data windowing ---
sensor = libreview_sensors   # sensor parameter dictionary (read below through sensor['SAMPLE_PERIOD'])
N = 48                       # window size of the input sequence
step = 1                     # forwarded to get_LibreView_CGM_X_Y_multistep
PH = 60                      # prediction horizon (presumably minutes - TODO confirm units)
normalization = None         # forwarded to the evaluation routine

# --- Model and training ---
model_name = 'StackedLSTM'                    # used to build the per-fold run name
loss_function = 'root_mean_squared_error'
lr = 0.001
batch_size = 4096
epochs = 1

# --- Logging ---
verbose = 0


In [4]:
# # Go to the dataset directory 
# os.chdir(DATASET_PATH)

# # Read .csv or load the pickle file that contains the dictionary to avoid .csv slower reading 
# if 'libreview_data.pickle' in os.listdir():
#     with open('libreview_data.pickle', 'rb') as handle:
#         libreview_data = pickle.load(handle) # Previously generated with prepare_LibreView_data(DATASET_PATH)
# else: 
#     # If the dictionary has not been created, read the .csv files
#     libreview_data = prepare_LibreView_data(DATASET_PATH)


# # Take only the T1DM patients with at least one year in a row of CGM data with the same sensor 
# data_1yr_recordings = get_1year_LibreView_recordings_dict(libreview_data)

# # Generate the Libreview .npy files from the generated or saved dictionary 
# generate_LibreView_npy_files(data_1yr_recordings, r"/1yr_npy_files")

# # Extract an EXACT 1 year recordings from the dictionary and store them to load them separately 
# generate_LibreView_npy_1yr_recordings(data_1yr_recordings)

# Extract EXACTLY one year of recordings (the oldest one) per patient and store them to load them separately.
# NOTE(review): the next cell calls os.listdir() without a path, so it presumably relies on this
# call leaving the working directory inside the folder that holds the per-patient sub-folders - confirm.
get_oldest_year_npys_from_LibreView_csv(DATASET_PATH)

Number of patients with at least one year of CGM data:  29


In [None]:
# Iterate over the patient ID folders, generate the month-wise 4 folds and
# train + evaluate one StackedLSTM per patient and per fold.
#
# Every sub-folder of the current working directory is treated as a patient
# folder holding 'oldest_1yr_CGM.npy' and 'oldest_1yr_CGM_timestamp.npy'.
# NOTE(review): nothing in this cell sets the working directory; it is
# presumably left in place by get_oldest_year_npys_from_LibreView_csv - confirm.
dataset_root = os.getcwd()

# Number of predicted samples (true division, so this is a float)
predicted_points = PH/sensor['SAMPLE_PERIOD']

# Sorted so patients are processed in a deterministic order
for patient_id in sorted(os.listdir(dataset_root)):

    patient_dir = os.path.join(dataset_root, patient_id)

    # Consider only folders, not .npy or .txt files
    if ('npy' in patient_id) or ('txt' in patient_id) or not os.path.isdir(patient_dir):
        continue

    # Get into the ID patient folder (absolute path, so the loop does not
    # depend on where a previous iteration left the working directory)
    os.chdir(patient_dir)

    try:
        # Only read the OLDEST year of recording.
        # allow_pickle=True executes pickled objects: only load files produced by this pipeline.
        recordings = np.load('oldest_1yr_CGM.npy')
        timestamps = np.load('oldest_1yr_CGM_timestamp.npy', allow_pickle=True)

        print("PATIENT :", patient_id)

        X, Y, X_times, Y_times = get_LibreView_CGM_X_Y_multistep(recordings, timestamps, sensor,
                                    N, step, PH, plot = True,
                                    verbose = verbose)

        # Print shapes of the generated X and Y
        print("X shape: ", X.shape)
        print("Y shape: ", Y.shape, "\n")
        print("~~~~~~~~~~~~~~~~~~")

        # Min-max normalization
        # X = min_max_normalization(X)
        # Y = min_max_normalization(Y)

        # Month-wise 4-folds partition (window size taken from the configuration cell)
        training_cv_folds = month_wise_LibreView_4fold_cv(X, Y, X_times, Y_times, N)

        for fold, fold_data in training_cv_folds.items():

            # Identifier shared by the training and the evaluation of this fold
            run_name = f"{patient_id}-{model_name}-{fold}"

            # A fresh model per fold, so weights fitted on one fold's training
            # set cannot carry over into a fold that is tested on that data.
            # NOTE(review): assumes train_model fits `model` in place - confirm.
            model = get_StackedLSTM_multi_step(sensor, N=int(N),
                    input_features = 1, PH=PH)

            # If the fold directory is not created, create it, then get into it
            fold_dir = os.path.join(patient_dir, fold)
            os.makedirs(fold_dir, exist_ok=True)
            os.chdir(fold_dir)

            try:
                # One model training per fold
                train_model(sensor,
                            model,
                            X = fold_data['X_train'],
                            Y = fold_data['Y_train'],
                            N = N,
                            predicted_points = predicted_points,
                            epochs = epochs,
                            batch_size = batch_size,
                            lr = lr,
                            fold = run_name,
                            loss_function = loss_function,
                            verbose = 1
                            )

                # Model evaluation
                results_normal_eval = multi_step_model_evaluation(N, PH, run_name, normalization, fold_data['X_test'],
                                                                fold_data['Y_test'], predicted_points, X, loss_function)
            finally:
                # Back to the patient directory, wherever the helpers left the
                # working directory and even if this fold raised
                os.chdir(patient_dir)
    finally:
        # Back to the directory that contains the patient folders
        os.chdir(dataset_root)

# Checking if we are actually taking the oldest 1-year recording 

In [None]:
# # Loop over all the patients, read the recording and timestamps and generate the 4-folds
# for id in os.listdir(): 

#     # Only consider folders, not .npy files 
#     if '.npy' not in id: 
    
#         # Get into the patient folder 
#         os.chdir(id) 

#         print(id)

#         # Load the oldest year and timestamps 
#         cgm = np.load('oldest_1yr_CGM.npy')
#         timestamps = np.load('oldest_1yr_CGM_timestamp.npy', allow_pickle=True)
#         print("Samples length: ", len(cgm), "Timestamps length: ", len(timestamps))
#         print("Fist timestamp: ", timestamps[0], "Last timestamp: ", timestamps[-1], "\n")

#         # Back to parent directory 
#         os.chdir('..')


# Checking that we are taking the oldest reading considering the MAC

In [None]:
# # Example with the ID 004
# id = '004'
# s = '001'
# r = '001'
# data = '10-7-2023'

# # id = '014'
# # s = '001'
# # r = '001'
# # data = '10-7-2023'

# id = '046'
# s = '001'
# r = '001'
# data = '11-7-2023'

# # Counter
# iter = 0

# for mac in data_1yr_recordings[id][s][r][data].keys():
        
#     # Each MAC is uniquely joint to a sensor
#     sensor = list(data_1yr_recordings[id][s][r][data][mac].keys())[0]

#     # Refresh current MAC-associated date
#     first_date_current_mac = data_1yr_recordings[id][s][r][data][mac][sensor]['CGM']['timestamp'][0]
#     print(first_date_current_mac, ": ", mac)

#     if iter == 0:
#         oldest_mac = mac
#         oldest_mac_first_date = first_date_current_mac
#         iter = iter+1
    
#     else: 
#         if (first_date_current_mac <= oldest_mac_first_date):
#             print(first_date_current_mac)
#             print("<")
#             print(oldest_mac_first_date)
#             oldest_mac = mac
#             oldest_mac_first_date = first_date_current_mac
#             iter = iter+1
#         else: 
#             # MAC remains the same as before
#             oldest_mac = oldest_mac 
#             oldest_mac_first_date = oldest_mac_first_date
#             iter = iter+1

# # Sensor associated to the oldest MAC
# sensor = list(data_1yr_recordings[id][s][r][data][oldest_mac].keys())[0]

# # Extract CGM recordings of the oldest MAC and their correspondant timestamps 
# recordings = data_1yr_recordings[id][s][r][data][oldest_mac][sensor]['CGM']['reading']
# recordings_timestamps = data_1yr_recordings[id][s][r][data][oldest_mac][sensor]['CGM']['timestamp']
        
# # Save as .npy files
# os.chdir(id)
# np.save('oldest_1yr_CGM.npy', recordings)
# np.save('oldest_1yr_CGM_timestamp.npy', recordings_timestamps)

In [None]:
# def month_wise_LibreView_4fold_cv(X: np.array, Y: np.array, X_times : np.array, Y_times : np.array, N: int) -> Dict:

#     """
#     This function partitions the data in 4 folds. Each fold contains data from 3 months of the same year.
#     With this, each model is trained and validated with all different terms in a year. The timestamps 
#     of the folds will vary depending on the patient. The oldest recorded sample in the patient will be the 
#     first sample of the first fold. The first sample of the second fold will be that sample plus 3 months,
#     and so on. This function has been designed to work with LibreView-extracted data, but can be adapted to 
#     other data sources. Information about the partitions is stored in a .txt file.

#     Data is stored in its correspondant fold in the dictionary training_partitions.

#     Args:
#     -----
#         X: input sequence of length N.
#         Y: output sequence.
#         X_times: timestamps of the input sequence.
#         Y_times: timestamps of the output sequence.
#         N: window size of the input data.
#         shuffle: flag that indicates whether to shuffle the data or not.
#         verbose: verbosity level. 

#     Returns:
#     --------
#         folds_dict: dictionary containing the 4 folds. Each fold contains the training and validation sets.
    

#     """

#     # Declare the dictionary to intuitively access the folds 
#     folds_dict = {'1-fold' : {'X_train' : {},
#                             'Y_train' : {},
#                             'X_test' : {},
#                             'Y_test' : {}},
#                 '2-fold' : {'X_train' : {},
#                             'Y_train' : {},
#                             'X_test' : {},
#                             'Y_test' : {}},            
#                 '3-fold' : {'X_train' : {},
#                             'Y_train' : {},
#                             'X_test' : {},
#                             'Y_test' : {}},
#                 '4-fold' : {'X_train' : {},
#                             'Y_train' : {},
#                             'X_test' : {},
#                             'Y_test' : {}}}
    
#     # Timestamp of the fold 1 is the first of the whole recording 
#     fold1_first_timestamp = X_times[0][0]

#     # Timestamp of the fold 2 is the first of the whole recording + 3 months
#     fold2_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=3)

#     # Timestamp of the fold 3 is the first of the whole recording + 6 months
#     fold3_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=6)

#     # Timestamp of the fold 4 is the first of the whole recording + 9 months
#     fold4_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=9)

#     # With the timestamps, the 4 folds are generated
#     X_fold1 = X[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#     X_fold2 = X[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#     X_fold3 = X[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#     X_fold4 = X[np.where(X_times[:,0] >= fold4_first_timestamp)] 

#     # Also save the timestamps of the fold just in case they are necessary 
#     X_times_fold1 = X_times[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#     X_times_fold2 = X_times[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#     X_times_fold3 = X_times[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#     X_times_fold4 = X_times[np.where(X_times[:,0] >= fold4_first_timestamp)]

#     # Take the same instances from Y
#     Y_fold1 = Y[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#     Y_fold2 = Y[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#     Y_fold3 = Y[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#     Y_fold4 = Y[np.where(X_times[:,0] >= fold4_first_timestamp)]

#     # Take the same instances from Y_times
#     Y_times_fold1 = Y_times[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#     Y_times_fold2 = Y_times[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#     Y_times_fold3 = Y_times[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#     Y_times_fold4 = Y_times[np.where(X_times[:,0] >= fold4_first_timestamp)]

#     lost_samples = len(X) - (len(X_fold1) + len(X_fold2) + len(X_fold3) + len(X_fold4))

#     print("Discarded instances for %i" % (lost_samples))

#     # Save valuable information in a .txt file
#     with open('4-folds_summary.txt', 'w') as f:
#         f.write('1-fold start date = {}\n'.format(fold1_first_timestamp))
#         f.write('1-fold num. samples = {}\n\n'.format(len(X_fold1)))

#         f.write('2-fold start date = {}\n'.format(fold2_first_timestamp))
#         f.write('2-fold num. samples = {}\n\n'.format(len(X_fold2)))

#         f.write('3-fold start date = {}\n'.format(fold3_first_timestamp))
#         f.write('3-fold num. samples = {}\n\n'.format(len(X_fold3)))

#         f.write('4-fold start date = {}\n'.format(fold4_first_timestamp))
#         f.write('4-fold num. samples = {}\n\n'.format(len(X_fold4)))

#         f.write('Discarded instances due to overlap = {}\n'.format(lost_samples))

#     # Concatenate X and Y side by side (along axis 1) so each training set can be shuffled once while keeping X-Y rows paired
#     XY_fold1 = np.concatenate((X_fold1, Y_fold1), axis=1)
#     XY_fold2 = np.concatenate((X_fold2, Y_fold2), axis=1)
#     XY_fold3 = np.concatenate((X_fold3, Y_fold3), axis=1)
#     XY_fold4 = np.concatenate((X_fold4, Y_fold4), axis=1)

#     # Create the training sets for each fold 
#     fold1_XY_train_set = np.concatenate((XY_fold1, XY_fold2, XY_fold3), axis=0)
#     fold2_XY_train_set = np.concatenate((XY_fold1, XY_fold2, XY_fold4), axis=0)
#     fold3_XY_train_set = np.concatenate((XY_fold1, XY_fold3, XY_fold4), axis=0)
#     fold4_XY_train_set = np.concatenate((XY_fold2, XY_fold3, XY_fold4), axis=0)

#     # Shuffle the training sets
#     np.random.shuffle(fold1_XY_train_set)
#     np.random.shuffle(fold2_XY_train_set)
#     np.random.shuffle(fold3_XY_train_set)
#     np.random.shuffle(fold4_XY_train_set)

#     # Split the training sets into X and Y
#     fold1_X_train = fold1_XY_train_set[:,0:N]
#     fold1_Y_train = fold1_XY_train_set[:,N:]

#     fold2_X_train = fold2_XY_train_set[:,0:N]
#     fold2_Y_train = fold2_XY_train_set[:,N:]

#     fold3_X_train = fold3_XY_train_set[:,0:N]
#     fold3_Y_train = fold3_XY_train_set[:,N:]

#     fold4_X_train = fold4_XY_train_set[:,0:N]
#     fold4_Y_train = fold4_XY_train_set[:,N:]

#     # Fill the dictionary fold-wise
#     # 1-fold
#     folds_dict['1-fold']['X_train'] = fold1_X_train
#     folds_dict['1-fold']['Y_train'] = fold1_Y_train
#     folds_dict['1-fold']['X_test'] = X_fold4
#     folds_dict['1-fold']['Y_test'] = Y_fold4

#     # 2-fold
#     folds_dict['2-fold']['X_train'] = fold2_X_train
#     folds_dict['2-fold']['Y_train'] = fold2_Y_train
#     folds_dict['2-fold']['X_test'] = X_fold3
#     folds_dict['2-fold']['Y_test'] = Y_fold3

#     # 3-fold
#     folds_dict['3-fold']['X_train'] = fold3_X_train
#     folds_dict['3-fold']['Y_train'] = fold3_Y_train
#     folds_dict['3-fold']['X_test'] = X_fold2
#     folds_dict['3-fold']['Y_test'] = Y_fold2

#     # 4-fold
#     folds_dict['4-fold']['X_train'] = fold4_X_train
#     folds_dict['4-fold']['Y_train'] = fold4_Y_train
#     folds_dict['4-fold']['X_test'] = X_fold1
#     folds_dict['4-fold']['Y_test'] = Y_fold1

#     return folds_dict

# Main function 

In [None]:
# # Iterate over the ID folders to generate the 4-folds 
# for id in os.listdir(): 
    
#     # Consider only folders, not .npy or .txt files
#     if ('npy' not in id) and ('txt' not in id): 
    
#         # Get into the ID patient folder
#         os.chdir(id)
    
#         # Only read the OLDEST year of recording
#         recordings = np.load('oldest_1yr_CGM.npy')
#         timestamps = np.load('oldest_1yr_CGM_timestamp.npy', allow_pickle=True)

#         print("PATIENT :", id)

#         X, Y, X_times, Y_times = get_LibreView_CGM_X_Y_multistep(recordings, timestamps, libreview_sensors, 
#                                     48, 1, 60, plot = True,
#                                     verbose = 0)
        
#         # Print shapes of the generated X and Y
#         print("X shape: ", X.shape)
#         print("Y shape: ", Y.shape, "\n")
#         print("~~~~~~~~~~~~~~~~~~")

#         # Min-max normalization
#         # X = min_max_normalization(X)
#         # Y = min_max_normalization(Y)

#         # month-wise 4-folds partition 
#         training_cv_folds  = month_wise_LibreView_4fold_cv(X, Y, X_times, Y_times, 48)

#         # Load model 
#         model = get_StackedLSTM_multi_step(sensor, N=int(N),
#                 input_features = 1, PH=PH)
        
#         predicted_points = PH/sensor['SAMPLE_PERIOD']

#         for fold in training_cv_folds.keys():
            
#             # If the directory fold is not created, create it
#             if fold not in os.listdir():
#                 os.mkdir(fold)
            
#             # Get into the fold directory
#             os.chdir(fold)
            
#             # One model training per fold
#             train_model(sensor,
#                         model,
#                         X = training_cv_folds[fold]['X_train'],
#                         Y = training_cv_folds[fold]['Y_train'],
#                         N = N,
#                         predicted_points = predicted_points,
#                         epochs = epochs,
#                         batch_size = batch_size,
#                         lr = lr,
#                         fold = id+"-"+model_name+"-"+fold,
#                         loss_function = loss_function,
#                         verbose = 1 
#                         )

#             # Model evaluation 
#             results_normal_eval = multi_step_model_evaluation(N, PH, id+"-"+model_name+"-"+fold, normalization, training_cv_folds[fold]['X_test'],
#                                                             training_cv_folds[fold]['Y_test'], predicted_points, X, loss_function)

#             # Back to the parent directory 
#             os.chdir('../../..')

#         # Back to previous directory 
#         os.chdir('..')

# Testing the new 4 folds function

# Fixing bugs in 4 months wise function

In [None]:
# # Iterate over the ID folders to generate the 4-folds 
# for id in os.listdir(): 
    
#     # Consider only folders, not .npy or .txt files
#     if ('npy' not in id) and ('txt' not in id): 
    
#         # Get into the ID patient folder
#         os.chdir(id)
    
#         # Only read the OLDEST year of recording
#         recordings = np.load('oldest_1yr_CGM.npy')
#         timestamps = np.load('oldest_1yr_CGM_timestamp.npy', allow_pickle=True)

#         print("PATIENT :", id)

#         X, Y, X_times, Y_times = get_LibreView_CGM_X_Y_multistep(recordings, timestamps, libreview_sensors, 
#                                     48, 1, 60, plot = False,
#                                     verbose = 0)
        
#         # Print shapes of the generated X and Y
#         # print("X shape: ", X.shape)
#         # print("Y shape: ", Y.shape, "\n")
#         # print("~~~~~~~~~~~~~~~~~~")   

#         # Declare the dictionary to intuitively access the folds 
#         folds_dict = {'1-fold' : {'X_train' : {},
#                                 'Y_train' : {},
#                                 'X_test' : {},
#                                 'Y_test' : {}},
#                     '2-fold' : {'X_train' : {},
#                                 'Y_train' : {},
#                                 'X_test' : {},
#                                 'Y_test' : {}},            
#                     '3-fold' : {'X_train' : {},
#                                 'Y_train' : {},
#                                 'X_test' : {},
#                                 'Y_test' : {}},
#                     '4-fold' : {'X_train' : {},
#                                 'Y_train' : {},
#                                 'X_test' : {},
#                                 'Y_test' : {}}}
        
#         # Timestamp of the fold 1 is the first of the whole recording 
#         fold1_first_timestamp = X_times[0][0]

#         # Timestamp of the fold 2 is the first of the whole recording + 3 months
#         fold2_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=3)

#         # Timestamp of the fold 3 is the first of the whole recording + 6 months
#         fold3_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=6)

#         # Timestamp of the fold 4 is the first of the whole recording + 9 months
#         fold4_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=9)

#         # With the timestamps, the 4 folds are generated
#         X_fold1 = X[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#         X_fold2 = X[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#         X_fold3 = X[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#         X_fold4 = X[np.where(X_times[:,0] >= fold4_first_timestamp)] 

#         # Also save the timestamps of the fold just in case they are necessary 
#         X_times_fold1 = X_times[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#         X_times_fold2 = X_times[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#         X_times_fold3 = X_times[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#         X_times_fold4 = X_times[np.where(X_times[:,0] >= fold4_first_timestamp)]

#         # Take the same instances from Y
#         Y_fold1 = Y[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#         Y_fold2 = Y[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#         Y_fold3 = Y[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#         Y_fold4 = Y[np.where(X_times[:,0] >= fold4_first_timestamp)]

#         # Take the same instances from Y_times
#         Y_times_fold1 = Y_times[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
#         Y_times_fold2 = Y_times[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
#         Y_times_fold3 = Y_times[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
#         Y_times_fold4 = Y_times[np.where(X_times[:,0] >= fold4_first_timestamp)]

#         lost_samples = len(X) - (len(X_fold1) + len(X_fold2) + len(X_fold3) + len(X_fold4))

#         # print("Discarded instances: %i" % (lost_samples))

#         # Save valuable information in a .txt file
#         with open('4-folds_summary.txt', 'w') as f:
#             f.write('1-fold start date = {}\n'.format(fold1_first_timestamp))
#             f.write('1-fold num. samples = {}\n\n'.format(len(X_fold1)))

#             f.write('2-fold start date = {}\n'.format(fold2_first_timestamp))
#             f.write('2-fold num. samples = {}\n\n'.format(len(X_fold2)))

#             f.write('3-fold start date = {}\n'.format(fold3_first_timestamp))
#             f.write('3-fold num. samples = {}\n\n'.format(len(X_fold3)))

#             f.write('4-fold start date = {}\n'.format(fold4_first_timestamp))
#             f.write('4-fold num. samples = {}\n\n'.format(len(X_fold4)))

#             f.write('Discarded instances due to overlap = {}\n'.format(lost_samples))

#         # Concatenate X and Y side by side (along axis 1) so each training set can be shuffled once while keeping X-Y rows paired
#         XY_fold1 = np.concatenate((X_fold1, Y_fold1), axis=1)
#         XY_fold2 = np.concatenate((X_fold2, Y_fold2), axis=1)
#         XY_fold3 = np.concatenate((X_fold3, Y_fold3), axis=1)
#         XY_fold4 = np.concatenate((X_fold4, Y_fold4), axis=1)

#         # Create the training sets for each fold 
#         fold1_XY_train_set = np.concatenate((XY_fold1, XY_fold2, XY_fold3), axis=0)
#         fold2_XY_train_set = np.concatenate((XY_fold1, XY_fold2, XY_fold4), axis=0)
#         fold3_XY_train_set = np.concatenate((XY_fold1, XY_fold3, XY_fold4), axis=0)
#         fold4_XY_train_set = np.concatenate((XY_fold2, XY_fold3, XY_fold4), axis=0)

#         # Shuffle the training sets
#         np.random.shuffle(fold1_XY_train_set)
#         np.random.shuffle(fold2_XY_train_set)
#         np.random.shuffle(fold3_XY_train_set)
#         np.random.shuffle(fold4_XY_train_set)

#         # Split the training sets into X and Y
#         fold1_X_train = fold1_XY_train_set[:,0:N]
#         fold1_Y_train = fold1_XY_train_set[:,N:]

#         fold2_X_train = fold2_XY_train_set[:,0:N]
#         fold2_Y_train = fold2_XY_train_set[:,N:]

#         fold3_X_train = fold3_XY_train_set[:,0:N]
#         fold3_Y_train = fold3_XY_train_set[:,N:]

#         fold4_X_train = fold4_XY_train_set[:,0:N]
#         fold4_Y_train = fold4_XY_train_set[:,N:]

#         # Fill the dictionary fold-wise
#         # 1-fold
#         folds_dict['1-fold']['X_train'] = fold1_X_train
#         folds_dict['1-fold']['Y_train'] = fold1_Y_train
#         folds_dict['1-fold']['X_test'] = X_fold4
#         folds_dict['1-fold']['Y_test'] = Y_fold4

#         # 2-fold
#         folds_dict['2-fold']['X_train'] = fold2_X_train
#         folds_dict['2-fold']['Y_train'] = fold2_Y_train
#         folds_dict['2-fold']['X_test'] = X_fold3
#         folds_dict['2-fold']['Y_test'] = Y_fold3

#         # 3-fold
#         folds_dict['3-fold']['X_train'] = fold3_X_train
#         folds_dict['3-fold']['Y_train'] = fold3_Y_train
#         folds_dict['3-fold']['X_test'] = X_fold2
#         folds_dict['3-fold']['Y_test'] = Y_fold2

#         # 4-fold
#         folds_dict['4-fold']['X_train'] = fold4_X_train
#         folds_dict['4-fold']['Y_train'] = fold4_Y_train
#         folds_dict['4-fold']['X_test'] = X_fold1
#         folds_dict['4-fold']['Y_test'] = Y_fold1

#         for fold in folds_dict.keys():
#             for sets in folds_dict[fold].keys():
#                 print(fold, sets, len(folds_dict[fold][sets]))
#                 if len(folds_dict[fold][sets]) == 0:
#                     print(fold, id, " --> EMPTY")
                    
#                     break
        
#         os.chdir('..')

In [None]:
# os.chdir(r"C:\Users\aralmeida\Downloads\LibreViewRawData\1yr_npy_files\004")

# # Only read the OLDEST year of recording
# recordings = np.load('oldest_1yr_CGM.npy')
# timestamps = np.load('oldest_1yr_CGM_timestamp.npy', allow_pickle=True)

# # Compute the difference between two consecutive samples (in minutes)
# time_diff = np.diff(timestamps)

# # Empty array to fill with the values in minutes 
# time_diff_mins = np.empty(len(time_diff))

# for i in range(0, len(time_diff_mins)): 
#     time_diff_mins[i] = (time_diff[i].seconds)//60 

# # Plot all time intervals between two consecutive samples

# plt.figure()

# # Plot 
# plt.plot(time_diff_mins)

# # Horizontal line in 30 (2 consecutive samples)
# plt.axhline(y=glucose_sensor["SAMPLE_PERIOD"]*2, color='r', linestyle='-')

# # Set X label
# plt.xlabel('Sample difference')

# # Set Y label
# plt.ylabel('Minutes between sensor readings')

# # Save figure
# plt.savefig('sample_difference.png', dpi=300, bbox_inches='tight')

# # Find indexes where the difference between two consecutive samples is at least twice the sensor sample period
# time_diff_idx = np.where(time_diff_mins >= glucose_sensor["SAMPLE_PERIOD"]*2)

# # Number of blocks in a patient are defined when two consecutive readings surpass 2*sensor["SAMPLE_PERIOD"]
# n_blocks = len(time_diff_mins[np.where(time_diff_mins >= glucose_sensor["SAMPLE_PERIOD"]*2)])
# print("Number of blocks of is %i\n" % (n_blocks))

# # Step for the output value identification - 1: For N = 49, the output is 5 min (value 50) / 2: 10 min (value 51) / etc. 
# step = round(prediction_horizon/glucose_sensor["SAMPLE_PERIOD"])

# # Global index useful to extract the blocks for the original array
# global_idx = 0 # 1 in matlab

# # Numpy array to count samples in each block
# num_samples = np.zeros((n_blocks, 1))

# # List to store the indexes X and Y (faster computation than concatenate arrays)
# X_init_list = []
# X_end_list = []
# Y_init_list = []
# Y_end_list = []

# for i in range(0, n_blocks):

#     # Compute size of the current block
#     block_size = time_diff_idx[0][i]-global_idx
    
#     if verbose == 1:
#         print("Block size is %i" % (block_size))
    
#     # Loop until the last value possible value of the block considering N
#     for j in range(0, round(block_size - N - step)):

#         # Reference value for the initial data point to be collected 
#         X_init_list.append(global_idx+j)

#         # Reference value for the last data point to be collected 
#         X_end_list.append(global_idx+j+N)

#         # Reference value for the initial Y point to be collected 
#         Y_init_list.append(global_idx+j+N)

#         # Reference value for the last data point to be collected 
#         Y_end_list.append(global_idx+j+N+step)

#         # Count the samples of the current block
#         num_samples[i] = j+1

#     # Print number of samples 
#     if verbose == 1:
#         print("Number of samples in block %i is %i\n" % ((i+1), num_samples[i]))

#     # Update the global index
#     global_idx = time_diff_idx[0][i] 

# # Declare X an Y vector with all time and glucose concatenated data to further processing
# X = np.zeros((len(X_init_list), N), dtype=np.float32)
# Y = np.zeros((len(Y_init_list), round(prediction_horizon/glucose_sensor["SAMPLE_PERIOD"])), dtype=np.float32) # Check values on sensor_params.py and arch_params.py
# X_times = np.empty((len(X_init_list), N), dtype='datetime64[s]')
# Y_times = np.empty((len(Y_init_list), round(prediction_horizon/glucose_sensor["SAMPLE_PERIOD"])), dtype='datetime64[s]')

# for i in range(0, X.shape[0]):
#     X[i,:] = recordings[X_init_list[i] : X_end_list[i]]
#     Y[i,:] = recordings[Y_init_list[i] : Y_end_list[i]]
#     X_times[i,:] = timestamps[X_init_list[i] : X_end_list[i]]
#     Y_times[i] = timestamps[Y_init_list[i] : Y_end_list[i]] 

# # Save training dataset summary in a txt file
# with open('dataset_summary.txt', 'w') as f:
#     f.write('N = {}\n'.format(N))
#     f.write('step = {}\n'.format(step))
#     f.write('PH = {}\n'.format(prediction_horizon))
#     f.write('sensor = {}\n'.format(glucose_sensor['NAME']))
#     f.write('nº blocks = {}\n'.format(n_blocks))

# # Export X, Y and associated times as .npy files
# np.save('X.npy', X)
# np.save('Y.npy', Y)
# np.save('X_times.npy', X_times)
# np.save('Y_times.npy', Y_times) 

# # Convert np.arrays to float32 to convert them to Tensorflow tensors
# X = X.astype(np.float32)
# Y = Y.astype(np.float32)

# # Timestamp of the fold 1 is the first of the whole recording 
# fold1_first_timestamp = X_times[0][0]

# # Timestamp of the fold 2 is the first of the whole recording + 3 months
# fold2_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=3)

# # Timestamp of the fold 3 is the first of the whole recording + 6 months
# fold3_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=6)

# # Timestamp of the fold 4 is the first of the whole recording + 9 months
# fold4_first_timestamp = fold1_first_timestamp + pd.DateOffset(months=9)

# # With the timestamps, the 4 folds are generated
# X_fold1 = X[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
# X_fold2 = X[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
# X_fold3 = X[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
# X_fold4 = X[np.where(X_times[:,0] >= fold4_first_timestamp)] 

# print(len(X_fold1))
# print(len(X_fold2))
# print(len(X_fold3))
# print(len(X_fold4))


# # Also save the timestamps of the fold just in case they are necessary 
# X_times_fold1 = X_times[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
# X_times_fold2 = X_times[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
# X_times_fold3 = X_times[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
# X_times_fold4 = X_times[np.where(X_times[:,0] >= fold4_first_timestamp)]

# # Take the same instances from Y
# Y_fold1 = Y[np.where((X_times[:,0] >= fold1_first_timestamp) & (Y_times[:,0] < fold2_first_timestamp))[0]]
# Y_fold2 = Y[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
# Y_fold3 = Y[np.where((X_times[:,0] >= fold3_first_timestamp) & (Y_times[:,0] < fold4_first_timestamp))[0]]
# Y_fold4 = Y[np.where(X_times[:,0] >= fold4_first_timestamp)]

# vacio = recordings[np.where((timestamps >= fold4_first_timestamp))[0]]
# vacio_timestamps = timestamps[np.where((timestamps >= fold4_first_timestamp))[0]]
# print(len(X_fold4))
# print(len(Y_fold4))


# Check if the folds are right

In [None]:
# vector = np.linspace(48,53,4)
# random_int = np.random.randint(0, 2000)
# print(random_int)

# plt.figure()

# plt.figure()
# plt.title("fold1")
# plt.plot(fold1_X_train[random_int,:], 'ro')
# plt.plot(vector, fold1_Y_train[random_int,:], 'go')

# plt.figure()
# plt.title("fold2")
# plt.plot(fold2_X_train[random_int,:], 'ro')
# plt.plot(vector, fold2_Y_train[random_int,:], 'go')

# plt.figure()
# plt.title("fold3")
# plt.plot(fold3_X_train[random_int,:], 'ro')
# plt.plot(vector, fold3_Y_train[random_int,:], 'go')

# plt.figure()
# plt.title("fold4")
# plt.plot(fold4_X_train[random_int,:], 'ro')
# plt.plot(vector, fold4_Y_train[random_int,:], 'go')


# ######################### FOLD VERIFICATION TEST ##############################
# a1 = X[np.where((X_times[:,0] >= fold2_first_timestamp) & (X_times[:,0] < fold3_first_timestamp))[0]]
# aa1 = X[np.where((X_times[:,0] >= fold2_first_timestamp) & (Y_times[:,0] < fold3_first_timestamp))[0]]
# X_fold3 = X[np.where((X_times[:,0] >= fold3_first_timestamp) & (X_times[:,0] < fold4_first_timestamp))[0]]

# plt.figure()
# plt.title("si no tenemos en cuenta al timestamp de Y")
# plt.plot(a1[3670], label = "ultima inst. fold2")
# plt.plot(X_fold3[0], label = "primera inst. fold3")
# plt.legend()



# plt.figure()
# plt.title("si  tenemos en cuenta al timestamp de Y")
# plt.plot(aa1[3639], label = "ultima inst. fold2")
# plt.plot(X_fold3[0], label = "primera inst. fold3")
# plt.legend()

# Test if the output sequence is well-generated

In [None]:
# vector = np.linspace(48,53,4)
# plt.figure()

# plt.figure()
# plt.plot(X[4,:], 'ro')
# plt.plot(vector, Y[4,:], 'go')

# plt.figure()
# plt.plot(X[5,:], 'ro')
# plt.plot(vector, Y[5,:], 'go')
# plt.figure()
# plt.plot(X[6,:], 'ro')
# plt.plot(vector, Y[6,:], 'go')
# plt.figure()
# plt.plot(X[7,:], 'ro')
# plt.plot(vector, Y[7,:], 'go')
# plt.figure()
# plt.plot(X[8,:], 'ro')
# plt.plot(vector, Y[8,:], 'go')

