# 3. Target creation
The objective of this notebook is to create the target variables for ML and statistical analysis.

### Target variables
The targets of interest are hypo- and hyper-glycemia. A single point in either is considered as a positive result.
The time periods of interest for prediction are:
- during exercise
- the 1 hr after exercise
- the 4 hrs after exercise

### Features
- CGM extracted features
    - Start and end glucose
    - Metrics of glycemic control (e.g. time in range, average glucose) calculated using diametrics
    - features extracted using tsfresh, a python package that automatically calculates a large number of time series characteristics (https://tsfresh.readthedocs.io/en/latest/)
    - Time series itself
- Exercise diary data (e.g. duration of bout, type of exercise)
- Demographics data
- Lab data

### Objectives:
1. Calculate start and end glucose for each bout
2. Divide CGM data into periods of interest
3. Extract metrics of glycemic control for each period
4. Extract tsfresh metrics for each period
5. Extract time series
6. Combine extracted features into dataframes

## 3.0. Import packages

In [66]:
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
import datetime
import os
from functools import reduce
import preprocess_helper
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute, make_forecasting_frame
from tsfresh.feature_extraction import ComprehensiveFCParameters, settings
import warnings
warnings.filterwarnings('ignore')

import sys
# Change path to wherever Diametrics is
path = "/Users/cr591/OneDrive - University of Exeter/Desktop/diametrics/diametrics" #### CHANGE
sys.path.append(path)
import metrics as cgm

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [67]:
# Define global parameters
fift_mins = timedelta(minutes=15)
thirt_mins = timedelta(minutes=30)

In [68]:
directory = '../../Data/tidy_data/'

## 3.1. Upload data

### 3.1.1. Upload EXTOD 101 data

In [69]:
# Exercise diaries
extod_101_diaries = pd.read_csv(directory + 'extod_101_diaries.csv')
extod_101_diaries['start_datetime'] = pd.to_datetime(extod_101_diaries['start_datetime'])
extod_101_diaries['finish_datetime'] = pd.to_datetime(extod_101_diaries['finish_datetime'])

In [70]:
extod_101_diaries.shape[0]

776

In [71]:
# Set the durations of interest to 10-120 mins 
extod_101_diaries = extod_101_diaries.loc[(extod_101_diaries.duration>=10) & (extod_101_diaries.duration<=120)]

In [72]:
extod_101_diaries.shape[0]

688

In [73]:
# CGM data
extod_101_cgm = pd.read_csv(directory +'extod_101_cgm.csv')
extod_101_cgm['time'] = pd.to_datetime(extod_101_cgm['time'])

In [74]:
# Demo lab data
extod_101_demo_lab = pd.read_csv(directory + 'extod_101_demo_lab.csv')

### 3.1.2. Upload EXTOD education data

In [75]:
# Exercise diaries
extod_edu_diaries = pd.read_csv(directory + 'extod_edu_diaries.csv')
extod_edu_diaries['start_datetime'] = pd.to_datetime(
    extod_edu_diaries['start_datetime'])
extod_edu_diaries['finish_datetime'] = pd.to_datetime(
    extod_edu_diaries['finish_datetime'])

In [76]:
extod_edu_diaries.shape[0]

875

In [77]:
# Set the durations of interest to 10-120 mins
extod_edu_diaries = extod_edu_diaries.loc[(extod_edu_diaries.duration>=10) &
                                          (extod_edu_diaries.duration<=120)]

In [78]:
extod_edu_diaries.shape[0]

804

In [79]:
# CGM data
extod_edu_cgm = pd.read_csv(directory + 'extod_edu_cgm.csv')
extod_edu_cgm['time'] = pd.to_datetime(extod_edu_cgm['time'])
#extod_edu_cgm['id'] = str(extod_edu_cgm.ID) + extod_edu_cgm.period
# Drop period column and null values
extod_edu_cgm = extod_edu_cgm.drop(columns='period').dropna()

In [80]:
# Demo lab data
extod_edu_demo_lab = pd.read_csv(directory + 'extod_edu_demo_lab.csv')

In [81]:
# Combine demo-lab data from both studies
demo_lab = pd.concat([extod_101_demo_lab, extod_edu_demo_lab], axis=0)

## 3.2. Calculate start and end glucose

In [82]:
# Calculate start + end glucose (and the matching rate-of-change columns) for
# the 101 diaries. Each diary row is paired with the CGM rows of the same ID.
# NOTE(review): the third argument (30 at the start, row.duration at the end)
# is presumably a look-back window in minutes and the fourth a flag for
# Libre-style data that carries a scan_glc column -- confirm against
# preprocess_helper.calc_glc_roc.
extod_101_diaries[['start_glc','start_roc']] = extod_101_diaries.apply(lambda row: preprocess_helper.calc_glc_roc(
    extod_101_cgm.loc[extod_101_cgm['ID']==row.ID], row.start_datetime, 30,
    True), axis=1)
extod_101_diaries[['end_glc','end_roc']] = extod_101_diaries.apply(lambda row: preprocess_helper.calc_glc_roc(
    extod_101_cgm.loc[extod_101_cgm['ID']==row.ID], row.finish_datetime,
    row.duration, True), axis=1)
# Calculate start + end glucose for edu diaries (same call, fourth argument
# False instead of True)
extod_edu_diaries[['start_glc','start_roc']] = extod_edu_diaries.apply(lambda row: preprocess_helper.calc_glc_roc(
    extod_edu_cgm.loc[extod_edu_cgm['ID']==row.ID], row.start_datetime, 30,
    False), axis=1)
extod_edu_diaries[['end_glc','end_roc']] = extod_edu_diaries.apply(lambda row: preprocess_helper.calc_glc_roc(
    extod_edu_cgm.loc[extod_edu_cgm['ID']==row.ID], row.finish_datetime,
    row.duration, False), axis=1)

# Calculate start + end glucose for 101 diaries
# NOTE(review): this assigns to the same start_glc / end_glc columns that the
# calc_glc_roc calls earlier in this section already populate, so running it
# overwrites those values (start_roc / end_roc are left untouched). Confirm
# whether this older calc_glc variant is still meant to run.
extod_101_diaries['start_glc'] = extod_101_diaries.apply(lambda row: preprocess_helper.calc_glc(
    extod_101_cgm.loc[extod_101_cgm['ID']==row.ID], row.start_datetime, 30,
    True), axis=1)
extod_101_diaries['end_glc'] = extod_101_diaries.apply(lambda row: preprocess_helper.calc_glc(
    extod_101_cgm.loc[extod_101_cgm['ID']==row.ID], row.finish_datetime,
    row.duration, True), axis=1)
# Calculate start + end glucose for edu diaries
extod_edu_diaries['start_glc'] = extod_edu_diaries.apply(lambda row: preprocess_helper.calc_glc(
    extod_edu_cgm.loc[extod_edu_cgm['ID']==row.ID], row.start_datetime, 30,
    False), axis=1)
extod_edu_diaries['end_glc'] = extod_edu_diaries.apply(lambda row: preprocess_helper.calc_glc(
    extod_edu_cgm.loc[extod_edu_cgm['ID']==row.ID], row.finish_datetime,
    row.duration, False), axis=1)

In [83]:
# Drop scan_glc column and null values
extod_101_cgm = extod_101_cgm.drop(columns='scan_glc').dropna()

In [84]:
extod_101_diaries.head()

Unnamed: 0,ID,start_datetime,finish_datetime,intensity,type_of_exercise,starting_glucose,finishing_glucose,month,day,day_of_week,time_of_day,duration,form_of_exercise,start_glc,start_roc,end_glc,end_roc
1,3039,2018-05-19 09:00:00,2018-05-19 09:28:00,12.0,run,12.0,7.0,5,19,5,morning,28.0,aer,14.2,3.36,10.7,7.0
2,3039,2018-05-22 18:00:00,2018-05-22 19:17:00,13.0,run,13.0,7.0,5,22,1,evening,77.0,aer,7.6,1.111111,4.2,3.214286
3,3039,2018-05-28 09:30:00,2018-05-28 10:33:00,17.0,run,11.0,13.0,5,28,0,morning,63.0,aer,,,,
4,3039,2018-05-31 17:00:00,2018-05-31 18:30:00,15.0,weights,9.0,5.0,5,31,3,afternoon,90.0,ana,,,,
5,3039,2018-06-04 17:00:00,2018-06-04 18:15:00,15.0,gym,8.2,5.6,6,4,0,afternoon,75.0,mix,17.8,4.090909,11.1,3.818182


In [85]:
def calc_rate_of_change(df, time, window, libre=False):
    '''
    Return the glucose reading closest to, but strictly before, `time`.

    NOTE(review): despite its name this function returns a single glucose
    value, not a rate of change. The name is kept so existing callers keep
    working.

    Parameters
    ----------
    df : pandas.DataFrame
        CGM readings with a 'time' column comparable to a timestamp and a
        'glc' column. When `libre` is True a 'scan_glc' column is also
        required. The frame is never modified.
    time : datetime-like or str
        Reference time; anything accepted by `pd.to_datetime`.
    window : int or float
        Look-back window in minutes. Only readings with
        `time - window <= reading time < time` are considered.
    libre : bool, default False
        If True, rows where both 'glc' and 'scan_glc' are missing are dropped
        and remaining gaps in 'glc' are filled from 'scan_glc'.

    Returns
    -------
    float
        The 'glc' value of the latest reading inside the window (which may
        itself be NaN when `libre` is False), or `np.nan` if the window holds
        no readings.
    '''
    time = pd.to_datetime(time)
    # Treating the exercise period as truth, so only look before `time`
    in_window = ((df['time'] >= (time - timedelta(minutes=window))) &
                 (df['time'] < time))
    # Work on an explicit copy so the caller's frame is never touched and
    # pandas has no reason to emit SettingWithCopy warnings
    sub_df = df.loc[in_window].copy()
    if libre:
        sub_df = sub_df.dropna(subset=['glc', 'scan_glc'], how='all')
        # Plain assignment instead of chained inplace fillna, which silently
        # does nothing when pandas Copy-on-Write is enabled
        sub_df['glc'] = sub_df['glc'].fillna(sub_df['scan_glc'])
    if sub_df.empty:
        return np.nan
    # Pick the reading with the smallest gap to the reference time
    gap = (time - sub_df['time']).abs()
    return sub_df['glc'].loc[gap.idxmin()]

## 3.3. Divide CGM data into bouts

In [86]:
# Create unique id for each bout
extod_101_diaries = preprocess_helper.create_bout_id(extod_101_diaries)
extod_edu_diaries = preprocess_helper.create_bout_id(extod_edu_diaries)

In [87]:
extod_101_diaries.shape[0]

688

In [88]:
extod_edu_diaries.shape[0]

804

In [133]:
extod_edu_diaries.dropna(subset=['start_glc']).shape

(461, 18)

In [89]:
# Combine data from both studies
diaries = pd.concat([extod_101_diaries, extod_edu_diaries], axis=0)

In [132]:
diaries.dropna(subset=['start_glc']).shape

(998, 18)

## Plot diagram

In [90]:
example = extod_edu_diaries.iloc[430]
extod_edu_cgm[(extod_edu_cgm.ID==example.ID)&
              (extod_edu_cgm.time>example.start_datetime)&
              (extod_edu_cgm.time<example.finish_datetime)]

Unnamed: 0,time,glc,ID
230122,2018-04-09 10:41:03,8.44,1046
230123,2018-04-09 10:46:03,8.44,1046
230124,2018-04-09 10:51:03,8.44,1046
230125,2018-04-09 10:56:03,8.32,1046
230126,2018-04-09 11:01:03,8.1,1046
230127,2018-04-09 11:06:03,7.82,1046
230128,2018-04-09 11:11:03,7.27,1046
230129,2018-04-09 11:16:03,7.27,1046
230130,2018-04-09 11:21:03,7.38,1046


In [91]:
example

ID                                  1046
type_of_exercise                 Walking
starting_glucose                     8.2
finishing_glucose                    7.3
duration                            44.0
intensity                           13.0
start_datetime       2018-04-09 10:40:00
finish_datetime      2018-04-09 11:24:00
month                                  4
day                                    9
day_of_week                            0
time_of_day                      morning
form_of_exercise                     aer
start_glc                           8.38
start_roc                      -1.678497
end_glc                             7.38
end_roc                         1.882353
bout_id                         1046_430
Name: 430, dtype: object

## 3.4. Select CGM periods 

def store_glucose_data_as_series(dataframe):
    '''
    Build a glucose series keyed by timestamp.

    Re-indexes `dataframe` on its 'time' column and returns only the 'glc'
    column; every other column is discarded.
    '''
    time_indexed = dataframe.set_index('time')
    return time_indexed['glc']


def set_up_dataframes(cgm_df, exercise_df, period, interval_size):
    '''
    Select the CGM data belonging to each exercise bout for one period and
    tag it with the bout's unique id.

    Parameters
    ----------
    cgm_df : pandas.DataFrame
        CGM readings with 'ID', 'time' and 'glc' columns.
    exercise_df : pandas.DataFrame
        Exercise diary with 'ID', 'start_datetime', 'finish_datetime' and
        'bout_id' columns.
    period : str
        One of 'during' (start to finish of the bout), 'before' (4hrs before
        the start up to the start), '1hr_after' (finish to 1hr after) or
        '4hrs_after' (finish to 4hrs after).
    interval_size : int
        Forwarded unchanged to `cgm.percent_missing`.

    Returns
    -------
    tuple
        `(cgm_dataframe, glucose_data, missing_data)`:
        - cgm_dataframe: the per-bout CGM rows (with a 'bout_id' column) of
          every bout whose percentage missing is below 100, concatenated.
        - glucose_data: for 'during' and 'before' only, one row per bout
          holding the bout id and its time-indexed glucose series; empty
          otherwise.
        - missing_data: list of `[ID, start, end, percent_missing]` for bouts
          whose percentage missing is at or above the period's limit.

    Raises
    ------
    ValueError
        If `period` is not one of the supported values.
    '''
    # Percentage-missing threshold used for *reporting* in missing_data
    limits = {'during': 40, 'before': 100, '1hr_after': 40, '4hrs_after': 50}
    if period not in limits:
        # Fail loudly instead of printing and continuing with undefined bounds
        raise ValueError(
            "Invalid period %r; expected one of %s" % (period, sorted(limits)))
    limit = limits[period]
    # Only these periods also return the raw glucose series per bout
    glc_series_bool = period in ('during', 'before')

    missing_data = []
    # Collected pieces; concatenated once at the end (DataFrame.append was
    # removed in pandas 2.0 and is quadratic when used inside a loop)
    cgm_frames = []
    series_frames = []

    # Only search IDs that have both diaries and CGM data
    ids_intersect = set(cgm_df['ID'].values).intersection(
        set(exercise_df['ID'].values))

    for ID in ids_intersect:
        # Select exercise diary for each ID
        diary_id = exercise_df.loc[exercise_df['ID'] == ID]
        for _, row in diary_id.iterrows():
            if period == 'during':
                start = row.start_datetime
                end = row.finish_datetime
            elif period == 'before':
                start = row.start_datetime - timedelta(hours=4)
                end = row.start_datetime
            elif period == '1hr_after':
                start = row.finish_datetime
                end = row.finish_datetime + timedelta(hours=1)
            else:  # '4hrs_after': window starts at the end of the bout
                start = row.finish_datetime
                end = row.finish_datetime + timedelta(hours=4)

            # Select the CGM data for the period; copy so that adding the
            # bout_id column below does not write into a slice of cgm_df
            cgm_id = cgm_df.loc[(cgm_df['ID'] == ID)
                                & (cgm_df['time'] >= start)
                                & (cgm_df['time'] < end)].copy()
            if glc_series_bool:
                glc_series = store_glucose_data_as_series(cgm_id)
                series_frames.append(
                    pd.DataFrame([[row.bout_id, glc_series]]))

            # Give this data the unique bout id
            cgm_id['bout_id'] = row.bout_id
            # Calculate the percentage missing for the dataset
            perc_missing = cgm.percent_missing(cgm_id, 'time', 'glc', 'ID',
                                               interval_size, start,
                                               end).percent_missing.values
            pct = perc_missing[0]
            if pct >= limit:
                missing_data.append([ID, start, end, pct])
            # Bouts are only dropped when nothing at all was recorded; the
            # per-period limit above is used for reporting, not filtering
            if pct < 100:
                cgm_frames.append(cgm_id)

    cgm_dataframe = pd.concat(cgm_frames) if cgm_frames else pd.DataFrame()
    glucose_data = pd.concat(series_frames) if series_frames else pd.DataFrame()
    return cgm_dataframe, glucose_data, missing_data

#### Before

In [92]:
# Set up the 'before' dataframes for each study
extod_101_before, extod_101_before_series, missing_data_101 = preprocess_helper.set_up_dataframes(extod_101_cgm, extod_101_diaries, 'before',
                                     15)
extod_101_before.reset_index(drop=True, inplace=True)
extod_101_before.drop(columns=['ID'], inplace=True)

In [93]:
extod_edu_before, extod_edu_before_series, missing_data_edu = preprocess_helper.set_up_dataframes(extod_edu_cgm, extod_edu_diaries, 'before',
                                     5)
extod_edu_before.reset_index(drop=True, inplace=True)
#extod_edu_before.drop(columns=['period', 'ID'], inplace=True)

#### During

In [94]:
# Set up during CGM dataframe for each study
extod_101_during, extod_101_during_series, _ = preprocess_helper.set_up_dataframes(extod_101_cgm, extod_101_diaries, 'during',
                                     15)
extod_101_during.reset_index(drop=True, inplace=True)
extod_edu_during, extod_edu_during_series, _ = preprocess_helper.set_up_dataframes(extod_edu_cgm, extod_edu_diaries, 'during',
                                     5)
extod_edu_during.reset_index(drop=True, inplace=True)

#### 1hr after

In [95]:
# Set up CGM datasets
extod_101_1hr_after, extod_101_1hr_after_series, _ = preprocess_helper.set_up_dataframes(extod_101_cgm, extod_101_diaries, '1hr_after', 15)
extod_101_1hr_after.reset_index(drop=True, inplace=True)
extod_edu_1hr_after, extod_edu_1hr_after_series, _ = preprocess_helper.set_up_dataframes(extod_edu_cgm, extod_edu_diaries, '1hr_after', 5)
extod_edu_1hr_after.reset_index(drop=True, inplace=True)

#### 4hrs after

In [96]:
# Set up CGM datasets
extod_101_4hr_after, extod_101_4hr_after_series, _ = preprocess_helper.set_up_dataframes(extod_101_cgm, extod_101_diaries,
                                        '4hrs_after', 15)
extod_101_4hr_after.reset_index(drop=True, inplace=True)
extod_edu_4hr_after, extod_edu_4hr_after_series, _ = preprocess_helper.set_up_dataframes(extod_edu_cgm, extod_edu_diaries,
                                        '4hrs_after', 5)
extod_edu_4hr_after.reset_index(drop=True, inplace=True)

pd.DataFrame(missing_data_101, columns=['ID', 'start', 'end', 'perc_missing']).shape[0]

pd.DataFrame(missing_data_edu, columns=['ID', 'start', 'end', 'perc_missing']).shape[0]

extod_101_before.head()

### 3.4.1. Create the dataframes for predicting during exercise

#### 3.4.1.1. Extract glycemic metrics

before_cgm = pd.concat([extod_101_before, extod_edu_before]).reset_index(drop=True)

In [97]:
# Calculate glycemic metrics for both datasets
glyc_metrics_101_before = cgm.all_metrics(extod_101_before, ID='bout_id',
                                          interval_size=15,
                                          exercise_thresholds=True)
glyc_metrics_edu_before = cgm.all_metrics(extod_edu_before, ID='bout_id',
                                          interval_size=5,
                                          exercise_thresholds=True)
# Create identification column for each study
glyc_metrics_101_before['study'] = 'extod_101'
glyc_metrics_edu_before['study'] = 'extod_edu'
# Concatenate both
X_before_glyc_metrics = pd.concat([glyc_metrics_101_before,
                                   glyc_metrics_edu_before]).reset_index(drop=True).rename(columns={'ID':'bout_id'})
# Label columns with 'before'
X_before_glyc_metrics.columns = ['before_'+i  if i not in ['study', 'bout_id'] else i for i in X_before_glyc_metrics.columns]

Index(['ID', 'TIR_lv2_hypo', 'TIR_lv1_hypo', 'TIR_hypo', 'TIR_norm',
       'TIR_hyper', 'TIR_lv1_hyper', 'TIR_lv2_hyper'],
      dtype='object')
Index(['ID', 'number_hypos', 'avg_length_of_hypo', 'total_time_in_hypos',
       'number_lv1_hypos', 'number_lv2_hypos'],
      dtype='object')
Index(['ID', 'average_glucose'], dtype='object')
Index(['ID', 'mage_mean'], dtype='object')
Index(['ID', 'ea1c'], dtype='object')
Index(['ID', 'percent_missing'], dtype='object')
Index(['ID', 'TIR_lv2_hypo', 'TIR_lv1_hypo', 'TIR_hypo', 'TIR_norm',
       'TIR_hyper', 'TIR_lv1_hyper', 'TIR_lv2_hyper'],
      dtype='object')
Index(['ID', 'number_hypos', 'avg_length_of_hypo', 'total_time_in_hypos',
       'number_lv1_hypos', 'number_lv2_hypos'],
      dtype='object')
Index(['ID', 'average_glucose'], dtype='object')
Index(['ID', 'mage_mean'], dtype='object')
Index(['ID', 'ea1c'], dtype='object')
Index(['ID', 'percent_missing'], dtype='object')


In [98]:
X_before_glyc_metrics.head()

Unnamed: 0,bout_id,before_TIR_lv2_hypo,before_TIR_lv1_hypo,before_TIR_hypo,before_TIR_norm,before_TIR_hyper,before_TIR_lv1_hyper,before_TIR_lv2_hyper,before_TIR_hypo_exercise,before_TIR_normal_exercise,before_TIR_hyper_exercise,before_number_hypos,before_avg_length_of_hypo,before_total_time_in_hypos,before_number_lv1_hypos,before_number_lv2_hypos,before_number_hypos_below_5,before_avg_length_hypo_below_5,before_total_time_in_hypos_below_5,before_sd,before_cv,before_minimum_glucose,before_maximum_glucose,before_average_glucose,before_mage_mean,before_ea1c,before_percent_missing,study
0,3046_675,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.496655,5.227952,8.9,10.0,9.5,1.1,7.603774,0.0,extod_101
1,3008_167,0.0,0.0,0.0,37.5,62.5,62.5,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,1.290556,12.986731,7.8,11.4,9.9375,3.6,7.878931,0.0,extod_101
2,3030_508,0.0,0.0,0.0,50.0,50.0,50.0,0.0,0.0,75.0,25.0,0,0.0,0.0,0,0,0,0.0,0.0,2.021551,18.892996,8.7,13.1,10.7,4.4,8.358491,0.0,extod_101
3,3028_467,0.0,0.0,0.0,50.0,50.0,50.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.767572,7.3628,9.6,11.3,10.425,1.7,8.185535,0.0,extod_101
4,3007_153,0.0,0.0,0.0,0.0,100.0,0.0,100.0,0.0,0.0,100.0,0,0.0,0.0,0,0,0,0.0,0.0,2.333095,9.095888,22.6,27.8,25.65,5.2,17.761006,0.0,extod_101


#### 3.4.2.1. Extract glycemic metrics (during)

In [99]:
# Calculate glycemic metrics for extod 101 datasets
glyc_metrics_101_during = cgm.all_metrics(extod_101_during, ID='bout_id', interval_size=15,
                  exercise_thresholds=True)
# Calculate glycemic metrics for extod education datasets
glyc_metrics_edu_during = cgm.all_metrics(extod_edu_during, ID='bout_id', interval_size=5,
                  exercise_thresholds=True)
# Concat the two
X_during_glyc_metrics = pd.concat([glyc_metrics_101_during,
                                   glyc_metrics_edu_during]).rename(columns={'ID':'bout_id'})
# Label columns with 'during'
X_during_glyc_metrics.columns = ['during_'+i  if i not in ['bout_id'] else i for i in X_during_glyc_metrics.columns]

Index(['ID', 'TIR_lv2_hypo', 'TIR_lv1_hypo', 'TIR_hypo', 'TIR_norm',
       'TIR_hyper', 'TIR_lv1_hyper', 'TIR_lv2_hyper'],
      dtype='object')
Index(['ID', 'number_hypos', 'avg_length_of_hypo', 'total_time_in_hypos',
       'number_lv1_hypos', 'number_lv2_hypos'],
      dtype='object')
Index(['ID', 'average_glucose'], dtype='object')
Index(['ID', 'mage_mean'], dtype='object')
Index(['ID', 'ea1c'], dtype='object')
Index(['ID', 'percent_missing'], dtype='object')
Index(['ID', 'TIR_lv2_hypo', 'TIR_lv1_hypo', 'TIR_hypo', 'TIR_norm',
       'TIR_hyper', 'TIR_lv1_hyper', 'TIR_lv2_hyper'],
      dtype='object')
Index(['ID', 'number_hypos', 'avg_length_of_hypo', 'total_time_in_hypos',
       'number_lv1_hypos', 'number_lv2_hypos'],
      dtype='object')
Index(['ID', 'average_glucose'], dtype='object')
Index(['ID', 'mage_mean'], dtype='object')
Index(['ID', 'ea1c'], dtype='object')
Index(['ID', 'percent_missing'], dtype='object')


In [100]:
X_during_glyc_metrics.head()

Unnamed: 0,bout_id,during_TIR_lv2_hypo,during_TIR_lv1_hypo,during_TIR_hypo,during_TIR_norm,during_TIR_hyper,during_TIR_lv1_hyper,during_TIR_lv2_hyper,during_TIR_hypo_exercise,during_TIR_normal_exercise,during_TIR_hyper_exercise,during_number_hypos,during_avg_length_of_hypo,during_total_time_in_hypos,during_number_lv1_hypos,during_number_lv2_hypos,during_number_hypos_below_5,during_avg_length_hypo_below_5,during_total_time_in_hypos_below_5,during_sd,during_cv,during_minimum_glucose,during_maximum_glucose,during_average_glucose,during_mage_mean,during_ea1c,during_percent_missing
0,3046_675,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.386868,4.243522,8.8,9.8,9.116667,0.9,7.362683,0.0
1,3008_167,0.0,0.0,0.0,54.545455,45.454545,45.454545,0.0,18.181818,81.818182,0.0,0,0.0,0.0,0,0,0,0.0,0.0,2.913916,33.84697,3.9,11.4,8.609091,7.5,7.043453,0.0
2,3030_508,0.0,0.0,0.0,25.0,75.0,75.0,0.0,0.0,75.0,25.0,0,0.0,0.0,0,0,0,0.0,0.0,1.639868,14.416421,9.5,13.3,11.375,3.8,8.783019,0.0
3,3028_467,25.0,25.0,50.0,50.0,0.0,0.0,0.0,50.0,50.0,0.0,1,16.0,16.0,1,0,1,16.0,16.0,2.447448,46.61805,2.9,8.4,5.25,5.5,4.930818,0.0
4,3007_153,0.0,0.0,0.0,0.0,100.0,0.0,100.0,0.0,0.0,100.0,0,0.0,0.0,0,0,0,0.0,0.0,0.697854,3.374535,19.8,21.4,20.68,1.6,14.63522,0.0


#### 3.4.2.2. Extract target features (y) for during exercise

In [101]:
# Select the target features (y)
y_101_during = glyc_metrics_101_during.loc[:,['ID', 'TIR_hypo_exercise',
                                              'TIR_hypo', 'TIR_hyper_exercise'
                                              ]]
y_101_during.columns=['bout_id', 'y_hypo', 'y_hypo_3', 'y_hyper']
y_101_during['y_hypo'] = y_101_during['y_hypo']>0
y_101_during['y_hypo_3'] = y_101_during['y_hypo_3']>0
y_101_during['y_hyper'] = y_101_during['y_hyper']>0

y_edu_during = glyc_metrics_edu_during.loc[:,['ID', 'TIR_hypo_exercise',
                                              'TIR_hypo',  'TIR_hyper_exercise'
                                              ]]
y_edu_during.columns=['bout_id', 'y_hypo', 'y_hypo_3', 'y_hyper']
y_edu_during['y_hypo'] = y_edu_during['y_hypo']>0
y_edu_during['y_hypo_3'] = y_edu_during['y_hypo_3']>0
y_edu_during['y_hyper'] = y_edu_during['y_hyper']>0


# Concat the two
y_during = pd.concat([y_101_during, y_edu_during])
# Rename y columns
#y_during.columns = ['bout_id', 'y_hypo', 'hyper']

In [102]:
during_merged = X_during_glyc_metrics.merge(y_during, on='bout_id')

#### 3.4.3. Calculate y values for 1hr after
Only need to calculate y values for 1hr after

In [103]:
# Calculate time in range
y_101_1hr_after = cgm.time_in_range(extod_101_1hr_after, ID='bout_id', exercise_thresholds=False)[['ID','TIR_hypo' , 'TIR_hyper']]
y_101_1hr_after.columns = ['bout_id', 'y_hypo', 'y_hyper']
y_101_1hr_after['y_hypo'] = y_101_1hr_after['y_hypo']>0
y_101_1hr_after['y_hyper'] = y_101_1hr_after['y_hyper']>0

y_edu_1hr_after = cgm.time_in_range(extod_edu_1hr_after, ID='bout_id', exercise_thresholds=False)[['ID','TIR_hypo', 'TIR_hyper']]
y_edu_1hr_after.columns = ['bout_id', 'y_hypo', 'y_hyper']
y_edu_1hr_after['y_hypo'] = y_edu_1hr_after['y_hypo']>0
y_edu_1hr_after['y_hyper'] = y_edu_1hr_after['y_hyper']>0

# Concatenate both studies
y_1hr_after = pd.concat([y_101_1hr_after, y_edu_1hr_after])
y_1hr_after.columns = ['bout_id', 'y_hypo', 'y_hyper']

#### 3.4.4. Calculate y values for 4hr after
Only need to calculate the y values for 4hrs after

In [104]:
# Calculate time in range
y_101_4hr_after = cgm.time_in_range(extod_101_4hr_after, ID='bout_id',
                                    exercise_thresholds=False)[['ID','TIR_hypo', 'TIR_hyper'
                                                                ]]
y_101_4hr_after.columns = ['bout_id', 'y_hypo', 'y_hyper']
y_101_4hr_after['y_hypo'] = y_101_4hr_after['y_hypo']>0
y_101_4hr_after['y_hyper'] = y_101_4hr_after['y_hyper']>0

y_edu_4hr_after = cgm.time_in_range(extod_edu_4hr_after, ID='bout_id',
                                    exercise_thresholds=False)[['ID','TIR_hypo', 'TIR_hyper'
                                                                ]]
y_edu_4hr_after.columns = ['bout_id', 'y_hypo', 'y_hyper']
y_edu_4hr_after['y_hypo'] = y_edu_4hr_after['y_hypo']>0
y_edu_4hr_after['y_hyper'] = y_edu_4hr_after['y_hyper']>0

# Concatenate both studies
y_4hr_after = pd.concat([y_101_4hr_after, y_edu_4hr_after])
# Rename as y values
y_4hr_after.columns = ['bout_id', 'y_hypo', 'y_hyper']

In [105]:
y_4hr_after

Unnamed: 0,bout_id,y_hypo,y_hyper
0,3046_675,False,True
1,3008_167,False,True
2,3007_153,False,True
3,3009_207,True,False
4,3028_467,True,False
5,3003_67,False,True
6,3030_508,False,False
7,3002_42,True,False
8,3008_194,False,False
9,3036_617,True,False


## tsfresh metrics

In [106]:
extraction_settings = ComprehensiveFCParameters()
def extract_tsfresh(timeseries, y):
    '''
    Run tsfresh's relevant-feature extraction on the bouts shared by both
    inputs.

    `timeseries` must have 'bout_id', 'time' and 'glc' columns and `y` must be
    indexed by bout id. Rows/entries whose bout id does not appear in both are
    discarded before `extract_relevant_features` is called with the
    module-level `extraction_settings`.
    '''
    # Keep only the bout ids present in both the time series and the target
    shared_ids = set(y.index) & set(timeseries.bout_id)
    ts_subset = timeseries.loc[timeseries.bout_id.isin(shared_ids)]
    y_subset = y.loc[y.index.isin(shared_ids)]
    return extract_relevant_features(
        ts_subset, y_subset,
        column_id="bout_id", column_sort="time", column_value="glc",
        show_warnings=False,
        default_fc_parameters=extraction_settings,
    )

### 1. During

In [107]:
y_edu_during.set_index('bout_id', inplace=True)
y_101_during.set_index('bout_id', inplace=True)

In [108]:
y_edu_during

Unnamed: 0_level_0,y_hypo,y_hypo_3,y_hyper
bout_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1008_79,True,True,False
1010_123,True,True,False
1015_177,False,False,True
1045_412,False,False,True
1014_164,False,False,True
2045_763,False,False,True
1013_147,False,False,True
2017_581,True,False,False
1031_288,False,False,True
2003_517,True,True,False


#### 5mmol/L

In [109]:
# Extract features
ts_fresh_edu_hypo_5  = extract_tsfresh(extod_edu_before, y_edu_during.y_hypo)
ts_fresh_101_hypo_5 = extract_tsfresh(extod_101_before, y_101_during.y_hypo)
# Concatenate both studies
before_tsfresh_during_hypo_5 = pd.concat([ts_fresh_edu_hypo_5, ts_fresh_101_hypo_5]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'before'
before_tsfresh_during_hypo_5.columns = ['before_' + i if i not in ['bout_id'] else i for i in before_tsfresh_during_hypo_5.columns]

Feature Extraction: 100%|██████████| 30/30 [00:02<00:00, 10.02it/s]
Feature Extraction: 100%|██████████| 30/30 [00:03<00:00,  9.16it/s]


#### 3.9mmol/L

In [110]:
# Extract features
ts_fresh_edu_hypo_3  = extract_tsfresh(extod_edu_before, y_edu_during.y_hypo_3)
ts_fresh_101_hypo_3 = extract_tsfresh(extod_101_before, y_101_during.y_hypo_3)
# Concatenate both studies
before_tsfresh_during_hypo_3 = pd.concat([ts_fresh_edu_hypo_3, ts_fresh_101_hypo_3]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'before'
before_tsfresh_during_hypo_3.columns = ['before_' + i if i not in ['bout_id'] else i for i in before_tsfresh_during_hypo_3.columns]

Feature Extraction: 100%|██████████| 30/30 [00:02<00:00, 10.28it/s]
Feature Extraction: 100%|██████████| 30/30 [00:03<00:00,  9.94it/s]


#### Hyperglycaemia

In [111]:
# Extract features
ts_fresh_edu_hyper  = extract_tsfresh(extod_edu_before, y_edu_during.y_hyper)
ts_fresh_101_hyper = extract_tsfresh(extod_101_before, y_101_during.y_hyper)
# Concatenate both studies
before_tsfresh_during_hyper = pd.concat([ts_fresh_edu_hyper, ts_fresh_101_hyper]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'before'
before_tsfresh_during_hyper.columns = ['before_' + i if i not in ['bout_id'] else i for i in before_tsfresh_during_hyper.columns]

Feature Extraction: 100%|██████████| 30/30 [00:02<00:00, 10.71it/s]
Feature Extraction: 100%|██████████| 30/30 [00:03<00:00,  9.16it/s]


### 1hr after

##### 4hrs before

In [112]:
y_edu_1hr_after.set_index('bout_id', inplace=True)
y_101_1hr_after.set_index('bout_id', inplace=True)
y_edu_4hr_after.set_index('bout_id', inplace=True)
y_101_4hr_after.set_index('bout_id', inplace=True)

In [113]:
# Extract features
ts_fresh_edu_1hr  = extract_tsfresh(extod_edu_before, y_edu_1hr_after.y_hypo)
ts_fresh_101_1hr = extract_tsfresh(extod_101_before, y_101_1hr_after.y_hypo)
# Concatenate both studies
before_tsfresh_1hr = pd.concat([ts_fresh_edu_1hr, ts_fresh_101_1hr]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'before'
before_tsfresh_1hr.columns = ['before_' + i if i not in ['bout_id'] else i for i in before_tsfresh_1hr.columns]

Feature Extraction: 100%|██████████| 29/29 [00:02<00:00,  9.72it/s]
Feature Extraction: 100%|██████████| 30/30 [00:03<00:00,  9.75it/s]


##### During

In [114]:
# Extract features
ts_fresh_edu  = extract_tsfresh(extod_edu_during, y_edu_1hr_after.y_hypo)
ts_fresh_101 = extract_tsfresh(extod_101_during, y_101_1hr_after.y_hypo)
# Concatenate both studies
during_tsfresh_1hr = pd.concat([ts_fresh_edu, ts_fresh_101]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'during'
during_tsfresh_1hr.columns = ['during_' + i if i not in ['bout_id'] else i for i in during_tsfresh_1hr.columns]

Feature Extraction: 100%|██████████| 30/30 [00:02<00:00, 10.01it/s]
Feature Extraction: 100%|██████████| 30/30 [00:03<00:00,  9.73it/s]


### 4hrs after

##### 4hrs before

In [115]:
# Extract features
ts_fresh_edu  = extract_tsfresh(extod_edu_before, y_edu_4hr_after.y_hypo)
ts_fresh_101 = extract_tsfresh(extod_101_before, y_101_4hr_after.y_hypo)
# Concatenate both studies
before_tsfresh_4hr = pd.concat([ts_fresh_edu, ts_fresh_101]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'before'
before_tsfresh_4hr.columns = ['before_' + i if i not in ['bout_id'] else i for i in before_tsfresh_4hr.columns]

Feature Extraction: 100%|██████████| 29/29 [00:03<00:00,  9.29it/s]
Feature Extraction: 100%|██████████| 30/30 [00:03<00:00,  9.50it/s]


##### During

In [116]:
# Extract features from the during-exercise CGM window, selected against the
# 4hrs-after hypo target
ts_fresh_edu  = extract_tsfresh(extod_edu_during, y_edu_4hr_after.y_hypo)
ts_fresh_101 = extract_tsfresh(extod_101_during, y_101_4hr_after.y_hypo)
# Concatenate both studies
during_tsfresh_4hr = pd.concat([ts_fresh_edu, ts_fresh_101]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'during' (previously mislabelled 'before_', which made
# these features indistinguishable from the 4hrs-before ones)
during_tsfresh_4hr.columns = ['during_' + i if i not in ['bout_id'] else i for i in during_tsfresh_4hr.columns]

Feature Extraction: 100%|██████████| 30/30 [00:02<00:00, 10.21it/s]
Feature Extraction: 100%|██████████| 30/30 [00:02<00:00, 10.02it/s]


# Extract all features for both studies using tsfresh.
# extract_features is used here (not extract_relevant_features): no target is
# supplied, and impute_function is an extract_features argument.
X_101_during_fresh = extract_features(extod_101_during,
                                column_id="bout_id", column_sort="time",
                                column_value="glc", impute_function=impute,
                                show_warnings=False,
                                default_fc_parameters=extraction_settings
                               )
X_edu_during_fresh = extract_features(extod_edu_during,
                                column_id="bout_id", column_sort="time",
                                column_value="glc", impute_function=impute,
                                show_warnings=False,
                                default_fc_parameters=extraction_settings
                               )
# Concatenate both studies
X_during_tsfresh = pd.concat([X_101_during_fresh, X_edu_during_fresh]).reset_index().rename(columns={'index': 'bout_id'})
# Label columns with 'during'
X_during_tsfresh.columns = ['during_' + i if i not in ['bout_id'] else i for i in X_during_tsfresh.columns]

## 3.5. Combine dataframes for the machine learning tasks

In [117]:
# Folder that the combined prediction dataframes are written to
df_directory = "../../Data/tidy_data/"

### 3.5.1. Create the dataframe for predicting during exercise

In [118]:
# Inputs for the during-exercise dataset: glycemic metrics for the 4hrs
# before the bout and the exercise diaries
frames = [X_before_glyc_metrics, diaries]
# Fold the list into one frame, joining each pair on bout_id
df_glyc_during = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    frames,
)
# The participant ID is the first four characters of bout_id
df_glyc_during['ID'] = df_glyc_during['bout_id'].map(lambda bout: int(bout[:4]))
# Attach demographics and lab data via the participant ID
df_glyc_during = pd.merge(df_glyc_during, demo_lab, on='ID')

In [119]:
# Preview the first five rows of the glycemic-metrics-only frame
df_glyc_during.head(5)

Unnamed: 0,bout_id,before_TIR_lv2_hypo,before_TIR_lv1_hypo,before_TIR_hypo,before_TIR_norm,before_TIR_hyper,before_TIR_lv1_hyper,before_TIR_lv2_hyper,before_TIR_hypo_exercise,before_TIR_normal_exercise,before_TIR_hyper_exercise,before_number_hypos,before_avg_length_of_hypo,before_total_time_in_hypos,before_number_lv1_hypos,before_number_lv2_hypos,before_number_hypos_below_5,before_avg_length_hypo_below_5,before_total_time_in_hypos_below_5,before_sd,before_cv,before_minimum_glucose,before_maximum_glucose,before_average_glucose,before_mage_mean,before_ea1c,before_percent_missing,study,ID,start_datetime,finish_datetime,intensity,type_of_exercise,starting_glucose,finishing_glucose,month,day,day_of_week,time_of_day,duration,form_of_exercise,start_glc,start_roc,end_glc,end_roc,age,sex,years_since_diagnosis,insulin_administration,bmi,urine_cpep,urine_creat,hba1c,chol,cpep,h_index,hdl,ldl,nhdl,trig
0,3046_675,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.496655,5.227952,8.9,10.0,9.5,1.1,7.603774,0.0,extod_101,3046,2018-06-15 16:57:00,2018-06-15 18:28:00,14.0,run,13.8,4.6,6,15,4,afternoon,91.0,aer,10.0,-1.354839,8.9,-0.1875,63.635866,male,61.560575,pump,25.086505,0.03,3.1,52.0,4.7,3.0,72.0,2.22,2.22,2.48,0.57
1,3046_678,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.543906,7.04085,7.3,8.5,7.725,0.8,6.487421,0.0,extod_101,3046,2018-06-21 15:14:00,2018-06-21 16:14:00,10.0,run,7.8,12.2,6,21,3,afternoon,60.0,aer,8.5,-2.538462,15.3,-5.04,63.635866,male,61.560575,pump,25.086505,0.03,3.1,52.0,4.7,3.0,72.0,2.22,2.22,2.48,0.57
2,3046_674,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.613052,7.735678,7.3,8.7,7.925,1.4,6.613208,0.0,extod_101,3046,2018-06-12 17:29:00,2018-06-12 18:25:00,14.0,run,78.0,4.1,6,12,1,evening,56.0,aer,7.3,0.857143,7.2,-2.181818,63.635866,male,61.560575,pump,25.086505,0.03,3.1,52.0,4.7,3.0,72.0,2.22,2.22,2.48,0.57
3,3046_676,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.216025,2.805515,7.4,7.9,7.7,0.4,6.471698,0.0,extod_101,3046,2018-06-18 15:10:00,2018-06-18 16:10:00,12.0,run,8.1,9.3,6,18,0,afternoon,60.0,aer,7.8,0.2,9.3,-1.0,63.635866,male,61.560575,pump,25.086505,0.03,3.1,52.0,4.7,3.0,72.0,2.22,2.22,2.48,0.57
4,3046_680,0.0,50.0,50.0,50.0,0.0,0.0,0.0,75.0,25.0,0.0,1,15.0,15.0,1,0,1,30.0,30.0,0.788987,19.12695,3.4,5.2,4.125,1.8,4.22327,0.0,extod_101,3046,2018-07-05 09:40:00,2018-07-05 10:19:00,14.0,walk,7.3,4.8,7,5,3,morning,39.0,aer,3.4,0.72,3.0,0.315789,63.635866,male,61.560575,pump,25.086505,0.03,3.1,52.0,4.7,3.0,72.0,2.22,2.22,2.48,0.57


#### Hypoglycaemia <5mmol/L

In [120]:
# Join the glycemic metrics, the tsfresh features for this target and the
# y_hypo label column on bout_id
hypo_5_frames = [
    df_glyc_during,
    before_tsfresh_during_hypo_5,
    y_during[['bout_id', 'y_hypo']],
]
df_during_hypo_5 = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    hypo_5_frames,
)

#### Hypoglycaemia <3.9mmol/L

In [121]:
# Join the glycemic metrics, the tsfresh features for this target and the
# y_hypo_3 label column on bout_id
hypo_3_frames = [
    df_glyc_during,
    before_tsfresh_during_hypo_3,
    y_during[['bout_id', 'y_hypo_3']],
]
df_during_hypo_3 = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    hypo_3_frames,
)

#### Hyperglycaemia

In [122]:
# Join the glycemic metrics, the tsfresh features for this target and the
# y_hyper label column on bout_id
hyper_frames = [
    df_glyc_during,
    before_tsfresh_during_hyper,
    y_during[['bout_id', 'y_hyper']],
]
df_during_hyper = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    hyper_frames,
)

### 3.5.2. Create the dataframe for predicting 1hr after exercise

In [123]:
# Inputs for the after-exercise datasets: glycemic metrics for the 4hrs
# before the bout, glycemic metrics during the bout, and the exercise diaries
frames = [X_before_glyc_metrics, X_during_glyc_metrics, diaries]
# Fold the list into one frame, joining each pair on bout_id
df_glyc_after = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    frames,
)
# The participant ID is the first four characters of bout_id
df_glyc_after['ID'] = df_glyc_after['bout_id'].map(lambda bout: int(bout[:4]))
# Attach demographics and lab data via the participant ID
df_glyc_after = pd.merge(df_glyc_after, demo_lab, on='ID')

In [124]:
# Glycemic-metrics frame plus the 1hr tsfresh features (before and during)
frames = [df_glyc_after, before_tsfresh_1hr, during_tsfresh_1hr]
# Fold the list into one frame, joining each pair on bout_id
df_combined_after = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    frames,
)

In [125]:
# Attach the 1hr-after target (y) to the glycemic-metrics-only frame
df_glyc_one_hr = pd.merge(df_glyc_after, y_1hr_after, on='bout_id')
# Attach the same target to the glycemic + tsfresh frame
df_combined_one_hr = pd.merge(df_combined_after, y_1hr_after, on='bout_id')

### 3.5.3. Create the dataframe for predicting 4hrs after exercise

In [126]:
# Glycemic-metrics frame plus the 4hr tsfresh features (before and during).
# Note: this reassigns df_combined_after, replacing the 1hr version.
frames = [df_glyc_after, before_tsfresh_4hr, during_tsfresh_4hr]
# Fold the list into one frame, joining each pair on bout_id
df_combined_after = reduce(
    lambda merged, nxt: merged.merge(nxt, on='bout_id'),
    frames,
)

In [127]:
# Attach the 4hr-after target (y) to the glycemic-metrics-only frame
df_glyc_four_hr = pd.merge(df_glyc_after, y_4hr_after, on='bout_id')
# Attach the same target to the glycemic + tsfresh frame
df_combined_four_hr = pd.merge(df_combined_after, y_4hr_after, on='bout_id')

## 3.6. Save to csv

In [128]:
# Preview the last five rows of the glycemic-metrics-only frame
df_glyc_during.tail(5)

Unnamed: 0,bout_id,before_TIR_lv2_hypo,before_TIR_lv1_hypo,before_TIR_hypo,before_TIR_norm,before_TIR_hyper,before_TIR_lv1_hyper,before_TIR_lv2_hyper,before_TIR_hypo_exercise,before_TIR_normal_exercise,before_TIR_hyper_exercise,before_number_hypos,before_avg_length_of_hypo,before_total_time_in_hypos,before_number_lv1_hypos,before_number_lv2_hypos,before_number_hypos_below_5,before_avg_length_hypo_below_5,before_total_time_in_hypos_below_5,before_sd,before_cv,before_minimum_glucose,before_maximum_glucose,before_average_glucose,before_mage_mean,before_ea1c,before_percent_missing,study,ID,start_datetime,finish_datetime,intensity,type_of_exercise,starting_glucose,finishing_glucose,month,day,day_of_week,time_of_day,duration,form_of_exercise,start_glc,start_roc,end_glc,end_roc,age,sex,years_since_diagnosis,insulin_administration,bmi,urine_cpep,urine_creat,hba1c,chol,cpep,h_index,hdl,ldl,nhdl,trig
995,2037_714,0.0,0.0,0.0,41.666667,58.333333,58.333333,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.267474,2.675187,9.27,10.27,9.998333,0.695,7.917191,23.636364,extod_edu,2037,2018-04-18 17:45:00,2018-04-18 19:00:00,13.0,"Legs,Bums, Tums class",9.8,6.7,4,18,2,evening,75.0,ana,9.27,2.599078,7.16,0.773599,34.0,female,17.29863,,25.292634,,,63.0,,5.0,,,,,
996,2032_691,0.0,0.0,0.0,0.0,100.0,0.0,100.0,0.0,0.0,100.0,0,0.0,0.0,0,0,0,0.0,0.0,0.275758,1.724924,15.48,16.26,15.986667,0.53,11.683438,0.0,extod_edu,2032,2018-03-23 07:00:00,2018-03-23 08:00:00,12.0,Run,15.3,10.1,3,23,4,morning,60.0,aer,15.76,0.78935,10.65,2.678152,37.0,male,7.221918,,20.216049,,,57.0,,91.0,,,,,
997,1038_356,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.687123,7.910693,7.27,9.6,8.686,2.11,7.091824,0.0,extod_edu,1038,2018-03-23 17:00:00,2018-03-23 18:00:00,14.0,Swim,4.7,7.8,3,23,4,afternoon,60.0,aer,7.27,3.352672,,,54.0,male,13.64931,,32.199949,,,63.0,,42.0,,,,,
998,2043_751,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.540283,7.423174,6.44,8.1,7.278333,1.61,6.206499,0.0,extod_edu,2043,2018-05-04 16:30:00,2018-05-04 17:00:00,10.5,Walking,5.7,5.6,5,4,4,afternoon,30.0,aer,6.49,1.804437,5.55,2.129827,53.0,female,23.93151,,25.691553,,,62.0,,4.0,,,,,
999,2005_532,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,100.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0.993442,11.869082,6.77,9.6,8.37,2.72,6.893082,0.0,extod_edu,2005,2018-09-13 09:16:00,2018-09-13 09:30:00,11.0,Walking,,8.4,9,13,3,morning,14.0,aer,6.77,3.490909,5.94,3.314286,47.0,male,2.175343,,27.069161,,,62.0,,448.0,,,,,


In [129]:
# Write the prediction dataframes to df_directory, without the index column.
# During exercise: one file per target
df_during_hypo_5.to_csv(f'{df_directory}prediction_df_during_hypo_5.csv', index=False)
df_during_hypo_3.to_csv(f'{df_directory}prediction_df_during_hypo_3.csv', index=False)
df_during_hyper.to_csv(f'{df_directory}prediction_df_during_hyper.csv', index=False)
# After exercise: 1hr and 4hr horizons (glycemic metrics + tsfresh features)
df_combined_one_hr.to_csv(f'{df_directory}prediction_df_1hr_after.csv', index=False)
df_combined_four_hr.to_csv(f'{df_directory}prediction_df_4hr_after.csv', index=False)