# 2. Preprocessing EXTOD education dataset

The goal of this notebook is to prepare the EXTOD education data for machine learning - predicting euglycemia during and around exercise.

EXTOD education was a pilot study in which 106 participants were randomly allocated to either standard care or an education programme teaching them how better to manage blood glucose around exercise.

The data used will be demographic, lab, clinical and physiological if available.

### Objectives:
1. Clean and combine the exercise diaries
2. Preprocess demographic data
3. Preprcoess lab data
4. Prepare FGM data

## 2.0. Import packages

In [2]:
# Import packages and upload dataset
import pandas as pd
import numpy as np
import datetime
from datetime import datetime as dt
import os
from datetime import timedelta as time
import warnings
import preprocess_helper
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

## 2.1. Clean and combine exercise diaries

### 2.1.1. Load files and rename columns

In [3]:
ex_diary_tn_bsl = pd.read_excel('../../Data/raw_data/Exericse diaries entered into Excel/EXTOD_education_exercise diary Taunton.xlsx', sheet_name='Taunton_baseline')
ex_diary_tn_6m = pd.read_excel('../../Data/raw_data/Exericse diaries entered into Excel/EXTOD_education_exercise diary Taunton.xlsx', sheet_name='Taunton_6months')
ex_diary_bm_bsl = pd.read_excel('../../Data/raw_data/Exericse diaries entered into Excel/EXTOD_education_exercise diaries Birmingham.xlsx', sheet_name='Birmingham_baseline')
ex_diary_bm_6m = pd.read_excel('../../Data/raw_data/Exericse diaries entered into Excel/EXTOD_education_exercise diaries Birmingham.xlsx', sheet_name='Birmingham_6months')

In [4]:
# Create list with all diaries in
diaries = [ex_diary_tn_bsl, ex_diary_tn_6m, ex_diary_bm_bsl, ex_diary_bm_6m]

In [5]:
# This diary has an extra column 'day'
ex_diary_bm_6m.drop(columns='Day', inplace=True)

In [6]:
def clean(i):
    '''
    Renames all the diaries
    '''
    i = i.iloc[:, :14]
    i.columns = ['ID', 'date', 'exercise_on_day', 'type_of_exercise',
                 'start_time', 'starting_glucose', 'finish_time',
                 'finishing_glucose', 'duration', 'borg', 'comment',
                 'hours_in_mins', 'mins', 'duration_mins']
    return i

In [7]:
# Clean diaries
diaries = [clean(i) for i in diaries]
# Concatenate them into one df
exercise_diaries = pd.concat(diaries, axis=0)
# Only select days where exercise has occurred
exercise_diaries = exercise_diaries.loc[(exercise_diaries.exercise_on_day=='Yes') | (exercise_diaries.exercise_on_day=='yes')]

In [8]:
exercise_diaries.shape[0]

1017

In [9]:
exercise_diaries.iloc[357:400]

Unnamed: 0,ID,date,exercise_on_day,type_of_exercise,start_time,starting_glucose,finish_time,finishing_glucose,duration,borg,comment,hours_in_mins,mins,duration_mins
14,1003,2018-09-21 00:00:00,Yes,Gardening,15:00:00,10.3,No finish time,,suggest 10,14.0,,,,
15,1003,2018-09-21 00:00:00,Yes,Gardening,15:55:00,3.5,17:55:00,3.2,2,14.0,,120.0,0.0,120.0
17,1003,2018-09-23 00:00:00,Yes,Walking,10:55:00,,11:03:00,7.8,0.08,,,0.0,8.0,8.0
18,1003,2018-09-23 00:00:00,Yes,Walking,14:55:00,,15:10:00,7.6,0.15,,,0.0,15.0,15.0
19,1003,2018-09-23 00:00:00,Yes,Gardening,16:15:00,13.2,17:15:00,11.9,1,12.0,,60.0,0.0,60.0
20,1003,2018-09-24 00:00:00,Yes,Walking,09:20:00,10.1,10:50:00,7.3,0.3,9.0,,0.0,30.0,30.0
21,1003,2018-09-24 00:00:00,Yes,Gardening,11:15:00,7.3,12:40:00,4.0,1.25,10.0,,60.0,25.0,85.0
22,1003,2018-09-24 00:00:00,Yes,Intermitent Walking,14:00:00,7.4,17:20:00,10.2,3.2,8.0,,180.0,20.0,200.0
23,1003,2018-09-25 00:00:00,Yes,Walking,19:05:00,10.2,19:30:00,10.0,0.25,11.0,,0.0,25.0,25.0
24,1003,2018-09-26 00:00:00,Yes,Gardening,12:00:00,8.8,13:00:00,4.2,1,11.0,,60.0,0.0,60.0


### 2.1.2. Clean datetime column

In [10]:
# Fix the finish time
exercise_diaries.finish_time = exercise_diaries.finish_time.apply(lambda x: preprocess_helper.check_time(x))

In [11]:
# Fix the start time
exercise_diaries.start_time = exercise_diaries.start_time.apply(lambda x: preprocess_helper.check_time(x))

In [12]:
# Combine date and time to make datetime for start and finish
exercise_diaries['start_datetime'] = exercise_diaries.apply(lambda row:
                                        datetime.datetime.combine(row.date,
                                       row.start_time) if pd.notnull(row.date)
                                        and pd.notnull(row.start_time) else 
                                       np.nan, axis=1)
exercise_diaries['finish_datetime'] = exercise_diaries.apply(lambda row: 
                                      datetime.datetime.combine(row.date,
                                      row.finish_time) if pd.notnull(row.date) 
                                      and pd.notnull(row.finish_time) else
                                                             np.nan, axis=1)

In [13]:
# 41 dropped with no datetime data
exercise_diaries_complete = exercise_diaries[(pd.notnull(exercise_diaries.duration_mins) &
                               (pd.notnull(exercise_diaries.start_datetime) |
                 pd.notnull(exercise_diaries.finish_datetime))) | (pd.notnull(exercise_diaries.start_datetime) &
                 pd.notnull(exercise_diaries.finish_datetime))]

In [14]:
# Fill gaps in datetime using other 2 measurements
exercise_diaries_complete = exercise_diaries_complete.apply(lambda row: preprocess_helper.fill_missing(row),
                                                            axis=1)

In [15]:
# Calculate duration from start and finish datetimes
#exercise_diaries_complete['duration_calc'] = exercise_diaries_complete.finish_datetime - exercise_diaries_complete.start_datetime
# Convert to minutes
exercise_diaries_complete['duration'] = (exercise_diaries_complete.finish_datetime - exercise_diaries_complete.start_datetime).apply(lambda x: datetime.timedelta.total_seconds(x)/60)

In [16]:
# Reset index
exercise_diaries_complete.reset_index(drop=True, inplace=True)

In [17]:
# Manually correct some of the incorrect datetimes 
exercise_diaries_complete.at[433, 'finish_datetime'] = pd.to_datetime('2018-09-12 17:30:00')
exercise_diaries_complete.at[457, 'finish_datetime'] = pd.to_datetime('2019-01-25 21:20:00')
exercise_diaries_complete.at[759, 'finish_datetime'] = pd.to_datetime('2018-04-16 19:30:00')
exercise_diaries_complete.at[589, 'finish_datetime'] = pd.to_datetime('2019-01-12 22:45:00')

In [18]:
# Manually switch start and finish time for those who put them in the wrong way
start_times = exercise_diaries_complete.finish_datetime.iloc[[760, 730, 642, 478]]
end_times = exercise_diaries_complete.start_datetime.iloc[[760, 730, 642, 478]] 
exercise_diaries_complete.start_datetime.iloc[[760, 730, 642, 478]] = start_times
exercise_diaries_complete.finish_datetime.iloc[[760, 730, 642, 478]] = end_times

In [19]:
# Drop and instances where duration is negative
exercise_diaries_complete.drop(exercise_diaries_complete.loc[exercise_diaries_complete['duration'] < 0].index, 
                               inplace=True)

In [20]:
# Drop unneeded columns 
exdi = exercise_diaries_complete.drop(columns=['date', 'exercise_on_day', 'start_time',
                          'finish_time', 'hours_in_mins', 'mins', #'duration_calc',
                          'duration_mins', 'comment'])

### 2.1.3. Sort out dates

In [21]:
# Collec date information from start_datetime
exdi = preprocess_helper.date_preprocessing(exdi, 'start_datetime', False, True, True)

In [22]:
exdi.shape[0]

966

### 2.1.4. Borg/intensity

In [23]:
# Reset index
exdi.reset_index(drop=True, inplace=True)

In [24]:
# Manually correct some of the very strangely entered borg scores
swap_dict = {'11\\12+ 14': 12.5, '16 for 30mins,12 after ': 14, '5, 11-13': 12}
exdi.borg = exdi.borg.replace(swap_dict)
exdi.borg.value_counts()

12.0              167
13.0              148
11.0              145
14.0              101
15.0               74
10.0               61
9.0                60
16.0               34
17.0               19
8.0                13
7.0                11
Not Known           9
11\12               7
5                   6
13.5                6
12.5                6
Not known           5
18.0                5
Fairly Light        5
14.5                3
13\17               3
not stated          3
11.5                3
10\11               2
11Fairly Light      2
Hard                2
Somewhat hard       2
4.0                 2
13\14               2
8\17                1
Light uphill        1
15\16               1
12\13               1
15-17               1
14\16               1
somewhat hard       1
13/17               1
17\18               1
Moderate            1
Medium              1
Mild/Medium         1
Light               1
6.0                 1
10.5                1
15.5                1
2\3       

In [25]:
# Correct borg score
exdi.borg = exdi.borg.apply(lambda x: preprocess_helper.correct_borg(x))

Commencing phase 2


In [26]:
# Rename to intensity
exdi.rename(columns={'borg':'intensity'}, inplace=True)

### 2.1.5. Type of exercise

In [27]:
exdi.type_of_exercise.value_counts()

Walking                                                                      201
Cycling                                                                       72
Walk                                                                          58
Gardening                                                                     37
Run                                                                           30
Yoga                                                                          29
Running                                                                       28
Cycling                                                                       26
Walking                                                                       25
Weights                                                                       20
Gym                                                                           18
Dog Walking                                                                   15
Cycle                       

In [28]:
# Number bouts before
exdi.shape[0]

966

In [29]:
# Convert form_of_exercise
exdi['form_of_exercise'] = exdi.type_of_exercise.apply(lambda x: 
                                                       preprocess_helper.divide_exercise_into_type(x))
# Drop any nulls
exdi.dropna(subset=['form_of_exercise'], inplace=True)


cycling
aer

cycling
aer

running
aer

cycling
aer

cycling
aer

football
aer

cycling
aer

cycling
aer

cycling
aer

running
aer

cycling
aer

cycling
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking/ sea swim
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

cycling
aer

walking
aer

walking
aer

walking
aer

walking
aer

davina workout
mix

wii zumba
aer

power walking
aer

power walking
aer

power walking
aer

power walking
aer

power walking
aer

dog walk 
aer

dog walk 
aer

dog walk 
aer

golf
aer

dog walk
aer

dog walk
aer

golf
aer

dog walk 
aer

dog walk 
aer

dog walk 
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walking
aer

walki

In [30]:
# Number bouts after
exdi.shape[0]

875

In [31]:
exdi.tail()

Unnamed: 0,ID,type_of_exercise,starting_glucose,finishing_glucose,duration,intensity,start_datetime,finish_datetime,month,day,day_of_week,time_of_day,form_of_exercise
959,2049,Walk,8.2,6.0,90.0,9.0,2018-11-27 10:00:00,2018-11-27 11:30:00,11,27,1,morning,aer
960,2049,Walk,5.9,7.0,60.0,9.0,2018-11-27 12:00:00,2018-11-27 13:00:00,11,27,1,afternoon,aer
961,2050,Walking,8.2,6.2,40.0,14.0,2019-01-09 18:55:00,2019-01-09 19:35:00,1,9,2,evening,aer
964,2050,Walk,7.4,,30.0,15.0,2019-01-13 18:30:00,2019-01-13 19:00:00,1,13,6,evening,aer
965,2050,Walk,8.2,,30.0,14.0,2019-01-14 19:05:00,2019-01-14 19:35:00,1,14,0,evening,aer


## 2.2. Clean and combine CGM files

In [32]:
# Upload directory for individual data
directory = '../../Data/raw_data/extod_edu_cgm/'

In [33]:
def try_parsing_date(text):
    text = str(text)
    formats = ("%d-%m-%Y %H:%M:%S", "%d-%m-%Y %H:%M:%S", "%d/%m/%Y %H:%M",
               "%d-%m-%Y %H:%M", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S",
               "%Y-%m-%d %H:%M", "%Y/%m/%d %H:%M", "%d/%m/%Y  %H:%M:%S")  # add dot format
    for fmt in formats:
        try:
            dt.strptime(text, fmt)
            return True
        except ValueError:
            pass
    return False


def test_col(col):
    col = col.dropna()
    datetime_bool = col.apply(lambda x: try_parsing_date(x))
    if datetime_bool.all():
        return 'dt'
    try:
        # print(col.tail())
        col_num = pd.to_numeric(col).dropna()
        # print('NUM')
        if ((col_num < 28) & (col_num > 2)).all():
            #print('glc_uk')
            return 'glc_uk'
        elif ((col_num < 505) & (col_num > 36)).all():
            return 'glc_us'
        else:
            return 'unknown'

    except Exception:
        # print('not num')
        return 'unknown'


def find_header(df):
    dropped = df.dropna()
    dropped.columns = ['time', 'glc']
    count = 0
    for i, row in dropped.iterrows():
        is_date = try_parsing_date(row['time'])
        if not is_date:
            count += 1
            continue
        try:
            float(row['glc'])
            break
        except Exception:
            count += 1
    if count == dropped.shape[0]:
        print(df.head())
        raise Exception('Problem with input data')
    return dropped.iloc[count:]

In [34]:
def preprocess_data(df, id_colname=None):
    # print(df.head())
    max_rows = df.count().max()
    cols_to_keep = df.count()[df.count() > max_rows * 0.7].index
    footer_rows = df[cols_to_keep].iloc[int(-max_rows * 0.1):]
    # print(cols_to_keep)
    col_type_dict = {'dt': [], 'glc_uk': [], 'glc_us': []}
    for i in cols_to_keep:
        #print(i)
        col_type = test_col(footer_rows[i])
        if col_type != 'unknown':
            col_type_dict[col_type].append(i)
    print(col_type_dict)
    if (len(col_type_dict['dt']) > 0) & (len(col_type_dict['glc_uk']) > 0):
        sub_frame = df[[col_type_dict['dt'][-1], col_type_dict['glc_uk'][0]]]
        df_processed = find_header(sub_frame)
    elif (len(col_type_dict['dt']) > 0) & (len(col_type_dict['glc_us']) > 0):
        sub_frame = df[col_type_dict['dt'][-1], col_type_dict['glc_us'][0]]
        df_processed = find_header(sub_frame)
        try:
            df_processed['time'] = df_processed['time'] / 0.0555
        except Exception:
            print('Problem with input data')
    else:
        raise Exception('Can\'t identify datetime and/or glucose columns')
    if id_colname is not None:
        df_processed = df_processed.join(df[id_colname], how='left')
        df_processed.rename({id_colname: 'ID'}, inplace=True)
    df_processed.reset_index(drop=True, inplace=True)
    return df_processed

In [35]:
def format_df(filename, directory):
     # set filepath for each file in directory
    filepath = directory + '/' + filename
    print(filename)
    try:
        df = pd.read_excel(filepath)
    except Exception: # sort this out
        try:
            df = pd.read_csv(filepath, names=[i for i in range(30)])
        except Exception:
            print('File in wrong format, must be Excel or CSV')
    df.replace({'High': 22.2, 'Low': 2.2, 'HI':22.2, 'LO':2.2, 'hi':22.2, 'lo':2.2}, inplace=True)
    # Preprocess
    df_preprocessed = preprocess_data(df)
    # If there's no ID column, use filename as ID
    df_preprocessed['ID'] = filename.rsplit('.', 1)[0]
    # Calculate metrics
    df_preprocessed.glc = pd.to_numeric(df_preprocessed.glc)
    return df_preprocessed

In [36]:
# Map the function to all files in the directory
results = list(map(lambda filename: format_df(filename, directory),
                   os.listdir(directory)))
# Concatenate all files to make one dataframe
df_total = pd.concat(results).reset_index(drop=True)

2012_baseline.xlsx
{'dt': ['GlucoseInternalTime', 'GlucoseDisplayTime'], 'glc_uk': ['GlucoseValue'], 'glc_us': []}
1045_6months.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
1045_baseline.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
1031_6months.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
2043_6months.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
2026_baseline.xlsx
{'dt': ['GlucoseInternalTime', 'GlucoseDisplayTime'], 'glc_uk': ['GlucoseValue'], 'glc_us': []}
1038_baseline.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
1021_6months.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
1004_6months.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}
1029_6months.xlsx
{'dt': ['InternalTime2', 'DisplayTime3'], 'glc_uk': ['Value4'], 'glc_us': []}


# Map the function to all files in the directory
results = list(map(lambda filename: preprocess_helper.combine_frame(filename, directory), os.listdir(directory)))
# Concatenate all files to make one dataframe
df_total = pd.concat(results).reset_index(drop=True)

In [37]:
df_total[['ID', 'period']] = df_total.ID.str.split('_', expand=True)

In [38]:
df_total.ID.apply(lambda x: int(x[:4]))

0         2012
1         2012
2         2012
3         2012
4         2012
5         2012
6         2012
7         2012
8         2012
9         2012
10        2012
11        2012
12        2012
13        2012
14        2012
15        2012
16        2012
17        2012
18        2012
19        2012
20        2012
21        2012
22        2012
23        2012
24        2012
25        2012
26        2012
27        2012
28        2012
29        2012
30        2012
31        2012
32        2012
33        2012
34        2012
35        2012
36        2012
37        2012
38        2012
39        2012
40        2012
41        2012
42        2012
43        2012
44        2012
45        2012
46        2012
47        2012
48        2012
49        2012
50        2012
51        2012
52        2012
53        2012
54        2012
55        2012
56        2012
57        2012
58        2012
59        2012
60        2012
61        2012
62        2012
63        2012
64        2012
65        2012
66        

In [39]:
df_total.head()

Unnamed: 0,time,glc,ID,period
0,2018-01-02 15:28:00,15.28,2012,baseline
1,2018-01-02 15:33:00,15.17,2012,baseline
2,2018-01-02 15:38:00,15.28,2012,baseline
3,2018-01-02 15:43:00,15.5,2012,baseline
4,2018-01-02 15:48:00,15.67,2012,baseline


## 2.3. Lab & demographic data

In [40]:
demographics = pd.read_excel('../../Data/raw_data/Demographics, measures and c-peptide.xlsx', sheet_name=1)

In [41]:
demographics.head()

Unnamed: 0,site,label,agedv,gender,mh_t1dm_date_vst1,mh_t1dm_duration_vst1,bp_avge_sys_dv_vst1,bp_avge_dia_int_vst1,height_vst1,weight_vst1,waist_vst1,bodyfat_vst1,hba1c_result_vst1
0,s01taunt,EXT1001,34,Male,2013-04-01,4.775342,123,74,180,94.7,101.5,30.0,60
1,s01taunt,EXT1003,67,Male,1980-01-01,38.04657,122,71,170,95.2,103.5,29.5,51
2,s01taunt,EXT1004,61,Male,1982-01-01,36.04383,105,62,180,74.7,84.0,19.3,54
3,s01taunt,EXT1005,53,Female,1988-07-01,29.54247,128,79,166,94.5,109.0,44.2,88
4,s01taunt,EXT1006,65,Female,1968-04-01,49.80822,172,84,158,61.0,85.6,33.5,49


In [42]:
c_pep = pd.read_excel('../../Data/raw_data/Patient data.xlsx')

In [43]:
c_pep.head()

Unnamed: 0,ID,Sex,Age,Duration of diabetes,Arm,Cpeptide (pmol/L)
0,1001,M,34,5.0,control,88
1,1003,M,67,38.0,control,29
2,1004,M,61,36.0,Treatment,<3
3,1005,F,53,30.0,control,<3
4,1006,F,65,50.0,Treatment,<3


In [44]:
# Set id as first 4 characters
demographics['ID'] = demographics.label.apply(lambda x: int(x[3:7]))

In [45]:
# Merge demographics and cpep on id
demo_lab = pd.merge(demographics, c_pep[['ID', 'Cpeptide (pmol/L)']], on='ID')

In [46]:
# Calculate bmi from weight and height
demo_lab['bmi'] = demo_lab.weight_vst1/(demo_lab.height_vst1/100 * demo_lab.height_vst1/100)

In [47]:
# Select useful columns
demo_lab = demo_lab[['ID', 'agedv', 'gender', 'mh_t1dm_duration_vst1', 'bmi',
                     'hba1c_result_vst1', 'Cpeptide (pmol/L)']]
# Rename columns
demo_lab.columns = ['ID', 'age', 'sex', 'years_since_diagnosis', 'bmi', 'hba1c',
                    'cpep']

In [48]:
demo_lab.head()

Unnamed: 0,ID,age,sex,years_since_diagnosis,bmi,hba1c,cpep
0,1001,34,Male,4.775342,29.228395,60,88
1,1003,67,Male,38.04657,32.941176,51,29
2,1004,61,Male,36.04383,23.055556,54,<3
3,1005,53,Female,29.54247,34.293802,88,<3
4,1006,65,Female,49.80822,24.435187,49,<3


In [49]:
# Set sex to lower case
demo_lab.sex = demo_lab.sex.apply(lambda x: x.lower())

In [50]:
# Replace cpep below 3 with 3
demo_lab.cpep.replace({'<3':2.9}, inplace=True)

In [51]:
demo_lab.head()

Unnamed: 0,ID,age,sex,years_since_diagnosis,bmi,hba1c,cpep
0,1001,34,male,4.775342,29.228395,60,88.0
1,1003,67,male,38.04657,32.941176,51,29.0
2,1004,61,male,36.04383,23.055556,54,2.9
3,1005,53,female,29.54247,34.293802,88,2.9
4,1006,65,female,49.80822,24.435187,49,2.9


In [52]:
exdi.head()

Unnamed: 0,ID,type_of_exercise,starting_glucose,finishing_glucose,duration,intensity,start_datetime,finish_datetime,month,day,day_of_week,time_of_day,form_of_exercise
0,1001,Cycling,13.8,14.0,7.0,13.0,2018-01-09 08:49:00,2018-01-09 08:56:00,1,9,1,morning,aer
1,1001,Cycling,12.6,11.7,8.0,13.0,2018-01-09 17:02:00,2018-01-09 17:10:00,1,9,1,evening,aer
2,1001,Running,11.1,6.6,34.0,17.0,2018-01-09 17:24:00,2018-01-09 17:58:00,1,9,1,evening,aer
3,1001,Cycling,12.2,12.4,8.0,13.0,2018-01-10 08:47:00,2018-01-10 08:55:00,1,10,2,morning,aer
4,1001,Cycling,16.2,15.8,10.0,13.0,2018-01-10 17:32:00,2018-01-10 17:42:00,1,10,2,evening,aer


## 2.4. Save files

In [53]:
df_directory = '../../Data/tidy_data/'
exdi.to_csv(df_directory+'extod_edu_diaries.csv', index=False)
df_total.to_csv(df_directory+'extod_edu_cgm.csv', index=False)
demo_lab.to_csv(df_directory+'extod_edu_demo_lab.csv', index=False)