# Column translation

In [40]:
import os
import sys

# Make the parent directory importable so the project's own modules can be loaded
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    # Only register the path once, so re-running the cell does not grow sys.path
    sys.path.append(module_path)

In [41]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
from Translator import Translator
from preprocessor import define_blocks, mage
from visualization import plot_blocks

# Define language and translator functions.
# NOTE(review): presumably `to_lang` maps canonical column names to the file's language and
# `to_col` maps the file's headers back to canonical names - confirm in Translator.
language = "es"
translator = Translator(language)
to_lang = translator.translate_to_language
to_col = translator.translate_to_column

# Load data: tab-separated file, first line skipped, comma as decimal mark,
# first nine columns only, first column used as the index.
raw_data = pd.read_csv("sample.txt", header=0, skiprows=1, delimiter="\t", index_col=0,
                       usecols=list(range(0, 9)), decimal=",")
# Translate column names
raw_data.columns = to_col(raw_data.columns)

# Parse the timestamps after loading rather than through read_csv's `date_parser`
# argument, which is deprecated in pandas 2.x. An explicit format keeps the parsing
# strict and independent of the pandas version.
raw_data["Datetime"] = pd.to_datetime(raw_data["Datetime"], format="%Y/%m/%d %H:%M")

raw_data.iloc[[0, 1, 7, 8]]

Unnamed: 0_level_0,Datetime,Register_Type,Glucose_Auto,Glucose_Manual,Rapid_Insulin_No_Val,Rapid_Insulin,Carbo_No_Val,Carbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
36781,2016-03-31 15:44:00,1,,64.0,,,,
36817,2016-03-31 15:43:00,0,61.0,,,,,
36823,2016-03-31 17:23:00,4,,,,1.0,,
36823,2016-03-31 17:23:00,5,,,,,,1.5


# Divide in blocks

In [42]:
# Run the project's block definition over the raw register
data = define_blocks(raw_data)
# Preview only the rows whose timestamp falls on 2016-04-01
data.loc[data["Datetime"].dt.date == datetime.date(2016, 4, 1)]

Unnamed: 0,Datetime,Glucose_Auto,Block,Day_Block,Last_Meal,Overlapped_Block,Carbo_Block,Rapid_Insulin_Block
33,2016-04-01 00:01:00,82.0,2,2016-03-31,2016-03-31 21:30:00,False,1.0,1.0
34,2016-04-01 00:16:00,84.0,2,2016-03-31,2016-03-31 21:30:00,False,1.0,1.0
35,2016-04-01 00:31:00,95.0,2,2016-03-31,2016-03-31 21:30:00,False,1.0,1.0
36,2016-04-01 00:46:00,96.0,2,2016-03-31,2016-03-31 21:30:00,False,1.0,1.0
37,2016-04-01 01:01:00,96.0,2,2016-03-31,2016-03-31 21:30:00,False,1.0,1.0
38,2016-04-01 01:18:00,113.0,2,2016-03-31,2016-03-31 21:30:00,False,1.0,1.0
39,2016-04-01 01:33:00,141.0,0,2016-04-01,2016-03-31 21:30:00,False,0.0,0.0
40,2016-04-01 01:48:00,156.0,0,2016-04-01,2016-03-31 21:30:00,False,0.0,0.0
41,2016-04-01 02:03:00,148.0,0,2016-04-01,2016-03-31 21:30:00,False,0.0,0.0
42,2016-04-01 02:18:00,156.0,0,2016-04-01,2016-03-31 21:30:00,False,0.0,0.0


In [42]:
# Call the project's plot_blocks helper for 2016-04-01, passing the translator defined above
plot_blocks(data, datetime.date(2016, 4, 1), translator)

<IPython.core.display.Javascript object>

# Extending dataset


## Delete rows with no previous meal (Initial values)

In [44]:
# Rows that have no Last_Meal value
data.loc[data["Last_Meal"].isnull()]

Unnamed: 0,Datetime,Glucose_Auto,Block,Day_Block,Last_Meal,Overlapped_Block,Carbo_Block,Rapid_Insulin_Block
0,2016-03-31 15:43:00,61.0,1,2016-03-31,,False,1.5,1.0
1,2016-03-31 15:58:00,61.0,1,2016-03-31,,False,1.5,1.0
2,2016-03-31 16:13:00,71.0,1,2016-03-31,,False,1.5,1.0
3,2016-03-31 16:28:00,80.0,1,2016-03-31,,False,1.5,1.0
4,2016-03-31 16:43:00,83.0,1,2016-03-31,,False,1.5,1.0
5,2016-03-31 16:58:00,83.0,1,2016-03-31,,False,1.5,1.0
6,2016-03-31 17:14:00,75.0,1,2016-03-31,,False,1.5,1.0


In [45]:
# Drop the rows that have no Last_Meal value.
# The original passed inplace='True' (a string); pandas validates `inplace` as a real
# boolean and rejects strings. Rebinding the result avoids `inplace` altogether and keeps
# the cell idempotent when re-run.
data = data.dropna(subset=['Last_Meal'])
data.head(6)

Unnamed: 0,Datetime,Glucose_Auto,Block,Day_Block,Last_Meal,Overlapped_Block,Carbo_Block,Rapid_Insulin_Block
7,2016-03-31 17:29:00,64.0,1,2016-03-31,2016-03-31 17:23:00,False,1.5,1.0
8,2016-03-31 17:44:00,55.0,1,2016-03-31,2016-03-31 17:23:00,False,1.5,1.0
9,2016-03-31 17:59:00,55.0,1,2016-03-31,2016-03-31 17:23:00,False,1.5,1.0
10,2016-03-31 18:14:00,75.0,1,2016-03-31,2016-03-31 17:23:00,False,1.5,1.0
11,2016-03-31 18:29:00,83.0,1,2016-03-31,2016-03-31 17:23:00,False,1.5,1.0
12,2016-03-31 18:44:00,89.0,1,2016-03-31,2016-03-31 17:23:00,False,1.5,1.0


## Add block information


In [46]:
# Glucose statistics per (Block, Day_Block) group.
# Aggregations are given by name: passing NumPy callables (np.mean, np.std, ...) to
# GroupBy.agg is deprecated, and pandas currently substitutes its own group-wise
# implementations for them anyway (so "std" is the sample standard deviation, ddof=1).
new_columns = data.groupby(['Block', 'Day_Block'])['Glucose_Auto'].agg(['mean', 'std', 'min', 'max'])
new_columns.columns = ["Glucose_Mean_Block", "Glucose_Std_Block", "Glucose_Min_Block", "Glucose_Max_Block"]
# Turn the group keys back into ordinary columns so they can be used as merge keys
new_columns = new_columns.reset_index(level=[0, 1])
# Left merge keeps every row of `data` and attaches its group's statistics
new_data = pd.merge(data, new_columns, on=["Block", "Day_Block"], how='left')
new_data[["Datetime", "Glucose_Auto", "Block", "Day_Block", "Glucose_Mean_Block", "Glucose_Std_Block",
          "Glucose_Min_Block", "Glucose_Max_Block"]].iloc[[1, 20, 40, 60, 80, 100]]

Unnamed: 0,Datetime,Glucose_Auto,Block,Day_Block,Glucose_Mean_Block,Glucose_Std_Block,Glucose_Min_Block,Glucose_Max_Block
1,2016-03-31 17:44:00,55.0,1,2016-03-31,90.25,21.324477,55.0,119.0
20,2016-03-31 20:59:00,114.0,2,2016-03-31,105.0,13.128733,82.0,123.0
40,2016-04-01 01:48:00,156.0,0,2016-04-01,189.571429,25.120851,141.0,224.0
60,2016-04-01 06:50:00,166.0,1,2016-04-01,102.458333,36.107303,47.0,166.0
80,2016-04-01 10:36:00,91.0,1,2016-04-01,102.458333,36.107303,47.0,166.0
100,2016-04-01 13:22:00,97.0,2,2016-04-01,92.458333,29.34203,47.0,139.0


## Add day information

In [47]:
# Glucose statistics per Day_Block.
# Aggregations are given by name: passing NumPy callables (np.mean, np.std, ...) to
# GroupBy.agg is deprecated, and pandas currently substitutes its own group-wise
# implementations for them anyway (so "std" is the sample standard deviation, ddof=1).
new_columns = new_data.groupby(['Day_Block'])['Glucose_Auto'].agg(['mean', 'std', 'min', 'max'])
new_columns.columns = ["Glucose_Mean_Day", "Glucose_Std_Day", "Glucose_Min_Day", "Glucose_Max_Day"]
# Turn the group key back into an ordinary column so it can be used as the merge key
new_columns = new_columns.reset_index(level=0)
# Left merge keeps every row of `new_data` and attaches its day's statistics
new_data = pd.merge(new_data, new_columns, on='Day_Block', how='left')
new_data[["Datetime", "Glucose_Auto", "Block", "Day_Block", "Glucose_Mean_Day", "Glucose_Std_Day",
          "Glucose_Min_Day", "Glucose_Max_Day"]].iloc[[1, 20, 40, 60, 80, 100]]

Unnamed: 0,Datetime,Glucose_Auto,Block,Day_Block,Glucose_Mean_Day,Glucose_Std_Day,Glucose_Min_Day,Glucose_Max_Day
1,2016-03-31 17:44:00,55.0,1,2016-03-31,98.948718,18.256864,55.0,123.0
20,2016-03-31 20:59:00,114.0,2,2016-03-31,98.948718,18.256864,55.0,123.0
40,2016-04-01 01:48:00,156.0,0,2016-04-01,116.715152,44.708378,47.0,224.0
60,2016-04-01 06:50:00,166.0,1,2016-04-01,116.715152,44.708378,47.0,224.0
80,2016-04-01 10:36:00,91.0,1,2016-04-01,116.715152,44.708378,47.0,224.0
100,2016-04-01 13:22:00,97.0,2,2016-04-01,116.715152,44.708378,47.0,224.0


## Calculating MAGE (Glycemic variability)

In [48]:
import peakdetect

# Glucose readings of one Day_Block, re-indexed from 0 so that the positions returned
# by peakdetect can be used directly to select rows.
day = datetime.datetime(2016, 4, 1)
values = new_data[new_data["Day_Block"] == day.date()][["Datetime", "Glucose_Auto"]].reset_index(drop=True)
vector = values["Glucose_Auto"]

# The standard deviation of the day is used as the peak detection threshold (delta)
std = np.std(vector)

peaks = peakdetect.peakdetect(np.array(vector), lookahead=2, delta=std)
# peakdetect returns two lists, respectively positive and negative peaks,
# with for each peak a tuple of (indexes, values).
# Both lists are flattened in that order: all positive peaks first, then all negative ones.
indexes = [peak[0] for peak_group in peaks for peak in peak_group]
peak_values = [peak[1] for peak_group in peaks for peak in peak_group]
print('Standard deviation is {:f}'.format(std))
print('Peaks indexes are: %s' % (indexes,))
print('Peaks values are: %s' % (peak_values,))

_, ax = plt.subplots()
values.plot(ax=ax, kind='line', x="Datetime", y="Glucose_Auto")
# `.ix` was removed from pandas; since the index was reset above, positional `.iloc`
# selects exactly the same rows.
peak_rows = values.iloc[indexes]
ax.plot(peak_rows["Datetime"], peak_rows["Glucose_Auto"], '+', mfc=None, mec='b', mew=2, ms=8)
# Same peaks in chronological order, drawn as a connected line
ordered_rows = values.iloc[sorted(indexes)]
ax.plot(ordered_rows["Datetime"], ordered_rows["Glucose_Auto"], mfc=None, mec='g', mew=2, ms=8)

plt.show()

Standard deviation is 44.572693
Peaks indexes are: [9, 38, 102, 152, 31, 49, 144]
Peaks values are: [224.0, 139.0, 169.0, 161.0, 93.0, 47.0, 52.0]


<IPython.core.display.Javascript object>

In [49]:
# Absolute difference between each pair of consecutive entries of `peak_values`.
# NOTE(review): the pairing follows the order of `peak_values`, which is built by appending
# peakdetect's first list (positive peaks) and then its second (negative peaks), not by
# time - confirm this is the intended pairing for MAGE.
differences = [np.abs(current - following) for current, following in zip(peak_values, peak_values[1:])]
differences

[85.0, 30.0, 8.0, 68.0, 46.0, 5.0]

In [50]:
# Keep only the differences that are strictly greater than the standard deviation
valid_differences = list(filter(lambda difference: difference > std, differences))
valid_differences

[85.0, 68.0, 46.0]

In [51]:
# MAGE is the mean of the retained differences.
# When no difference exceeds the standard deviation the list is empty; report NaN
# instead of failing with ZeroDivisionError.
MAGE = sum(valid_differences) / len(valid_differences) if valid_differences else np.nan
MAGE

66.333333333333329

## Add MAGE to each day

In [52]:
# Compute MAGE once per Day_Block with the project's `mage` helper and write the result
# to every row belonging to that day.
days = new_data['Day_Block'].unique()
for day in days:
    rows_of_day = new_data['Day_Block'] == day
    new_data.loc[rows_of_day, "MAGE"] = mage(new_data[rows_of_day])
new_data.iloc[[1, 200, 400, 600, 800, 1000]][["Datetime", "Glucose_Auto", "Block", "Day_Block",
                                              "Glucose_Std_Day", "MAGE"]]

Unnamed: 0,Datetime,Glucose_Auto,Block,Day_Block,Glucose_Std_Day,MAGE
1,2016-03-31 17:44:00,55.0,1,2016-03-31,18.256864,
200,2016-04-02 01:15:00,83.0,6,2016-04-01,44.708378,66.333333
400,2016-04-03 12:43:00,134.0,2,2016-04-03,38.867359,61.5
600,2016-04-05 11:32:00,60.0,2,2016-04-05,26.580867,59.0
800,2016-04-06 11:12:00,94.0,3,2016-04-06,33.541394,53.333333
1000,2016-04-07 16:40:00,109.0,3,2016-04-07,43.731306,62.0


## Add additional information

In [53]:
# Weekday of the block's day, numbered 1 (Monday) to 7 (Sunday).
# Mapping over the single column avoids building a full row object for every record.
new_data.loc[:, "Weekday"] = new_data["Day_Block"].map(lambda block_day: block_day.weekday() + 1)
# Whole minutes elapsed since the last meal, computed column-wise instead of row by row.
# pd.to_datetime leaves an already-datetime column unchanged; astype(int) truncates
# toward zero exactly like the int() call it replaces.
minutes_since_meal = (new_data["Datetime"] - pd.to_datetime(new_data["Last_Meal"])).dt.total_seconds() / 60
new_data.loc[:, "Minutes_Last_Meal"] = minutes_since_meal.astype(int)
new_data[["Datetime", "Day_Block", "Weekday", "Last_Meal", "Minutes_Last_Meal"]].iloc[[1, 200, 400, 600, 800, 1000]]

Unnamed: 0,Datetime,Day_Block,Weekday,Last_Meal,Minutes_Last_Meal
1,2016-03-31 17:44:00,2016-03-31,4,2016-03-31 17:23:00,21
200,2016-04-02 01:15:00,2016-04-01,5,2016-04-01 22:11:00,184
400,2016-04-03 12:43:00,2016-04-03,7,2016-04-03 09:01:00,222
600,2016-04-05 11:32:00,2016-04-05,2,2016-04-05 10:23:00,69
800,2016-04-06 11:12:00,2016-04-06,3,2016-04-06 08:03:00,189
1000,2016-04-07 16:40:00,2016-04-07,4,2016-04-07 16:27:00,13


## Add label

In [None]:
def label_map(value, hypoglycemia_threshold=70, hyperglycemia_threshold=180):
    """Classify a single glucose reading.

    Parameters
    ----------
    value : float or None
        Glucose reading to classify.
    hypoglycemia_threshold : float, optional
        Readings strictly below this value are labelled 'Hypoglycemia' (default 70).
    hyperglycemia_threshold : float, optional
        Readings strictly above this value are labelled 'Hyperglycemia' (default 180).

    Returns
    -------
    str or None
        'Hypoglycemia', 'Hyperglycemia' or 'Normal'; None when the reading is
        missing (None or NaN).
    """
    # Every comparison against NaN is False, so a missing reading would otherwise
    # fall through to 'Normal'. `value != value` is True only for NaN.
    if value is None or value != value:
        return None

    if value < hypoglycemia_threshold:
        return 'Hypoglycemia'
    if value > hyperglycemia_threshold:
        return 'Hyperglycemia'
    return 'Normal'

# Label every glucose reading with label_map and preview a few rows
new_data.loc[:, "Diagnosis"] = new_data["Glucose_Auto"].map(label_map)
new_data.loc[:, ["Datetime", "Glucose_Auto", "Diagnosis"]].iloc[[1, 200, 400, 600, 800, 1400]]