In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
!pip install -q keras

In [0]:
##----- Importing Required Packages -----##
import pandas as pd
import numpy as np
import re
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
import pandas as pd
from numpy.random import seed

In [0]:
bg = pd.read_csv("/content/drive/My Drive/BioConscious/blood-glucose-data.csv")
da = pd.read_csv("/content/drive/My Drive/BioConscious/distance-activity-data.csv")
hr = pd.read_csv("/content/drive/My Drive/BioConscious/heart-rate-data.csv")

In [0]:
##----- Converting Date Column into Date-Time Format -----##
bg.dtypes
bg["point_timestamp"] = pd.to_datetime(bg.point_timestamp)
da["point_timestamp"] = pd.to_datetime(da.point_timestamp)
hr["point_timestamp"] = pd.to_datetime(hr.point_timestamp)
bg.dtypes

point_value(mg/dL)             int64
point_timestamp       datetime64[ns]
timezone_offset                int64
dtype: object

In [0]:
##----- Interpolating the Blood Sugar Data -----##
bg2 = bg.copy()
bg2.point_timestamp = pd.to_datetime(bg2['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00")
point_time = pd.date_range(start = bg2.point_timestamp.min(), end = bg2.point_timestamp.max(), freq = "1min")
point_time = pd.DataFrame({"point_timestamp" : point_time})
point_time["Check"] = 1

In [0]:
##----- Merging to get 1 min intervals -----##
bg2 = point_time.merge(bg2, how = "outer", left_on = "point_timestamp", 
                       right_on = "point_timestamp").sort_values(by = ["point_timestamp"])

bg2 = bg2.drop(["Check", "timezone_offset"], axis = 1)
bg2 = bg2.rename(columns = {"point_value(mg/dL)": "point_value.mg.dL"})

bg2["point_value.mg.dL"] = bg2["point_value.mg.dL"].interpolate(method = "linear")
bg2["point_value.mg.dL"] = round(bg2["point_value.mg.dL"])
bg2 = bg2.groupby(["point_timestamp"]).mean()
bg2["point_timestamp"] = bg2.index
bg2.index = range(len(bg2))

In [0]:
##----- Interpolating the Heart Rate Data -----##
hr2 = hr.copy()
hr2.point_timestamp = pd.to_datetime(hr2['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00")
point_time_hr = pd.date_range(start = hr2.point_timestamp.min(), end = hr2.point_timestamp.max(), freq = "1min")
point_time_hr = pd.DataFrame({"point_timestamp" : point_time_hr})
point_time_hr["Check"] = 1

In [0]:
##----- Merging to get 1 min intervals -----##
hr2 = point_time_hr.merge(hr2, how = "outer", left_on = "point_timestamp", 
                       right_on = "point_timestamp").sort_values(by = ["point_timestamp"])

hr2 = hr2.drop(["Check", "timezone_offset"], axis = 1)

hr2["point_value"] = hr2["point_value"].interpolate(method = "linear")
hr2["point_value"] = round(hr2["point_value"])
hr2 = hr2.groupby(["point_timestamp"]).mean()
hr2["point_timestamp"] = hr2.index
hr2.index = range(len(hr2))

In [0]:
##----- Joining Heart Rate to Blood Sugar -----##
bg2 = bg2.merge(hr2, how = "left", left_on = "point_timestamp", right_on = "point_timestamp")
bg2.dropna(axis = 0, inplace = True)

In [0]:
##----- Interpolating the Distance Data -----##
da2 = da.copy()
da_iphone = da2[da2.device == "iPhone"]
da_fitbit = da2[da2.device == "FitbitWatch"]
da_iphone = da_iphone[da_iphone["point_value(kilometers)"] > 0]
da_fitbit = da_fitbit[da_fitbit["point_value(kilometers)"] > 0]

da_iphone.point_timestamp = pd.to_datetime(da_iphone['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00")
da_fitbit.point_timestamp = pd.to_datetime(da_fitbit['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00")

da_full = da_iphone.append(da_fitbit, ignore_index=True)
da_full = da_full.groupby(["point_timestamp"]).mean()
da_full["point_timestamp"] = da_full.index
da_full.index = range(len(da_full))
da_full = da_full.drop(["timezone_offset"], axis = 1)


point_time_da = pd.date_range(start = da_full.point_timestamp.min(), end = da_full.point_timestamp.max(), freq = "1min")
point_time_da = pd.DataFrame({"point_timestamp" : point_time_da})
point_time_da["Check"] = 1

In [0]:
##----- Merging to get 1 min intervals -----##
da_full = point_time_da.merge(da_full, how = "outer", left_on = "point_timestamp", 
                       right_on = "point_timestamp").sort_values(by = ["point_timestamp"])

da_full = da_full.drop(["Check"], axis = 1)
da_full = da_full.rename(columns = {"point_value(kilometers)": "point_value.kilometers"})

da_full["point_value.kilometers"] = da_full["point_value.kilometers"].interpolate(method = "linear")

da_full = da_full.groupby(["point_timestamp"]).mean()
da_full["point_timestamp"] = da_full.index
da_full.index = range(len(da_full))

In [0]:
##----- Joining Distance to Blood Sugar & Heart Rate -----##
bg2 = bg2.merge(da_full, how = "left", left_on = "point_timestamp", right_on = "point_timestamp")

In [0]:
##----- Aggregating Data to 5 min intervals -----##
bg2 = bg2[0:len(bg2) - len(bg2) % 5]
bg2["grp"] = np.repeat(range(1,int((len(bg2) + 5) /5) ), 5)
bg2 = bg2.groupby(["grp"]).agg({"point_value.mg.dL": 'mean' , "point_value": 'mean',
                           "point_value.kilometers": 'mean', 'point_timestamp': 'min'})
bg2["grp"] = bg2.index
bg2.index = range(len(bg2))
bg2["point_value.mg.dL"] = round(bg2["point_value.mg.dL"])
bg2["point_value"] = round(bg2["point_value"])

In [0]:
##----- Creating 5 min Future Value as Y -----##
bg2["future"] = pd.to_datetime(bg2['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=5)

bg2 = bg2.merge(bg2[["point_timestamp", "point_value.mg.dL"]], how = "inner", left_on = "future", right_on = "point_timestamp")
bg2 = bg2.drop(columns = ["point_timestamp_y"], axis = 1)
bg2 = bg2.rename(columns = {"point_value.mg.dL_x" : "point_value.mg.dL", "point_value.mg.dL_y" : "Y"})

In [0]:
##----- Getting Moving average of past Blood-Sugar level as a Predictor -----##
bg2 = bg2.dropna(how = "any")
bg2["maverage"] = bg2.loc[:, ["point_value.mg.dL"]].rolling(window=12, min_periods = 1).mean()
mavg = bg2["maverage"].tolist()
mavg.insert(0,np.nan)
mavg.pop(len(mavg)-1)
bg2["maverage"] = mavg
bg2.loc[0,"maverage"] = bg2.loc[1,"maverage"]
bg2["maverage"] = round(bg2["maverage"])

In [0]:
##----- Splitting data into Train and 1 week Validation Data set -----##
bg2 = bg2.rename(columns = {"point_timestamp_x" : "point_timestamp"})
bg2_train = bg2.loc[(bg2["point_timestamp"] <= "2017-05-29 23:59:00")]
bg2_Test = bg2.loc[(bg2["point_timestamp"] > "2017-05-29 23:59:00") & (bg2["point_timestamp"] <= "2017-06-05 23:59:00")]
bg2_actTest = bg[(bg["point_timestamp"] > "2017-05-29 23:59:00") & (bg["point_timestamp"] <= "2017-06-05 23:59:00")]
bg2_actTest["point_timestamp"] = pd.to_datetime(bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00")

bg2_actTest = bg2_actTest.groupby("point_timestamp").mean()
bg2_actTest["point_timestamp"] = bg2_actTest.index
bg2_actTest.index = range(0,len(bg2_actTest))
bg2_actTest = bg2_actTest.drop(columns = ["timezone_offset"], axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [0]:
##----- Assuming that the test data has a new starting point. Recalculating moving average -----##
bg2_Test["maverage"] = np.nan
bg2_Test["maverage"] = bg2_Test.loc[:, ["point_value.mg.dL"]].rolling(window=12, min_periods = 1).mean()

mavg2 = bg2_Test["maverage"].tolist()
mavg2.insert(0,np.nan)
mavg2.pop(len(mavg2)-1)
bg2_Test["maverage"] = mavg2
bg2_Test = bg2_Test.reset_index()
bg2_Test.loc[0,"maverage"] = bg2_Test.loc[1,"maverage"]
bg2_Test["maverage"] = round(bg2_Test["maverage"])

bg2_train["speed"] = bg2_train["point_value.kilometers"]/(5/60)
bg2_Test["speed"] = bg2_Test["point_value.kilometers"]/(5/60)
bg2_train["day_night"] = np.where((bg2_train['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") >= 7)
                                            & (bg2_train['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") < 19),
                                            1,0)

bg2_Test["day_night"] = np.where((bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") >= 7)
                                            & (bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") < 19),
                                            1,0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [0]:
##----- Ensuring the 1 min difference in the actual test data matches the interpolated one to calculate actual MSE -----##
bg2_actTest["point_timestamp"] = np.where((bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 2)
            | (bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 7),
                                          pd.to_datetime(bg2_actTest['point_timestamp'] - pd.Timedelta(minutes=1)),
                                                         np.where((bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 0)
            | (bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 5), 
            pd.to_datetime(bg2_actTest['point_timestamp'] + pd.Timedelta(minutes=1)),bg2_actTest["point_timestamp"] + pd.Timedelta(minutes=0)))


In [0]:
##----- Scaling Data for Neural Nets -----##
bg2_train = bg2_train.drop(columns = ["grp"])
bg2_Test = bg2_Test.drop(columns = ["index","grp"])
a = pd.concat([bg2_train, bg2_Test])
scaled = a.copy()
scaled.loc[:,['point_value.mg.dL', 'point_value', 'point_value.kilometers',
        'Y', 'maverage', 'speed', 'day_night']] -= scaled.drop(columns = ["point_timestamp", "future"], axis=1).min()

scaled.loc[:,['point_value.mg.dL', 'point_value', 'point_value.kilometers',
        'Y', 'maverage', 'speed', 'day_night']] /= scaled.drop(columns = ["point_timestamp", "future"], axis=1).max()

scaled = scaled.drop(columns = ["point_timestamp", "future"])
scaled_train = scaled[0:len(bg2_train)]
scaled_test = scaled[(len(bg2_train)):len(scaled)]

In [0]:
scaled_test1 = scaled_test.copy()
scaled_test1["day_night"] = scaled_test1["day_night"].astype("int")
list_pred = []
seed(451)

model = Sequential()
model.add(Dense(3, input_dim=6, activation='linear'))
model.add(Dense(2, activation='linear'))
model.add(Dense(1, activation='linear'))
sgd = optimizers.SGD(lr=0.1)
#adam = optimizers.adam(lr = 0.1)
model.compile(loss='mean_squared_error', optimizer=sgd)

In [0]:
random.seed(454)
model.fit(scaled_train.loc[:, ['point_value.mg.dL', 'point_value', 'point_value.kilometers',
              'maverage', 'speed', 'day_night']], scaled_train.loc[:,"Y"], epochs=1000, batch_size=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f5beb7af6a0>

In [0]:
for i in range(1,13):
  pr_nn = model.predict(scaled_test1.drop(columns = ["Y"]))
  pr_nn1 = pr_nn * (max(a.Y) - min(a.Y)) + min(a.Y)
  pr_nn1 = pr_nn1.astype("int")
  list_pred.append(pr_nn1)
  scaled_test1["maverage"] = (scaled_test1["maverage"] + scaled_test1["point_value.mg.dL"]) / 2
  scaled_test1["point_value.mg.dL"] = pr_nn

nn_pred = pd.DataFrame.from_records(list_pred).transpose()
nn_pred = nn_pred.rename(columns = {0 : "Mins_5", 1 : "Mins_10", 2 : "Mins_15", 3 : "Mins_20", 4 : "Mins_25", 5 : "Mins_30", 6 : "Mins_35", 7 : "Mins_40",
                          8 : "Mins_45", 9 : "Mins_50", 10 : "Mins_55", 11 : "Mins_60"})

In [0]:
##----- Joining actual values for every 5 mins until 60 minutes -----##
bg2_actTest["cc"] = 1
bg2_Test = bg2_Test.merge(bg2_actTest.loc[:,["point_timestamp","cc"]], how = "left",left_on = "point_timestamp", right_on = "point_timestamp" )
bg2_Test.head()

Unnamed: 0,point_value.mg.dL,point_value,point_value.kilometers,point_timestamp,future,Y,maverage,speed,day_night,cc
0,137.0,105.0,0.016166,2017-05-30 00:01:00,2017-05-30 00:06:00,133.0,137.0,0.193988,0,1.0
1,133.0,110.0,0.015808,2017-05-30 00:06:00,2017-05-30 00:11:00,123.0,137.0,0.189699,0,1.0
2,123.0,119.0,0.015451,2017-05-30 00:11:00,2017-05-30 00:16:00,113.0,135.0,0.185411,0,1.0
3,113.0,114.0,0.015093,2017-05-30 00:16:00,2017-05-30 00:21:00,104.0,131.0,0.181122,0,1.0
4,104.0,110.0,0.014736,2017-05-30 00:21:00,2017-05-30 00:26:00,96.0,126.0,0.176833,0,1.0


In [0]:
bg2_Test = bg2_Test.drop(columns = ["Y"])
bg2_Test["point_value.mg.dL"] = np.where((bg2_Test["cc"] == 1 ), bg2_Test["point_value.mg.dL"], np.nan)

In [0]:
##----- Getting 5 min actual future values -----##
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_5_actual'})

In [0]:
##----- Getting 10 min actual future values -----##
bg2_Test["future10"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=10)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future10", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_10_actual'})

In [0]:
##----- Getting 15 min actual future values -----##
bg2_Test["future15"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=15)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future15", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_15_actual'})

In [0]:
##----- Getting 20 min actual future values -----##
bg2_Test["future20"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=20)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future20", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_20_actual'})

In [0]:
##----- Getting 25 min actual future values -----##
bg2_Test["future25"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=25)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future25", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_25_actual'})

In [0]:
##----- Getting 30 min actual future values -----##
bg2_Test["future30"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=30)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future30", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_30_actual'})

In [0]:
##----- Getting 35 min actual future values -----##
bg2_Test["future35"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=35)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future35", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_35_actual'})

In [0]:
##----- Getting 40 min actual future values -----##
bg2_Test["future40"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=40)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future40", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_40_actual'})

In [0]:
##----- Getting 45 min actual future values -----##
bg2_Test["future45"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=45)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future45", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_45_actual'})

In [0]:
##----- Getting 50 min actual future values -----##
bg2_Test["future50"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=50)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future50", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_50_actual'})

In [0]:
##----- Getting 55 min actual future values -----##
bg2_Test["future55"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=55)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future55", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_55_actual'})

In [0]:
##----- Getting 60 min actual future values -----##
bg2_Test["future60"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=60)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future60", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_60_actual'})

In [0]:
##----- Creating a data frame with 1 hour trend of actual Blood sugar levels -----##
min_cols = [col for col in bg2_Test.columns if 'Mins_' in col]
act = bg2_Test.copy()
act = act.loc[:,min_cols]
nn_pred = nn_pred.astype("int")

In [0]:
##----- Calculating Rmse -----##
diff_list = []
ard_list = []
for i in range(0,12):
  diff_list.append((act.iloc[:,i] - nn_pred.iloc[:,i])**2)
  ard_list.append( (abs(act.iloc[:,i] - nn_pred.iloc[:,i]) / nn_pred.iloc[:,i]) *100 ) 
diff_tb = pd.DataFrame.from_records(diff_list).transpose()
ARD_tb = pd.DataFrame.from_records(ard_list).transpose()
rmse = (sum(diff_tb.sum()) / ((len(diff_tb.columns) * len(diff_tb)) - sum(len(diff_tb) - diff_tb.count())))**0.5
MARD = sum(ARD_tb.sum()) / ((len(ARD_tb.columns) * len(ARD_tb)) - sum(len(ARD_tb) - ARD_tb.count()))
print("RMSE:", round(rmse,2))
print("MARD:", round(MARD,2))

RMSE: 25.87
MARD: 12.19


In [0]:
##----- Function for Clarkes and Parkes Error Grid -----##
def clarke_error_zone_detailed(act, pred):
    """
    This function outputs the Clarke Error Grid region (encoded as integer)
    for a combination of actual and predicted value
    Based on 'Evaluating clinical accuracy of systems for self-monitoring of blood glucose':
    https://care.diabetesjournals.org/content/10/5/622
    """
    # Zone A
    if (act < 70 and pred < 70) or abs(act - pred) < 0.2 * act:
        return 0
    # Zone E - left upper
    if act <= 70 and pred >= 180:
        return 8
    # Zone E - right lower
    if act >= 180 and pred <= 70:
        return 7
    # Zone D - right
    if act >= 240 and 70 <= pred <= 180:
        return 6
    # Zone D - left
    if act <= 70 <= pred <= 180:
        return 5
    # Zone C - upper
    if 70 <= act <= 290 and pred >= act + 110:
        return 4
    # Zone C - lower
    if 130 <= act <= 180 and pred <= (7/5) * act - 182:
        return 3
    # Zone B - upper
    if act < pred:
        return 2
    # Zone B - lower
    return 1

def parkes_error_zone_detailed(act, pred, diabetes_type):
    """
    This function outputs the Parkes Error Grid region (encoded as integer)
    for a combination of actual and predicted value
    for type 1 and type 2 diabetic patients
    Based on the article 'Technical Aspects of the Parkes Error Grid':
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3876371/
    """
    def above_line(x_1, y_1, x_2, y_2, strict=False):
        if x_1 == x_2:
            return False

        y_line = ((y_1 - y_2) * act + y_2 * x_1 - y_1 * x_2) / (x_1 - x_2)
        return pred > y_line if strict else pred >= y_line

    def below_line(x_1, y_1, x_2, y_2, strict=False):
        return not above_line(x_1, y_1, x_2, y_2, not strict)

    def parkes_type_1(act, pred):
        # Zone E
        if above_line(0, 150, 35, 155) and above_line(35, 155, 50, 550):
            return "E"
        # Zone D - left upper
        if (pred > 100 and above_line(25, 100, 50, 125) and
                above_line(50, 125, 80, 215) and above_line(80, 215, 125, 550)):
            return "D"
        # Zone D - right lower
        if (act > 250 and below_line(250, 40, 550, 150)):
            return "D"
        # Zone C - left upper
        if (pred > 60 and above_line(30, 60, 50, 80) and
                above_line(50, 80, 70, 110) and above_line(70, 110, 260, 550)):
            return "C"
        # Zone C - right lower
        if (act > 120 and below_line(120, 30, 260, 130) and below_line(260, 130, 550, 250)):
            return "C"
        # Zone B - left upper
        if (pred > 50 and above_line(30, 50, 140, 170) and
                above_line(140, 170, 280, 380) and (act < 280 or above_line(280, 380, 430, 550))):
            return "B"
        # Zone B - right lower
        if (act > 50 and below_line(50, 30, 170, 145) and
                below_line(170, 145, 385, 300) and (act < 385 or below_line(385, 300, 550, 450))):
            return "B"
        # Zone A
        return "A"

    def parkes_type_2(act, pred):
        # Zone E
        if (pred > 200 and above_line(35, 200, 50, 550)):
            return "E"
        # Zone D - left upper
        if (pred > 80 and above_line(25, 80, 35, 90) and above_line(35, 90, 125, 550)):
            return "D"
        # Zone D - right lower
        if (act > 250 and below_line(250, 40, 410, 110) and below_line(410, 110, 550, 160)):
            return "D"
        # Zone C - left upper
        if (pred > 60 and above_line(30, 60, 280, 550)):
            return "C"
        # Zone C - right lower
        if (below_line(90, 0, 260, 130) and below_line(260, 130, 550, 250)):
            return "C"
        # Zone B - left upper
        if (pred > 50 and above_line(30, 50, 230, 330) and
                (act < 230 or above_line(230, 330, 440, 550))):
            return "B"
        # Zone B - right lower
        if (act > 50 and below_line(50, 30, 90, 80) and below_line(90, 80, 330, 230) and
                (act < 330 or below_line(330, 230, 550, 450))):
            return "B"
        # Zone A
        return "A"

    if diabetes_type == 1:
        return parkes_type_1(act, pred)

    if diabetes_type == 2:
        return parkes_type_2(act, pred)

    raise Exception('Unsupported diabetes type')

clarke_error_zone_detailed = np.vectorize(clarke_error_zone_detailed)
parkes_error_zone_detailed = np.vectorize(parkes_error_zone_detailed)

def zone_accuracy(act_arr, pred_arr, mode='clarke', detailed=False, diabetes_type=1):
    """
    Calculates the average percentage of each zone based on Clarke or Parkes
    Error Grid analysis for an array of predictions and an array of actual values
    """
    acc = np.zeros(9)
    if mode == 'clarke':
        res = clarke_error_zone_detailed(act_arr, pred_arr)
    elif mode == 'parkes':
        res = parkes_error_zone_detailed(act_arr, pred_arr, diabetes_type)
    else:
        raise Exception('Unsupported error grid mode')

    acc_bin = np.bincount(res)
    acc[:len(acc_bin)] = acc_bin

    if not detailed:
        acc[1] = acc[1] + acc[2]
        acc[2] = acc[3] + acc[4]
        acc[3] = acc[5] + acc[6]
        acc[4] = acc[7] + acc[8]
        acc = acc[:5]

    return acc / sum(acc)

In [0]:
##----- Creating a dataframe to calculate the Parkes Zone of Type 1 Diabetes -----##
zones_list = []
for i in range(0, len(act)):
  if act.iloc[i,].count() == 12:
    zones_list.append(parkes_error_zone_detailed(act.iloc[i,], nn_pred.iloc[i,],1))
zones_nn = pd.DataFrame.from_records(zones_list)
zones_nn.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,A,A,A,B,B,B,B,C,C,C,C,C
1,A,A,B,B,B,B,C,C,C,C,C,C
2,A,A,A,B,B,B,C,C,C,C,C,C
3,A,A,A,B,B,B,C,C,C,C,C,C
4,A,A,A,A,B,B,C,C,C,C,C,C


In [0]:
##----- Creating a dataframe to calculate the Parkes Zone of Type 2 Diabetes -----##
zones_list2 = []
for i in range(0, len(act)):
  if act.iloc[i,].count() == 12:
    zones_list2.append(parkes_error_zone_detailed(act.iloc[i,], nn_pred.iloc[i,],2))
zones2_nn = pd.DataFrame.from_records(zones_list2)
zones2_nn.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,A,A,A,A,A,B,B,B,B,C,C,C
1,A,A,A,A,A,B,B,B,B,C,C,C
2,A,A,A,A,A,B,B,B,B,C,C,C
3,A,A,A,A,A,A,B,B,B,B,C,C
4,A,A,A,A,A,A,B,B,B,B,B,C


In [0]:
##----- Calculating Frequency of Each Zone For type 1 Diabetes -----##
freq = np.unique(zones_nn, return_counts = True)
freq = pd.DataFrame.from_records(freq).transpose()
freq["percentage"] = (freq[1] / (len(zones_nn) * len(zones_nn.columns))) * 100
freq = freq.rename(columns = {0: "Zone", 1: "Count"})
freq

Unnamed: 0,Zone,Count,percentage
0,A,14589,82.9863
1,B,2890,16.4391
2,C,101,0.574516


In [0]:
##----- Calculating Frequency of Each Zone For type 2 Diabetes -----##
freq2 = np.unique(zones2_nn, return_counts = True)
freq2 = pd.DataFrame.from_records(freq2).transpose()
freq2["percentage"] = (freq2[1] / (len(zones2_nn) * len(zones2_nn.columns))) * 100
freq2 = freq2.rename(columns = {0: "Zone", 1: "Count"})
freq2

Unnamed: 0,Zone,Count,percentage
0,A,15996,90.9898
1,B,1560,8.87372
2,C,24,0.136519


In [0]:
##----- Checking Parkes Zones for 60 min predictions of Type 1 Diabetes -----##
zones_nn.iloc[:,11].value_counts() / len(zones_nn) * 100

A    63.481229
B    35.290102
C     1.228669
Name: 11, dtype: float64

In [0]:
##----- Checking Parkes Zones for 60 min predictions of Type 2 Diabetes -----##
zones2_nn.iloc[:,11].value_counts() / len(zones2_nn) * 100

A    80.887372
B    18.498294
C     0.614334
Name: 11, dtype: float64

In [0]:
##----- Testing Model on 5 Weeks Data -----##
bg2_train = bg2.loc[(bg2["point_timestamp"] <= "2017-05-29 23:59:00")]
bg2_Test = bg2.loc[(bg2["point_timestamp"] > "2017-06-05 23:59:00") & (bg2["point_timestamp"] <= "2017-07-11 23:59:00")]
bg2_actTest = bg[(bg["point_timestamp"] > "2017-06-05 23:59:00") & (bg["point_timestamp"] <= "2017-07-11 23:59:00")]
bg2_actTest["point_timestamp"] = pd.to_datetime(bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00")

bg2_actTest = bg2_actTest.groupby("point_timestamp").mean()
bg2_actTest["point_timestamp"] = bg2_actTest.index
bg2_actTest.index = range(0,len(bg2_actTest))
bg2_actTest = bg2_actTest.drop(columns = ["timezone_offset"], axis = 1)

##----- Assuming that the test data has a new starting point. Recalculating moving average -----##
bg2_Test["maverage"] = np.nan
bg2_Test["maverage"] = bg2_Test.loc[:, ["point_value.mg.dL"]].rolling(window=12, min_periods = 1).mean()

mavg2 = bg2_Test["maverage"].tolist()
mavg2.insert(0,np.nan)
mavg2.pop(len(mavg2)-1)
bg2_Test["maverage"] = mavg2
bg2_Test = bg2_Test.reset_index()
bg2_Test.loc[0,"maverage"] = bg2_Test.loc[1,"maverage"]
bg2_Test["maverage"] = round(bg2_Test["maverage"])

bg2_train["speed"] = bg2_train["point_value.kilometers"]/(5/60)
bg2_Test["speed"] = bg2_Test["point_value.kilometers"]/(5/60)
bg2_train["day_night"] = np.where((bg2_train['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") >= 7)
                                            & (bg2_train['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") < 19),
                                            1,0)

bg2_Test["day_night"] = np.where((bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") >= 7)
                                            & (bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[11:13].astype("int") < 19),
                                            1,0)

##----- Ensuring the 1 min difference in the actual test data matches the interpolated one to calculate actual MSE -----##
bg2_actTest["point_timestamp"] = np.where((bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 2)
            | (bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 7),
                                          pd.to_datetime(bg2_actTest['point_timestamp'] - pd.Timedelta(minutes=1)),
                                                         np.where((bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 0)
            | (bg2_actTest['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[15:16].astype("int") == 5), 
            pd.to_datetime(bg2_actTest['point_timestamp'] + pd.Timedelta(minutes=1)),bg2_actTest["point_timestamp"] + pd.Timedelta(minutes=0)))


##----- Scaling Data for Neural Nets -----##
bg2_train = bg2_train.drop(columns = ["grp"])
bg2_Test = bg2_Test.drop(columns = ["index","grp"])
a = pd.concat([bg2_train, bg2_Test])
scaled = a.copy()
scaled.loc[:,['point_value.mg.dL', 'point_value', 'point_value.kilometers',
        'Y', 'maverage', 'speed', 'day_night']] -= scaled.drop(columns = ["point_timestamp", "future"], axis=1).min()

scaled.loc[:,['point_value.mg.dL', 'point_value', 'point_value.kilometers',
        'Y', 'maverage', 'speed', 'day_night']] /= scaled.drop(columns = ["point_timestamp", "future"], axis=1).max()

scaled = scaled.drop(columns = ["point_timestamp", "future"])
scaled_train = scaled[0:len(bg2_train)]
scaled_test = scaled[(len(bg2_train)):len(scaled)]

scaled_test1 = scaled_test.copy()
scaled_test1["day_night"] = scaled_test1["day_night"].astype("int")
list_pred = []
seed(451)

model = Sequential()
model.add(Dense(3, input_dim=6, activation='linear'))
model.add(Dense(2, activation='linear'))
model.add(Dense(1, activation='linear'))
sgd = optimizers.SGD(lr=0.1)
#adam = optimizers.adam(lr = 0.1)
model.compile(loss='mean_squared_error', optimizer=sgd)

seed(454)
model.fit(scaled_train.loc[:, ['point_value.mg.dL', 'point_value', 'point_value.kilometers',
              'maverage', 'speed', 'day_night']], scaled_train.loc[:,"Y"], epochs=1000, batch_size=1000)

for i in range(1,13):
  pr_nn = model.predict(scaled_test1.drop(columns = ["Y"]))
  pr_nn1 = pr_nn * (max(a.Y) - min(a.Y)) + min(a.Y)
  pr_nn1 = pr_nn1.astype("int")
  list_pred.append(pr_nn1)
  scaled_test1["maverage"] = (scaled_test1["maverage"] + scaled_test1["point_value.mg.dL"]) / 2
  scaled_test1["point_value.mg.dL"] = pr_nn

nn_pred = pd.DataFrame.from_records(list_pred).transpose()
nn_pred = nn_pred.rename(columns = {0 : "Mins_5", 1 : "Mins_10", 2 : "Mins_15", 3 : "Mins_20", 4 : "Mins_25", 5 : "Mins_30", 6 : "Mins_35", 7 : "Mins_40",
                          8 : "Mins_45", 9 : "Mins_50", 10 : "Mins_55", 11 : "Mins_60"})

##----- Joining actual values for every 5 mins until 60 minutes -----##
bg2_actTest["cc"] = 1
bg2_Test = bg2_Test.merge(bg2_actTest.loc[:,["point_timestamp","cc"]], how = "left",left_on = "point_timestamp", right_on = "point_timestamp" )
bg2_Test.head()

bg2_Test = bg2_Test.drop(columns = ["Y"])
bg2_Test["point_value.mg.dL"] = np.where((bg2_Test["cc"] == 1 ), bg2_Test["point_value.mg.dL"], np.nan)

##----- Getting 5 min actual future values -----##
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_5_actual'})

##----- Getting 10 min actual future values -----##
bg2_Test["future10"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=10)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future10", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_10_actual'})

##----- Getting 15 min actual future values -----##
bg2_Test["future15"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=15)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future15", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_15_actual'})

##----- Getting 20 min actual future values -----##
bg2_Test["future20"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=20)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future20", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_20_actual'})

##----- Getting 25 min actual future values -----##
bg2_Test["future25"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=25)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future25", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_25_actual'})

##----- Getting 30 min actual future values -----##
bg2_Test["future30"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=30)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future30", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_30_actual'})

##----- Getting 35 min actual future values -----##
bg2_Test["future35"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=35)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future35", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_35_actual'})

##----- Getting 40 min actual future values -----##
bg2_Test["future40"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=40)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future40", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_40_actual'})

##----- Getting 45 min actual future values -----##
bg2_Test["future45"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=45)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future45", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_45_actual'})

##----- Getting 50 min actual future values -----##
bg2_Test["future50"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=50)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future50", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_50_actual'})

##----- Getting 55 min actual future values -----##
bg2_Test["future55"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=55)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future55", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_55_actual'})

##----- Getting 60 min actual future values -----##
bg2_Test["future60"] = pd.to_datetime(bg2_Test['point_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').str[:17]+"00") + pd.Timedelta(minutes=60)
bg2_Test = bg2_Test.merge(bg2_Test.loc[:,["point_timestamp", "point_value.mg.dL"]], how = "left", left_on = "future60", 
                          right_on = "point_timestamp")
bg2_Test = bg2_Test.drop(columns = ['point_timestamp_y'])
bg2_Test = bg2_Test.rename(columns = {'point_value.mg.dL_x' : 'point_value.mg.dL', 'point_timestamp_x': 'point_timestamp', 
  'point_value.mg.dL_y' : 'Mins_60_actual'})

##----- Creating a data frame with 1 hour trend of actual Blood sugar levels -----##
min_cols = [col for col in bg2_Test.columns if 'Mins_' in col]
act = bg2_Test.copy()
act = act.loc[:,min_cols]
nn_pred = nn_pred.astype("int")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the 

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [0]:
##----- Calculating Rmse -----##
diff_list = []
ard_list = []
for i in range(0,12):
  diff_list.append((act.iloc[:,i] - nn_pred.iloc[:,i])**2)
  ard_list.append( (abs(act.iloc[:,i] - nn_pred.iloc[:,i]) / nn_pred.iloc[:,i])*100 ) 
diff_tb = pd.DataFrame.from_records(diff_list).transpose()
ARD_tb = pd.DataFrame.from_records(ard_list).transpose()
diff_tb.head()
rmse = (sum(diff_tb.sum()) / ((len(diff_tb.columns) * len(diff_tb)) - sum(len(diff_tb) - diff_tb.count())))**0.5
MARD = sum(ARD_tb.sum()) / ((len(ARD_tb.columns) * len(ARD_tb)) - sum(len(ARD_tb) - ARD_tb.count()))
print("RMSE:", round(rmse,2))
print("MARD:", round(MARD,2))


RMSE: 26.06
MARD: 11.82


In [0]:
##----- Creating a dataframe to calculate the Parkes Zone for Type 1 Diabetes -----##
zones_list = []
for i in range(0, len(act)):
  if act.iloc[i,].count() == 12:
    zones_list.append(parkes_error_zone_detailed(act.iloc[i,], nn_pred.iloc[i,],1))
zones_nn = pd.DataFrame.from_records(zones_list)
zones_nn.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,A,A,A,A,A,B,B,B,B,B,B,B
1,A,A,A,A,B,B,B,B,B,B,B,B
2,A,A,A,A,B,B,B,B,B,B,B,B
3,A,A,A,A,B,B,B,B,B,B,B,B
4,A,A,A,A,A,B,B,B,B,B,B,B


In [0]:
##----- Creating a dataframe to calculate the Parkes Zone for Type 2 Diabetes -----##
zones_list2 = []
for i in range(0, len(act)):
  if act.iloc[i,].count() == 12:
    zones_list2.append(parkes_error_zone_detailed(act.iloc[i,], nn_pred.iloc[i,],2))
zones2_nn = pd.DataFrame.from_records(zones_list2)
zones2_nn.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,A,A,A,A,A,A,A,B,B,B,B,B
1,A,A,A,A,A,A,A,B,B,B,B,B
2,A,A,A,A,A,A,A,B,B,B,B,B
3,A,A,A,A,A,A,A,B,B,B,B,B
4,A,A,A,A,A,A,A,A,A,A,B,B


In [0]:
##----- Calculating Frequency for Test Data of Each Zone for Type 1 Diabetes -----##
freq = np.unique(zones_nn, return_counts = True)
freq = pd.DataFrame.from_records(freq).transpose()
freq["percentage"] = (freq[1] / (len(zones_nn) * len(zones_nn.columns))) * 100
freq = freq.rename(columns = {0: "Zone", 1: "Count"})
freq

Unnamed: 0,Zone,Count,percentage
0,A,81273,85.5253
1,B,12614,13.274
2,C,1116,1.17439
3,D,25,0.026308


In [0]:
##----- Calculating Frequency for Test Data of Each Zone for Type 2 Diabetes -----##
freq1 = np.unique(zones2_nn, return_counts = True)
freq1 = pd.DataFrame.from_records(freq1).transpose()
freq1["percentage"] = (freq1[1] / (len(zones2_nn) * len(zones2_nn.columns))) * 100
freq1 = freq1.rename(columns = {0: "Zone", 1: "Count"})
freq1

Unnamed: 0,Zone,Count,percentage
0,A,87579,92.1613
1,B,7072,7.44202
2,C,372,0.391464
3,D,5,0.00526161


In [0]:
##----- Checking Parkes Zones on Test Data for 60 min predictions for Type 1 Diabetes -----##
zones_nn.iloc[:,11].value_counts() / len((zones_nn)) * 100

A    67.988382
B    27.983331
C     3.876752
D     0.151534
Name: 11, dtype: float64

In [0]:
##----- Checking Parkes Zones on Test Data for 60 min predictions for Type 2 Diabetes -----##
zones2_nn.iloc[:,11].value_counts() / len((zones2_nn)) * 100

A    82.283117
B    16.277308
C     1.401692
D     0.037884
Name: 11, dtype: float64