In [10]:
import os
import sys

# Add modules path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
%notebook inline

from glycemic_patterns.model import Model
import pandas as pd
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

file = "data/ACN_20160331_20160414_FREE_STYLE.txt"
raw_data = pd.read_csv(file, header=0, skiprows=1, delimiter="\t", index_col=0,
                                       usecols=list(range(0, 9)),
                                       parse_dates=['Hora'], decimal=",",
                                       date_parser=lambda x: pd.to_datetime(x, format="%Y/%m/%d %H:%M"))                     
raw_data[["Hora", "Tipo de registro", "Histórico glucosa (mg/dL)"]].head()

Unnamed: 0_level_0,Hora,Tipo de registro,Histórico glucosa (mg/dL)
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
36781,2016-03-31 15:44:00,1,
36817,2016-03-31 15:43:00,0,61.0
36818,2016-03-31 15:58:00,0,61.0
36819,2016-03-31 16:13:00,0,71.0
36820,2016-03-31 16:28:00,0,80.0


In [11]:
data = raw_data[raw_data["Tipo de registro"] == 0][
["Hora","Histórico glucosa (mg/dL)"]].set_index("Hora", drop=True)
data.head()

Unnamed: 0_level_0,Histórico glucosa (mg/dL)
Hora,Unnamed: 1_level_1
2016-03-31 15:43:00,61.0
2016-03-31 15:58:00,61.0
2016-03-31 16:13:00,71.0
2016-03-31 16:28:00,80.0
2016-03-31 16:43:00,83.0


In [12]:
data.shift(periods=4).head(10)

Unnamed: 0_level_0,Histórico glucosa (mg/dL)
Hora,Unnamed: 1_level_1
2016-03-31 15:43:00,
2016-03-31 15:58:00,
2016-03-31 16:13:00,
2016-03-31 16:28:00,
2016-03-31 16:43:00,61.0
2016-03-31 16:58:00,61.0
2016-03-31 17:14:00,71.0
2016-03-31 17:29:00,80.0
2016-03-31 17:44:00,83.0
2016-03-31 17:59:00,83.0


In [13]:
data.shift(periods=-4).tail(10)

Unnamed: 0_level_0,Histórico glucosa (mg/dL)
Hora,Unnamed: 1_level_1
2016-04-14 03:45:00,264.0
2016-04-14 04:00:00,257.0
2016-04-14 04:15:00,250.0
2016-04-14 04:30:00,242.0
2016-04-14 04:45:00,231.0
2016-04-14 05:00:00,223.0
2016-04-14 05:15:00,
2016-04-14 05:30:00,
2016-04-14 05:45:00,
2016-04-14 06:00:00,


In [14]:
X = pd.concat([data.shift(periods=-1),data.shift(periods=1)], axis=1).dropna()
X.columns = ["Glucose_Minus_1h", "Glucose_Plus_1h" ]
X.head()

Unnamed: 0_level_0,Glucose_Minus_1h,Glucose_Plus_1h
Hora,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-03-31 15:58:00,71.0,61.0
2016-03-31 16:13:00,80.0,61.0
2016-03-31 16:28:00,83.0,71.0
2016-03-31 16:43:00,83.0,80.0
2016-03-31 16:58:00,75.0,83.0


In [15]:
def label_map(value):
    hypoglycemia_threshold = 70
    hyperglycemia_threshold = 180
    if value < hypoglycemia_threshold:
        return 0
    elif value > hyperglycemia_threshold:
           return 2
    else:
        return 1

In [16]:
y = data.loc[X.index].iloc[:,0].apply(label_map)
y.head()

Hora
2016-03-31 15:58:00    0
2016-03-31 16:13:00    1
2016-03-31 16:28:00    1
2016-03-31 16:43:00    1
2016-03-31 16:58:00    1
Name: Histórico glucosa (mg/dL), dtype: int64

In [17]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
clf = DecisionTreeClassifier(criterion='gini', splitter='best',
                                              max_depth=5,
                                              min_samples_split=2,
                                              min_samples_leaf=0.1,
                                              min_weight_fraction_leaf=0.0)
                                              
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.008, random_state=0)
train_index, test_index = next(sss.split(X, y))
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]                                           
clf.fit(X_train, y_train)

fig = plt.figure(figsize=(10, 8))
ax = plt.subplot()
fig = plot_decision_regions(X=X_test.as_matrix(),
                            y=y_test.as_matrix(), clf=clf, legend=2)
plt.title('DecisionTreeClassifier')
plt.show()

MemoryError: 