## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.linear_model import LassoCV
from sklearn.metrics import classification_report
import pickle
import plotly.graph_objects as go
import datetime
from utils.preprocessing_garmin import *
from utils.plotting_sleep import *

sid_shift_start_info = {"S1": datetime.date(2017, 3, 21),
                        "S2": datetime.date(2017, 9, 13),
                        "S3": datetime.date(2017, 9, 13),
                        "S4": datetime.date(2018, 2, 28),
                        "S5": datetime.date(2018, 2, 28),
                        }


Let's plot the subject's detected sleep periods by Garmin to gain an understanding of the need for sleep imputation.

In [2]:
sid = "S1"
aggregate_by_subject(sid)
aggregate_df = read_aggregate(sid)

summary_aggregate_df = preprocess_CRP(aggregate_df, original=True)
garmin_sleep_df = summary_aggregate_df.loc[summary_aggregate_df["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]
plot_sleep_2D(garmin_sleep_df)

Finished Loading Epochs
Finished Loading Heart Rates
Finished Loading Sleeps


This looks pretty consistent, and all sleep periods were tracked by garmin. This person has a regular sleep schedule. Let's look at someone with a irregular sleep schedule. 

In [3]:
sid = "S4"
start_shift_date = sid_shift_start_info[sid]
aggregate_by_subject(sid)
aggregate_df = read_aggregate(sid)

summary_aggregate_df = preprocess_CRP(aggregate_df, original=True)
garmin_sleep_df = summary_aggregate_df.loc[summary_aggregate_df["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]
plot_sleep_2D(garmin_sleep_df, plot_schedule_change=True, start_shift_date=start_shift_date)

Finished Loading Epochs
Finished Loading Heart Rates
Finished Loading Sleeps


It seems like Garmin didn't do a very good job picking up the sleep labels, even though we know that this subject wears this wearable device almost always. Now let's build a simple sleep imputation method using logistic regression to impute these information. 

In [5]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

features, garmin_sleep_labels = extract_Xy(sid)

clf = Pipeline([('scaler', StandardScaler()),
               ('classifier', LogisticRegressionCV(penalty='l1', solver='saga'))])

clf.fit(features, garmin_sleep_labels)
pred_sleep_labels = clf.predict(features)
print("Individualized model: ")
print(classification_report(garmin_sleep_labels, pred_sleep_labels))




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did not converge


The max_iter was reached which means the coef_ did no

Individualized model: 
              precision    recall  f1-score   support

           0       0.97      0.94      0.95      1515
           1       0.94      0.97      0.96      1618

    accuracy                           0.96      3133
   macro avg       0.96      0.96      0.96      3133
weighted avg       0.96      0.96      0.96      3133




The max_iter was reached which means the coef_ did not converge



In [6]:
aggre_fe = aggregate_feature_transform(aggregate_df)
y_pred = clf.predict(aggre_fe)

aggregate_df['predictedSleep'] = y_pred
small_CRP = preprocess_CRP(aggregate_df)

garmin_sleep_df = small_CRP.loc[small_CRP["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]
pred_sleep_df = small_CRP.loc[small_CRP["predictedSleep"] == 1, [
    "Date", "Time_In_Day"]]

plot_sleep_2D(garmin_sleep_df, 
            plot_pred = True, plot_schedule_change = True, 
            pred_sleep_df = pred_sleep_df, start_shift_date = start_shift_date)


In [None]:
# report major sleep periods

Let's try for another subject who has irregular sleep.

In [4]:
sid = "S5"
start_shift_date = sid_shift_start_info[sid]
aggregate_by_subject(sid)
aggregate_df = read_aggregate(sid)

summary_aggregate_df = preprocess_CRP(aggregate_df, original=True)
garmin_sleep_df = summary_aggregate_df.loc[summary_aggregate_df["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]

plot_sleep_2D(garmin_sleep_df, plot_schedule_change=True, start_shift_date=start_shift_date)

Finished Loading Epochs
Finished Loading Heart Rates
Finished Loading Sleeps


In [9]:
aggre_fe = aggregate_feature_transform(aggregate_df)
features, garmin_sleep_labels = extract_Xy(sid)

clf = Pipeline([('scaler', StandardScaler()),
               ('classifier', LogisticRegressionCV(penalty='l1', solver='saga'))])

clf.fit(features, garmin_sleep_labels)
y_pred = clf.predict(aggre_fe)

aggregate_df['predictedSleep'] = y_pred
small_CRP = preprocess_CRP(aggregate_df)

garmin_sleep_df = small_CRP.loc[small_CRP["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]
pred_sleep_df = small_CRP.loc[small_CRP["predictedSleep"] == 1, [
    "Date", "Time_In_Day"]]
plot_sleep_2D(garmin_sleep_df, 
            plot_pred = True, plot_schedule_change = True, 
            pred_sleep_df = pred_sleep_df, start_shift_date = start_shift_date)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:
sid = "S4"
start_shift_date = sid_shift_start_info[sid]
aggregate_by_subject(sid)
aggregate_df = read_aggregate(sid)

summary_aggregate_df = preprocess_CRP(aggregate_df, original=True)
garmin_sleep_df = summary_aggregate_df.loc[summary_aggregate_df["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]
plot_sleep_2D(garmin_sleep_df, plot_schedule_change=True, start_shift_date=start_shift_date)

In [18]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

features, garmin_sleep_labels = extract_Xy(sid)
  
# defining parameter range
param_grid = {'svc__C': [0.1, 1, 10, 100, 1000], 
              'svc__gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'svc__kernel': ['rbf']} 

clf = Pipeline(steps=[('scaler', StandardScaler()),
               ('svc', SVC())])

grid = GridSearchCV(clf, param_grid, verbose = 1, n_jobs = 5)

grid.fit(features, garmin_sleep_labels)
pred_sleep_labels = grid.predict(features)
print("Individualized model: ")
print(classification_report(garmin_sleep_labels, pred_sleep_labels))




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Fitting 5 folds for each of 25 candidates, totalling 125 fits
Individualized model: 
              precision    recall  f1-score   support

           0       0.97      0.94      0.96      1515
           1       0.95      0.97      0.96      1618

    accuracy                           0.96      3133
   macro avg       0.96      0.96      0.96      3133
weighted avg       0.96      0.96      0.96      3133



In [19]:
aggre_fe = aggregate_feature_transform(aggregate_df)
y_pred = grid.predict(aggre_fe)

aggregate_df['predictedSleep'] = y_pred
small_CRP = preprocess_CRP(aggregate_df)

garmin_sleep_df = small_CRP.loc[small_CRP["sleepLabel"] == 1, [
    "Date", "Time_In_Day"]]
pred_sleep_df = small_CRP.loc[small_CRP["predictedSleep"] == 1, [
    "Date", "Time_In_Day"]]

plot_sleep_2D(garmin_sleep_df,
              plot_pred=True, plot_schedule_change=True,
              pred_sleep_df=pred_sleep_df, start_shift_date=start_shift_date)
