# Generate PA plan and breaks between sedentary bouts for the coming week

Prompt the user to sync data and run this script every Monday morning at 9am.

1. get date of today
2. pull data from the last 7 weeks or all available data less than 7 weeks
3. get user preference for total MET and mode (MVPA/VPA)
4. generate PA plan
5. get calendar data for the week
6. populate "busy" intervals
7. predict time to send prompts to break up sedentary bouts

In [139]:
import pandas as pd
import json
import glob
import math
import datetime as dt
# import configparser
# import os
# from datetime import datetime
# from xlrd.xldate import xldate_as_tuple

today = '2016-09-26'  
# use '2016-09-26' for simulation only, for production change to dt.datetime.today().strftime("%Y-%m-%d")
timestamp_list = pd.date_range(today, periods=96, freq='15min').strftime('%H:%M:%S')
heart_files = glob.glob("heart*.json")

In [2]:
# generate dates of past seven weeks
from pandas.tseries.offsets import *
sevenweeksago = pd.to_datetime(today) - DateOffset(weeks=7)
seven_weeks_date_list = pd.date_range(sevenweeksago, periods=49, freq='D').strftime('%Y-%m-%d')
seven_weeks_date_list

array(['2016-08-08', '2016-08-09', '2016-08-10', '2016-08-11',
       '2016-08-12', '2016-08-13', '2016-08-14', '2016-08-15',
       '2016-08-16', '2016-08-17', '2016-08-18', '2016-08-19',
       '2016-08-20', '2016-08-21', '2016-08-22', '2016-08-23',
       '2016-08-24', '2016-08-25', '2016-08-26', '2016-08-27',
       '2016-08-28', '2016-08-29', '2016-08-30', '2016-08-31',
       '2016-09-01', '2016-09-02', '2016-09-03', '2016-09-04',
       '2016-09-05', '2016-09-06', '2016-09-07', '2016-09-08',
       '2016-09-09', '2016-09-10', '2016-09-11', '2016-09-12',
       '2016-09-13', '2016-09-14', '2016-09-15', '2016-09-16',
       '2016-09-17', '2016-09-18', '2016-09-19', '2016-09-20',
       '2016-09-21', '2016-09-22', '2016-09-23', '2016-09-24', '2016-09-25'], 
      dtype='<U10')

In [3]:
# check if requested data are complete
date_list = []
for i in heart_files:
    date = i.split(".json")[0].split('heart')[1]
    date_list.append(date)
list_ = []
for d in date_list:
    # load step and heart rate for each day
    heart_fname = 'heart'+ d + '.json'
    step_fname = 'step'+ d + '.json'
    with open(heart_fname) as heart_data_file:    
        heart_data = json.load(heart_data_file)
    with open(step_fname) as step_data_file:    
        step_data = json.load(step_data_file)
        
    # get heart rate and step data
    heart_data = pd.DataFrame.from_dict(heart_data['activities-heart-intraday']['dataset'])
    heart_data = heart_data.set_index('time')
    step_data = pd.DataFrame.from_dict(step_data['activities-steps-intraday']['dataset'])
    step_data = step_data.set_index('time')
    # fill in missing data with 0 TODO: or NaN?
    complete_data = pd.DataFrame({'time': timestamp_list,
                                  'time_count': range(len(timestamp_list)),
                                  'heart': 0*len(timestamp_list), 
                                  'step': 0*len(timestamp_list), 
                                  'date': d}, index = timestamp_list)
    for i in complete_data.index:
        try:
            complete_data.loc[i, 'heart'] = heart_data.loc[i, 'value']
            complete_data.loc[i, 'step'] = step_data.loc[i, 'value']
        except:
            next
    list_.append(complete_data)
userData = pd.concat(list_)
weekday = []
week = []

for i in userData.date:
    weekday.append(pd.to_datetime(i).dayofweek)
    week.append(pd.to_datetime(i).week)
    
userData['weekday'] = weekday
userData['week'] = week
# print(heart_data)

print(userData.head(100))

                date  heart  step      time  time_count  weekday  week
00:00:00  2016-08-21     74     0  00:00:00           0        6    33
00:15:00  2016-08-21      0     0  00:15:00           1        6    33
00:30:00  2016-08-21      0     0  00:30:00           2        6    33
00:45:00  2016-08-21      0     0  00:45:00           3        6    33
01:00:00  2016-08-21      0     0  01:00:00           4        6    33
01:15:00  2016-08-21      0     0  01:15:00           5        6    33
01:30:00  2016-08-21      0     0  01:30:00           6        6    33
01:45:00  2016-08-21      0     0  01:45:00           7        6    33
02:00:00  2016-08-21      0     0  02:00:00           8        6    33
02:15:00  2016-08-21      0     0  02:15:00           9        6    33
02:30:00  2016-08-21      0     0  02:30:00          10        6    33
02:45:00  2016-08-21      0     0  02:45:00          11        6    33
03:00:00  2016-08-21      0     0  03:00:00          12        6    33
03:15:

## evaluate activity level MVPA, VPA, MET

#### what is the cut-off value for step-count? For walking is 100 step/min for MVPA or above (https://www.ncbi.nlm.nih.gov/pubmed/16485518), but some MVPA or VPA have less step per minute than walking



In [4]:
age = 21
maxHR = 220 - age
step_cutoff = 0

# mark out MVPA intervals

MVPA = (userData.heart >= 0.64 * maxHR) & (userData.heart < 0.77 * maxHR) & (userData.step > step_cutoff)
userData['MVPAselect'] = MVPA
userData['MVPA'] = 0
userData.loc[userData.MVPAselect, 'MVPA'] = 1
userData.drop('MVPAselect', axis=1, inplace=True)
# mark out VPA intervals
VPA = (userData.heart >= 0.77 * maxHR) & (userData.step > step_cutoff)
userData['VPAselect'] = VPA
userData['VPA'] = 0
userData.loc[userData.VPAselect, 'VPA'] = 1
userData.drop('VPAselect', axis=1, inplace=True)

# calculate MET
userData['MET'] = userData['MVPA'] * 4 * 15 + userData['VPA'] * 8 * 15

In [None]:
userData
# get the last 7 weeks' data. If less than 7 weeks, get all data available. 
# (7 days required for sedentary, but that is for one week)
# DF structure: userID, date, heartrate, step, time, time_index, weekday, week, MVPA, VPA, MET, sedentary, busy

In [None]:
userData[userData['MET'] > 0]

### 1. Determine if MET is evenly distributed


In [5]:
# get daily total MET
weekday_grouped = userData.groupby(['week', 'weekday'])
SumofMETbyWeekday = pd.DataFrame(weekday_grouped['MET'].sum()).reset_index()
# get mean of daily MET of a week (week starts on Monday)
week_MET_grouped = SumofMETbyWeekday.groupby('week')
MeanofMETbyWeek = pd.DataFrame(week_MET_grouped['MET'].mean()).reset_index()
MeanofMETbyWeek = MeanofMETbyWeek.set_index('week')
# get weekly total MET (week starts on Monday)
SumofMETbyWeek = pd.DataFrame(week_MET_grouped['MET'].sum()).reset_index()

# differentiate A1 and A2
SumofMETbyWeekday['even'] = 0
SumofMETbyWeekday['active'] = 0
SumofMETbyWeekday['sedentary'] = 0


for i in SumofMETbyWeekday.index:
    week_number = SumofMETbyWeekday.loc[i, 'week']
    week_mean = MeanofMETbyWeek.loc[week_number, 'MET']
    if SumofMETbyWeekday.loc[i, 'MET'] == week_mean:
        SumofMETbyWeekday.loc[i, 'even'] = 1
    elif SumofMETbyWeekday.loc[i, 'MET'] > week_mean:
        SumofMETbyWeekday.loc[i, 'active'] = 1
    elif SumofMETbyWeekday.loc[i, 'MET'] < week_mean:
        SumofMETbyWeekday.loc[i, 'sedentary'] = 1

print(SumofMETbyWeekday)


    week  weekday  MET  even  active  sedentary
0     33        6    0     1       0          0
1     34        0    0     1       0          0
2     34        1    0     1       0          0
3     34        2    0     1       0          0
4     34        3    0     1       0          0
5     34        4    0     1       0          0
6     34        5    0     1       0          0
7     34        6    0     1       0          0
8     35        0    0     0       0          1
9     35        1    0     0       0          1
10    35        2    0     0       0          1
11    35        3    0     0       0          1
12    35        4   60     0       1          0
13    35        5    0     0       0          1
14    36        0   60     0       1          0
15    36        1    0     0       0          1
16    36        2    0     0       0          1
17    36        3    0     0       0          1
18    36        4    0     0       0          1
19    36        5  180     0       1    

In [6]:
check_even_grouped = SumofMETbyWeekday.groupby(['week'])
check_even = pd.DataFrame(check_even_grouped['even'].sum()).reset_index()
print(check_even)


if check_even['even'].iloc[-1] == 7:
    strategy = 'A1'
else:
    strategy = 'A2'

print(strategy)

   week  even
0    33     1
1    34     7
2    35     0
3    36     0
4    37     7
5    38     0
A2


### strategy: A1

In [7]:
# User's goal and PA choice
goal = 800
choice = "MVPA"

# retrieve daily MET of the last week
lastweekMET = SumofMETbyWeekday.loc[SumofMETbyWeekday['week'] == check_even['week'].iloc[-1]]
# print(lastweekMET)

daysPArequired = pd.Series([0,1,2,3,4,5,6])
# print(daysPArequired)

METrequired = goal - lastweekMET.MET.sum()
# select x days out of daysPArequired
if choice == 'MVPA':
    numofBouts = math.ceil(METrequired/(4 * 15))
elif choice == 'VPA':
    numofBouts = math.ceil(METrequired/(8 * 15))
    
baseBouts = math.floor(numofBouts/daysPArequired.count())
extraBouts = numofBouts % daysPArequired.count()
plan = pd.DataFrame({'weekday': daysPArequired.tolist(), 'choice': choice, 'bouts': baseBouts})
daysMoreBouts = daysPArequired.sample(extraBouts)
for i in daysMoreBouts:
    plan.loc[plan['weekday'] == i, 'bouts'] += 1
print(plan)

   bouts choice  weekday
0      2   MVPA        0
1      2   MVPA        1
2      1   MVPA        2
3      2   MVPA        3
4      2   MVPA        4
5      1   MVPA        5
6      2   MVPA        6


### strategy: A2

In [8]:
# User's goal and PA choice
goal = 800
choice = "VPA"

# retrieve daily MET of the last week
lastweekMET = SumofMETbyWeekday.loc[SumofMETbyWeekday['week'] == check_even['week'].iloc[-1]]
# print(lastweekMET)

daysPArequired = lastweekMET.loc[lastweekMET.sedentary == 1, 'weekday']
# print(daysPArequired)

METrequired = goal - lastweekMET.MET.sum()
# select x days out of daysPArequired
if choice == 'MVPA':
    numofBouts = math.ceil(METrequired/(4 * 15))
elif choice == 'VPA':
    numofBouts = math.ceil(METrequired/(8 * 15))
    
baseBouts = math.floor(numofBouts/daysPArequired.count())
extraBouts = numofBouts % daysPArequired.count()
plan = pd.DataFrame({'weekday': daysPArequired.tolist(), 'choice': choice, 'bouts': baseBouts})
daysMoreBouts = daysPArequired.sample(extraBouts)
for i in daysMoreBouts:
    plan.loc[plan['weekday'] == i, 'bouts'] += 1
print(plan)

   bouts choice  weekday
0      1    VPA        1
1      1    VPA        2
2      1    VPA        3
3      2    VPA        4
4      1    VPA        6


### Determine the time to send break alert

In [9]:
sedentarySelect = (userData.heart > 0) & (userData.step == 0) & (userData.time_count > 32)
userData['sedentarySelect'] = sedentarySelect
userData['sedentaryAccum'] = 0
try:
    userData = userData.reset_index()
except ValueError:
    print("index resetted")

userData

Unnamed: 0,index,date,heart,step,time,time_count,weekday,week,MVPA,VPA,MET,sedentarySelect,sedentaryAccum
0,00:00:00,2016-08-21,74,0,00:00:00,0,6,33,0,0,0,False,0
1,00:15:00,2016-08-21,0,0,00:15:00,1,6,33,0,0,0,False,0
2,00:30:00,2016-08-21,0,0,00:30:00,2,6,33,0,0,0,False,0
3,00:45:00,2016-08-21,0,0,00:45:00,3,6,33,0,0,0,False,0
4,01:00:00,2016-08-21,0,0,01:00:00,4,6,33,0,0,0,False,0
5,01:15:00,2016-08-21,0,0,01:15:00,5,6,33,0,0,0,False,0
6,01:30:00,2016-08-21,0,0,01:30:00,6,6,33,0,0,0,False,0
7,01:45:00,2016-08-21,0,0,01:45:00,7,6,33,0,0,0,False,0
8,02:00:00,2016-08-21,0,0,02:00:00,8,6,33,0,0,0,False,0
9,02:15:00,2016-08-21,0,0,02:15:00,9,6,33,0,0,0,False,0


In [10]:
# screen sedentary bouts >= 90min
# TODO: change sedentarySelect to sedentary
def CountSedentary(userDT):
    m = 0
    n = 1
    while n < len(userDT):
        if userDT.loc[m, 'sedentarySelect']:
            if userDT.loc[n, 'sedentarySelect']:
                # check if consecutive
                t_1 = pd.to_datetime(userDT.loc[n - 1, 'date'] + ' ' + userDT.loc[n - 1, 'time'])            
                t_2 = pd.to_datetime(userDT.loc[n, 'date'] + ' ' + userDT.loc[n, 'time'])
                timeDelta = t_2 - t_1
                if timeDelta == dt.timedelta(minutes=15):
                    userData.loc[m, 'sedentaryAccum'] += 1
                    n += 1
                else:
                    m = n
                    n = m + 1
            else:
                m = n + 1
                n = m + 1
        else:
            m = n
            n += 1
            
    return(userDT)


In [11]:
userData = CountSedentary(userData)

In [74]:
userData

Unnamed: 0,index,date,heart,step,time,time_count,weekday,week,MVPA,VPA,MET,sedentarySelect,sedentaryAccum,sedentary,sedentaryBouts
0,00:00:00,2016-08-21,74,0,00:00:00,0,6,33,0,0,0,False,0,0,0
1,00:15:00,2016-08-21,0,0,00:15:00,1,6,33,0,0,0,False,0,0,0
2,00:30:00,2016-08-21,0,0,00:30:00,2,6,33,0,0,0,False,0,0,0
3,00:45:00,2016-08-21,0,0,00:45:00,3,6,33,0,0,0,False,0,0,0
4,01:00:00,2016-08-21,0,0,01:00:00,4,6,33,0,0,0,False,0,0,0
5,01:15:00,2016-08-21,0,0,01:15:00,5,6,33,0,0,0,False,0,0,0
6,01:30:00,2016-08-21,0,0,01:30:00,6,6,33,0,0,0,False,0,0,0
7,01:45:00,2016-08-21,0,0,01:45:00,7,6,33,0,0,0,False,0,0,0
8,02:00:00,2016-08-21,0,0,02:00:00,8,6,33,0,0,0,False,0,0,0
9,02:15:00,2016-08-21,0,0,02:15:00,9,6,33,0,0,0,False,0,0,0


In [76]:
# as accumulation starts from 2 consecutive 15min sedentary bouts, 5 means 90min of sedentary time
userData[userData['sedentaryAccum'] >= 5]
userData[userData['date'] == '2016-08-24']

Unnamed: 0,index,date,heart,step,time,time_count,weekday,week,MVPA,VPA,MET,sedentarySelect,sedentaryAccum,sedentary,sedentaryBouts
288,00:00:00,2016-08-24,64,0,00:00:00,0,2,34,0,0,0,False,0,0,0
289,00:15:00,2016-08-24,80,102,00:15:00,1,2,34,0,0,0,False,0,0,0
290,00:30:00,2016-08-24,77,43,00:30:00,2,2,34,0,0,0,False,0,0,0
291,00:45:00,2016-08-24,60,0,00:45:00,3,2,34,0,0,0,False,0,0,0
292,01:00:00,2016-08-24,58,0,01:00:00,4,2,34,0,0,0,False,0,0,0
293,01:15:00,2016-08-24,61,0,01:15:00,5,2,34,0,0,0,False,0,0,0
294,01:30:00,2016-08-24,60,0,01:30:00,6,2,34,0,0,0,False,0,0,0
295,01:45:00,2016-08-24,63,7,01:45:00,7,2,34,0,0,0,False,0,0,0
296,02:00:00,2016-08-24,69,0,02:00:00,8,2,34,0,0,0,False,0,0,0
297,02:15:00,2016-08-24,65,0,02:15:00,9,2,34,0,0,0,False,0,0,0


In [73]:
# fill in sedentary bouts >= 90 min with 1
userData['sedentaryBouts'] = 0
i = 0
while i < len(userData):
    n_bout = userData.sedentaryAccum[i]
    if n_bout >= 5: # >= 90min
        j = i
        while j <= i + n_bout:
            userData.loc[j, 'sedentaryBouts'] = 1
            j += 1
    i += 1
            


In [72]:
len(userData)

3360

# Plan A predict the time when sedentary bouts >= 90min start

In [137]:
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# feature: time_count, weekday, week?
features = list(userData.columns[5:8])
print("* features:", features, sep="\n")

y = userData['sedentaryAccum']
X = userData[features]
dt1 = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt1 = dt1.fit(X, y)

* features:
['time_count', 'weekday', 'week']


In [138]:

dt1.feature_importances_

array([ 0.76418683,  0.09542017,  0.140393  ])

In [59]:
dt1.classes_

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [60]:
dt1.score(X,y)

0.94434523809523807

In [61]:
print(dt1.predict([[56, 0, 42]]))
dt1.predict_proba([[62, 0, 42]])

[0]


array([[ 0.85714286,  0.        ,  0.14285714,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ]])

# Plan 2 convert true/false into 0,1; predict 0,1 and find when the user will have prolonged sedentary bouts

In [47]:
import numpy as np
Y_sedentary = np.where(userData.sedentarySelect == True,1,0)
userData['sedentary'] = Y_sedentary

In [54]:
features = list(userData.columns[5:8])
print("* features:", features, sep="\n")

y2 = userData['sedentary']
X2 = userData[features]
dt2 = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt2 = dt2.fit(X2, y2)

* features:
['time_count', 'weekday', 'week']


In [55]:
dt2.feature_importances_

array([ 0.70140085,  0.14917261,  0.14942654])

In [56]:
dt2.classes_

array([0, 1])

In [68]:
print(dt2.predict([[80, 0, 42]]))
dt2.predict_proba([[72, 0, 42]])

[0]


array([[ 0.94444444,  0.05555556]])

In [69]:
dt2.score(X2,y2)

0.85446428571428568

In [145]:
BreakPlan = pd.DataFrame({'date': np.repeat(pd.date_range(today, periods=7, freq='D').strftime('%Y-%m-%d'), 96, axis=0),
                          'weekday': np.repeat(pd.Series(range(7)).tolist(), 96),
                          'time': pd.date_range(today, periods=96, freq='15min').strftime('%H:%M:%S').tolist() * 7,
                          'time_count': pd.Series(range(96)).tolist() * 7,
                          'week': pd.to_datetime(today).week})
predict_features = list(userData.columns[5:8])
print("* features:", features, sep="\n")

X_predict = BreakPlan[features]
predicted_sedentary = dt2.predict(X_predict)
BreakPlan['sedentary'] = predicted_sedentary

def CountSedentary2(userDT):
    m = 0
    n = 1
    userDT['sedentaryAccum'] = 0
    while n < len(userDT):
        if userDT.loc[m, 'sedentary'] == 1:
            if userDT.loc[n, 'sedentary'] == 1:
                # check if consecutive
                t_1 = pd.to_datetime(userDT.loc[n - 1, 'date'] + ' ' + userDT.loc[n - 1, 'time'])            
                t_2 = pd.to_datetime(userDT.loc[n, 'date'] + ' ' + userDT.loc[n, 'time'])
                timeDelta = t_2 - t_1
                if timeDelta == dt.timedelta(minutes=15):
                    userData.loc[m, 'sedentaryAccum'] += 1
                    n += 1
                else:
                    m = n
                    n = m + 1
            else:
                m = n + 1
                n = m + 1
        else:
            m = n
            n += 1
            
    return(userDT)

BreakPlan = CountSedentary2(BreakPlan)
BreakPlan['sedentaryBouts'] = 0
i = 0
while i < len(BreakPlan):
    n_bout = BreakPlan.sedentaryAccum[i]
    if n_bout >= 5: # >= 90min
        j = i
        while j <= i + n_bout:
            userData.loc[j, 'sedentaryBouts'] = 1
            j += 1
    i += 1
    
BreakPlan[BreakPlan['sedentaryAccum'] > 0]

* features:
['time_count', 'weekday', 'week']


Unnamed: 0,date,time,time_count,week,weekday,sedentary,sedentaryAccum,sedentaryBouts


In [144]:
BreakPlan[BreakPlan['sedentary'] > 0]

Unnamed: 0,date,time,time_count,week,weekday,sedentary,sedentaryAccum,sedentaryBouts
33,2016-09-26,08:15:00,33,39,0,1,0,0
34,2016-09-26,08:30:00,34,39,0,1,0,0
35,2016-09-26,08:45:00,35,39,0,1,0,0
36,2016-09-26,09:00:00,36,39,0,1,0,0
37,2016-09-26,09:15:00,37,39,0,1,0,0
41,2016-09-26,10:15:00,41,39,0,1,0,0
42,2016-09-26,10:30:00,42,39,0,1,0,0
43,2016-09-26,10:45:00,43,39,0,1,0,0
44,2016-09-26,11:00:00,44,39,0,1,0,0
45,2016-09-26,11:15:00,45,39,0,1,0,0


# Plan 3 fill bouts >= 90min with 1 and predict bouts >= 90min

In [111]:
features = list(userData.columns[5:8])
print("* features:", features, sep="\n")

y3 = userData['sedentaryBouts']
X3 = userData[features]
dt3 = DecisionTreeClassifier(min_samples_split=i, random_state=99)
dt3 = dt3.fit(X3, y3)
print('accurage of prediction: %f' % dt3.score(X2,y2))


* features:
['time_count', 'weekday', 'week']
accurage of prediction: 0.778274


In [101]:
dt3.feature_importances_

array([ 0.5685657,  0.218522 ,  0.2129123])

In [80]:
dt3.classes_

array([0, 1])

In [82]:
print(dt3.predict([[56, 0, 42]]))
dt3.predict_proba([[56, 0, 42]])

[1]


array([[ 0.45454545,  0.54545455]])

In [128]:
range(7)

range(0, 7)

In [131]:
# construct dataframe for writing break-sedentary data

BreakPlan = pd.DataFrame({'date': np.repeat(pd.date_range(today, periods=7, freq='D').strftime('%Y-%m-%d'), 96, axis=0),
                          'weekday': np.repeat(pd.Series(range(7)).tolist(), 96),
                          'time': pd.date_range(today, periods=96, freq='15min').strftime('%H:%M:%S').tolist() * 7,
                          'time_count': pd.Series(range(96)).tolist() * 7,
                          'week': pd.to_datetime(today).week})
BreakPlan

Unnamed: 0,date,time,time_count,week,weekday
0,2016-09-26,00:00:00,0,39,0
1,2016-09-26,00:15:00,1,39,0
2,2016-09-26,00:30:00,2,39,0
3,2016-09-26,00:45:00,3,39,0
4,2016-09-26,01:00:00,4,39,0
5,2016-09-26,01:15:00,5,39,0
6,2016-09-26,01:30:00,6,39,0
7,2016-09-26,01:45:00,7,39,0
8,2016-09-26,02:00:00,8,39,0
9,2016-09-26,02:15:00,9,39,0


In [133]:
predict_features = list(userData.columns[5:8])
print("* features:", features, sep="\n")

X_predict = BreakPlan[features]
predicted_sedentaryBouts = dt3.predict(X_predict)

* features:
['time_count', 'weekday', 'week']


In [135]:
BreakPlan['sedentaryBouts'] = predicted_sedentaryBouts
BreakPlan[BreakPlan['sedentaryBouts'] > 0]

Unnamed: 0,date,time,time_count,week,weekday,sedentaryBouts
41,2016-09-26,10:15:00,41,39,0,1
42,2016-09-26,10:30:00,42,39,0,1
43,2016-09-26,10:45:00,43,39,0,1
44,2016-09-26,11:00:00,44,39,0,1
45,2016-09-26,11:15:00,45,39,0,1
46,2016-09-26,11:30:00,46,39,0,1
47,2016-09-26,11:45:00,47,39,0,1
48,2016-09-26,12:00:00,48,39,0,1
49,2016-09-26,12:15:00,49,39,0,1
50,2016-09-26,12:30:00,50,39,0,1


In [None]:
def visualize_tree(tree, feature_names):
    """Create tree png using graphviz.

    Args
    ----
    tree -- scikit-learn DecsisionTree.
    feature_names -- list of feature names.
    """
    with open("dt.dot", 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_names)

    command = ["dot", "-Tpng", "dt.dot", "-o", "dt.png"]
    try:
        subprocess.check_call(command)
    except:
        exit("Could not run dot, ie graphviz, to "
             "produce visualization")


In [None]:
visualize_tree(dt, features)

In [None]:
userData[userData['date'] == '2016-09-01']

In [None]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit([SumofMVPAbyWeekday['weekday']], SumofMVPAbyWeekday['MET'])

In [None]:
import numpy as np
from detect_peaks import detect_peaks

# plot to see

In [None]:
from bokeh.charts import BoxPlot, Bar, Scatter, output_file, show
from bokeh.io import output_notebook

#### daily total steps

In [None]:
step_weekday_grouped = userData.groupby(['week', 'weekday'])
SumofstepbyWeekday = step_weekday_grouped['step'].sum()
print(SumofstepbyWeekday.index)

len(SumofstepbyWeekday.index)

In [None]:
output_notebook()
for i in range(7):
    DayData = userData[userData['weekday'] == i]
    s = Scatter(DayData, y='step', x='time_count',
            title=str(i), plot_width=1000, legend=False)
    show(s)

In [None]:
Mon = userData[userData['weekday'] == 0]
p = BoxPlot(Mon, values='step', label='time_count',
            title="MVPA of Mon 3weeks", plot_width=1200, legend=False)
b = Bar(userData, values='MVPA', label='weekday', agg='sum',
            title="MVPA of Mon 3weeks", plot_width=1200, legend=False)
s = Scatter(Mon, y='step', x='time_count',
            title="MVPA of Mon 3weeks", plot_width=1200, legend=False)
# output_file("boxplot.html")
output_notebook()
show(p)
show(b)
show(s)





In [None]:
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.autompg import autompg as df

p = Bar(userData, label='time_count', values='MVPA', agg='mean',
        title="activity by 15-min", plot_width=1200)

output_file("bar.html")

show(p)

In [None]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit([userData['time_count'], userData['activity']], )

In [None]:
clf.predict([1])