In [2]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")
import time

In [17]:
sleeps = pd.read_csv('sleeps.csv')
sleeps = sleeps[sleeps.user_id!=2509]

In [18]:
def epoch_end_convert(df):
    '''
    takes dataframe and converts date column to epoch milliseconds
    inputs:
        df = dataframe name
    '''
    try:
        return (int(time.mktime(time.strptime(df['end'], '%Y-%m-%d %H:%M:%S'))) - 14400)*1000
    except:
        #handles activities that have fractions of a second included
        return (int(time.mktime(time.strptime(df['end'][:19], '%Y-%m-%d %H:%M:%S'))) - 14400)*1000

In [22]:
sleeps['end_epoch'] = sleeps.apply(epoch_end_convert, axis=1)

In [27]:
#drop if HRV = NaN
sleeps_only = sleeps[np.isnan(sleeps.hrv_rmssd)==False]

In [29]:
sleeps_only.head()

Unnamed: 0,user_id,start,end,score,time_in_bed,latency,disturbances,wake_duration,light_sleep_duration,slow_wave_sleep_duration,rem_sleep_duration,cycles_count,debt_post,is_nap,recovery_score,resting_heart_rate,hrv_rmssd,end_epoch
0,828,2015-09-15 02:42:00,2015-09-15 12:09:00,82,34020000,2100132,27,6870000,22230000,3600000,1320000,2,4266209.55,f,63,49,0.064647,1442318940000
1,828,2015-09-16 02:03:00,2015-09-16 12:06:00,87,36180000,2790134,25,6630000,22860000,3210000,3480000,3,2436815.25,f,74,45,0.07567,1442405160000
2,828,2015-09-17 00:36:00,2015-09-17 12:15:00,97,41940000,7800470,24,10410000,25680000,3240000,2580000,4,513169.69,f,68,44,0.036925,1442492100000
3,828,2015-09-18 02:38:18.552,2015-09-18 12:24:08.679,94,35150127,1410042,32,3960000,24810000,2160000,4230000,7,1425316.75,f,90,53,0.098787,1442579048000
5,828,2015-09-20 03:38:43.029,2015-09-20 11:44:39.378,70,29156349,1560075,18,4260000,18090000,2910000,3900000,6,7668000.0,f,42,43,0.050816,1442749479000


#Adding In Race Periods

In [30]:
date_markers = pd.read_csv('epoch_dates.csv')

In [31]:
date_markers.head()

Unnamed: 0,ET_Date,date_start_epoch,race_period,bike_period,race_period_start,race_period_end,bike_period_start,bike_period_end
0,9/15/2015 0:00,1442289600000,1,1,1,0,1,0
1,9/16/2015 0:00,1442376000000,1,1,0,0,0,0
2,9/17/2015 0:00,1442462400000,1,1,0,0,0,0
3,9/18/2015 0:00,1442548800000,1,1,0,0,0,0
4,9/19/2015 0:00,1442635200000,1,1,0,0,0,0


In [32]:
date_markers = date_markers[date_markers['race_period_start'] > 0].reset_index()
#dictionary of race period to start epoch
race_start_dict = {}
for i in range(0, len(date_markers)):
    race_start_dict[date_markers.values[i][3]] = date_markers.values[i][2]

#date_markers = date_markers[date_markers['bike_period_start'] > 0].reset_index()
#bike_start_dict = {}
#for i in range(0, len(date_markers)):
#    bike_start_dict[date_markers.values[i][4]] = date_markers.values[i][2]


In [33]:
race_start_dict

{1L: 1442289600000L,
 2L: 1443240000000L,
 3L: 1445054400000L,
 4L: 1446264000000L,
 5L: 1447473600000L}

In [34]:
def add_race(df):
    if (df.end_epoch >= race_start_dict[1]) and (df.end_epoch < race_start_dict[2]):
        return 1
    elif (df.end_epoch >= race_start_dict[2]) and (df.end_epoch < race_start_dict[3]):
        return 2
    elif (df.end_epoch >= race_start_dict[3]) and (df.end_epoch < race_start_dict[4]):
        return 3
    elif (df.end_epoch >= race_start_dict[4]) and (df.end_epoch < race_start_dict[5]):
        return 4
    elif (df.end_epoch >= race_start_dict[5]) and (df.end_epoch < 1448164800000):
        return 5
    else:
        return 0


In [35]:
def add_bike(df):
    if (df.end_epoch >= bike_start_dict[1]) and (df.end_epoch < bike_start_dict[2]):
        return 1
    elif (df.end_epoch >= bike_start_dict[2]) and (df.end_epoch < bike_start_dict[3]):
        return 2
    elif (df.end_epoch >= race_start_dict[3]) and (df.end_epoch < bike_start_dict[4]):
        return 3
    elif (df.end_epoch >= bike_start_dict[4]) and (df.end_epoch < bike_start_dict[5]):
        return 4
    elif (df.end_epoch >= bike_start_dict[5]) and (df.end_epoch < bike_start_dict[6]):
        return 5
    elif (df.end_epoch >= bike_start_dict[6]) and (df.end_epoch < bike_start_dict[7]):
        return 6
    elif (df.end_epoch >= bike_start_dict[7]) and (df.end_epoch < bike_start_dict[8]):
        return 7
    elif (df.end_epoch >= bike_start_dict[8]) and (df.end_epoch < bike_start_dict[9]):
        return 8
    elif (df.end_epoch >= bike_start_dict[9]):
        return 9

In [47]:
def add_periods(date_markers, df, buildup_days=0, version=None):
    if version==None:
        print "Enter Either 'race' or 'bike'!"
    if version=="race":
        #Collapse the data frame into only the start and end dates for each race period
        date_markers = date_markers[date_markers['race_period_start'] > 0].reset_index()
        #dictionary of race period to start epoch
        race_start_dict = {}
        for i in range(0, len(date_markers)):
            race_start_dict[date_markers.values[i][3]] = date_markers.values[i][2]
        #Now label each race period as 1, 2, 3, 4, or 5
        df['race_period'] = df.apply(add_race, axis=1)
        df['date_md']=df['end'].str[5:10]
        df=df.sort(['user_id', 'race_period', 'end_epoch'], ascending=[True, True, True])
    
    return df

In [51]:
test = add_periods(date_markers=date_markers, df=sleeps_only, buildup_days=0, version="race")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,user_id,start,end,score,time_in_bed,latency,disturbances,wake_duration,light_sleep_duration,slow_wave_sleep_duration,rem_sleep_duration,cycles_count,debt_post,is_nap,recovery_score,resting_heart_rate,hrv_rmssd,end_epoch,race_period,date_md
0,828,2015-09-15 02:42:00,2015-09-15 12:09:00,82,34020000,2100132,27,6870000,22230000,3600000,1320000,2,4266209.55,f,63,49,0.064647,1442318940000,1,09-15
1,828,2015-09-16 02:03:00,2015-09-16 12:06:00,87,36180000,2790134,25,6630000,22860000,3210000,3480000,3,2436815.25,f,74,45,0.07567,1442405160000,1,09-16
2,828,2015-09-17 00:36:00,2015-09-17 12:15:00,97,41940000,7800470,24,10410000,25680000,3240000,2580000,4,513169.69,f,68,44,0.036925,1442492100000,1,09-17
3,828,2015-09-18 02:38:18.552,2015-09-18 12:24:08.679,94,35150127,1410042,32,3960000,24810000,2160000,4230000,7,1425316.75,f,90,53,0.098787,1442579048000,1,09-18
5,828,2015-09-20 03:38:43.029,2015-09-20 11:44:39.378,70,29156349,1560075,18,4260000,18090000,2910000,3900000,6,7668000.0,f,42,43,0.050816,1442749479000,1,09-20
