### In this notebook, we will analyze csv and json files with pandas and numpy, then plot with Bokeh, to visualize how your favorite sports team really is going to be the death of you.

In [1]:
#import the needed modules -- we have some extra bokeh packages in case we want to apply more stylings.
import json
import pandas as pd
from pandas.io.json import json_normalize
import csv
import os
import glob
import numpy
import matplotlib as pyplot
import bokeh
import seaborn as sns
import matplotlib.pyplot as plt
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import FuncTickFormatter
from bokeh.models import HoverTool

In [2]:
#fetch the sleep data
files = glob.glob('sleep_data/*')

file_list = []

for filename in files:
    df = pd.read_csv(filename, index_col=None, header=0)
    file_list.append(df)

sleep_df = pd.concat(file_list, axis=0, ignore_index=False, sort=False)
sleep_df.head()

Unnamed: 0,sleep_log_entry_id,timestamp,overall_score,composition_score,revitalization_score,duration_score,deep_sleep_in_minutes,resting_heart_rate,restlessness
0,24742471786,2019-11-22T07:00:30Z,82,21,20,41,71,59,0.050619
1,24721802730,2019-11-21T06:19:30Z,74,20,17,37,43,58,0.05007
2,24706742952,2019-11-20T07:33:00Z,78,20,20,38,44,59,0.0871
3,24692555453,2019-11-19T07:24:30Z,78,20,20,38,40,61,0.090493
4,24678772005,2019-11-18T06:10:30Z,78,22,18,38,100,61,0.066167


In [3]:
#fetch the sleep data
files = glob.glob('heartrate_data/*')

file_list = []

for filename in files:
    df = pd.read_json(filename)
    file_list.append(df)

heart_df = pd.concat(file_list, axis=0, ignore_index=False, sort=False)
heart_df.head()

Unnamed: 0,dateTime,value
0,2019-09-01 04:00:00,"{'bpm': 71, 'confidence': 2}"
1,2019-09-01 04:00:05,"{'bpm': 69, 'confidence': 2}"
2,2019-09-01 04:00:15,"{'bpm': 70, 'confidence': 3}"
3,2019-09-01 04:00:20,"{'bpm': 71, 'confidence': 3}"
4,2019-09-01 04:00:25,"{'bpm': 72, 'confidence': 2}"


In [4]:
#split out the json string into columns

json_struct = json.loads(heart_df.to_json(orient="records"))    
heart_df_flat = pd.io.json.json_normalize(json_struct) #use pd.io.json
heart_df_flat.head()

Unnamed: 0,dateTime,value.bpm,value.confidence
0,1567310400000,71,2
1,1567310405000,69,2
2,1567310415000,70,3
3,1567310420000,71,3
4,1567310425000,72,2


In [5]:
#the json flattening somehow breaks the datetime column, so we will take the value.bpm and value.confidence columns and join them back to the original frame


heart_df['bpm']=heart_df_flat['value.bpm']
heart_df['confidence']=heart_df_flat['value.confidence']
heart_df.head()

Unnamed: 0,dateTime,value,bpm,confidence
0,2019-09-01 04:00:00,"{'bpm': 71, 'confidence': 2}",71,2
1,2019-09-01 04:00:05,"{'bpm': 69, 'confidence': 2}",69,2
2,2019-09-01 04:00:15,"{'bpm': 70, 'confidence': 3}",70,3
3,2019-09-01 04:00:20,"{'bpm': 71, 'confidence': 3}",71,3
4,2019-09-01 04:00:25,"{'bpm': 72, 'confidence': 2}",72,2


In [6]:
#now let's clean up the timestamp column of the sleep dataframe

sleep_df.timestamp 
sleep_df['timestamp'] = pd.to_datetime(sleep_df['timestamp'])

#then let's drop the hours and minutes and just retain the date
sleep_df['just_date'] = sleep_df['timestamp'].dt.date
heart_df['just_date'] = heart_df['dateTime'].dt.date

In [7]:
sleep_df.head()
heart_df.head()

Unnamed: 0,dateTime,value,bpm,confidence,just_date
0,2019-09-01 04:00:00,"{'bpm': 71, 'confidence': 2}",71,2,2019-09-01
1,2019-09-01 04:00:05,"{'bpm': 69, 'confidence': 2}",69,2,2019-09-01
2,2019-09-01 04:00:15,"{'bpm': 70, 'confidence': 3}",70,3,2019-09-01
3,2019-09-01 04:00:20,"{'bpm': 71, 'confidence': 3}",71,3,2019-09-01
4,2019-09-01 04:00:25,"{'bpm': 72, 'confidence': 2}",72,2,2019-09-01


In [8]:
 #get the average heartrate per day, into a new frame. first we need to cast the just_date columns back to date/time
heart_df['just_date'] = pd.to_datetime(heart_df['just_date'])
sleep_df['just_date'] = pd.to_datetime(sleep_df['just_date'])
  
merged_df = heart_df.resample('D', on='just_date').mean()
merged_df.head()

Unnamed: 0_level_0,bpm,confidence
just_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-09-01,84.513602,2.297606
2019-09-02,79.902821,2.728846
2019-09-03,83.52452,2.199896
2019-09-04,83.8255,2.157
2019-09-05,83.711057,2.144283


In [9]:
#set the index on the just_date column; this will make it easier for us to plot or to join to the other dataframe
sleep_df.set_index('just_date')

Unnamed: 0_level_0,sleep_log_entry_id,timestamp,overall_score,composition_score,revitalization_score,duration_score,deep_sleep_in_minutes,resting_heart_rate,restlessness
just_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-11-22,24742471786,2019-11-22 07:00:30,82,21,20,41,71,59,0.050619
2019-11-21,24721802730,2019-11-21 06:19:30,74,20,17,37,43,58,0.050070
2019-11-20,24706742952,2019-11-20 07:33:00,78,20,20,38,44,59,0.087100
2019-11-19,24692555453,2019-11-19 07:24:30,78,20,20,38,40,61,0.090493
2019-11-18,24678772005,2019-11-18 06:10:30,78,22,18,38,100,61,0.066167
2019-11-17,24666462241,2019-11-17 07:35:30,76,23,12,41,122,59,0.108891
2019-11-16,24656074081,2019-11-16 06:11:00,77,21,18,38,51,59,0.039591
2019-11-15,24647189590,2019-11-15 06:29:00,78,21,19,38,58,59,0.038627
2019-11-14,24634287814,2019-11-14 07:14:00,76,20,18,38,33,60,0.056000
2019-11-13,24620494353,2019-11-13 06:20:00,72,17,22,33,44,63,0.087671


In [13]:
#basic chart with Bokeh

output_notebook()

# Store the data in a ColumnDataSource
heart_cds = ColumnDataSource(merged_df)
sleep_cds = ColumnDataSource(sleep_df)

# Specify the selection tools to be made available
select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']

# Create the figure
fig = figure(plot_height=600,
             plot_width=800,
             x_axis_label='Date',
             x_axis_type='datetime',
             y_axis_label='Average Heart Rate (BPM)',
             
             title='The Cardiac Nats: How a Playoff Race Affects Sports Fans',
             toolbar_location='below',
             tools=select_tools)



# Add the markers
fig.circle(x='just_date',
           y='bpm',
           source=heart_cds,
           size=15,
           alpha=0.5,
           color='lightblue',
           line_color='blue',
           selection_color='deepskyblue',
           nonselection_color='lightgray',
           nonselection_alpha=0.3)

fig.circle(x='just_date',
           y='overall_score',
           source=sleep_cds,
           size=15,
           alpha=0.5,
           color='purple',
           line_color='blue',
           selection_color='deepskyblue',
           nonselection_color='lightgray',
           nonselection_alpha=0.3)



# Format the tooltip
tooltips = [
            ('Date: ','@just_date'),
            ('Beats Per Minute', '@bpm'),
            ('Sleep Score','@overall_score')
        
            
           ]

# Add the HoverTool to the figure
fig.add_tools(HoverTool(tooltips=tooltips))

# Visualize
show(fig)

#### Outstanding Tasks:

 * format the tooltip as date/time
 * improve the tooltip to show only heart rate or only sleep score, depending on category
 * drop in the markers for game dates
 * expand the dataset to include post-11/15
 * improve the stylings