### In this notebook, we will analyze CSV and JSON files with pandas and NumPy, then plot with Bokeh to visualize how your favorite sports team really is going to be the death of you.

In [1]:
#import the needed modules -- we have some extra bokeh packages in case we want to apply more stylings.
import json
import pandas as pd
from pandas.io.json import json_normalize
import csv
import os
import glob
import numpy
import matplotlib as pyplot
import bokeh
import seaborn as sns
import matplotlib.pyplot as plt
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import FuncTickFormatter
from bokeh.models import HoverTool

In [2]:
#fetch the sleep data: every file under sleep_data/ is read as a CSV and stacked into one frame

# glob returns paths in arbitrary, filesystem-dependent order; sorting makes the
# concatenated row order (and therefore the head() preview) the same on every run.
files = sorted(glob.glob('sleep_data/*'))

if not files:
    # Without this guard pd.concat fails with the opaque "No objects to concatenate".
    raise FileNotFoundError("No files found under 'sleep_data/'; run the notebook from the directory that contains that folder.")

# One frame per file: first row is the header, default integer index.
# A comprehension also avoids rebinding the name `df`, which the import cell already uses.
file_list = [pd.read_csv(filename, index_col=None, header=0) for filename in files]

# ignore_index=False keeps each file's own row numbers, so index values repeat across files.
sleep_df = pd.concat(file_list, axis=0, ignore_index=False, sort=False)
sleep_df.head()

Unnamed: 0,sleep_log_entry_id,timestamp,overall_score,composition_score,revitalization_score,duration_score,deep_sleep_in_minutes,resting_heart_rate,restlessness,fan
0,24742471786,2019-11-22T07:00:30Z,82,21,20,41,71,59,0.050619,fan_1
1,24721802730,2019-11-21T06:19:30Z,74,20,17,37,43,58,0.05007,fan_1
2,24706742952,2019-11-20T07:33:00Z,78,20,20,38,44,59,0.0871,fan_1
3,24692555453,2019-11-19T07:24:30Z,78,20,20,38,40,61,0.090493,fan_1
4,24678772005,2019-11-18T06:10:30Z,78,22,18,38,100,61,0.066167,fan_1


In [3]:
#now let's clean up the timestamp column of the sleep dataframe

# Parse the timestamp strings (e.g. '2019-11-22T07:00:30Z') into pandas datetimes.
sleep_df['timestamp'] = pd.to_datetime(sleep_df['timestamp'])

#then let's drop the hours and minutes and just retain the date
# .dt.date yields plain datetime.date objects (object dtype), not datetime64.
sleep_df['just_date'] = sleep_df['timestamp'].dt.date


In [4]:
# Turn the just_date values back into datetime64 (time of day is midnight);
# the date-range filter and the datetime x-axis further down both use this column.
midnight_dates = pd.to_datetime(sleep_df['just_date'])
sleep_df['just_date'] = midnight_dates

# Show the resulting column types
sleep_df.dtypes

sleep_log_entry_id                int64
timestamp                datetime64[ns]
overall_score                     int64
composition_score                 int64
revitalization_score              int64
duration_score                    int64
deep_sleep_in_minutes             int64
resting_heart_rate                int64
restlessness                    float64
fan                              object
just_date                datetime64[ns]
dtype: object

In [5]:
#get the mean and median sleep and heart scores
# These are computed over every loaded row, before the date-range filter is applied.
heart_rate = sleep_df.resting_heart_rate
sleep_score = sleep_df.overall_score

summary_rows = (
    ('Median Heart Rate Score (entire record): ', heart_rate.median()),
    ('Mean Heart Rate Score ', heart_rate.mean()),
    ('Median Sleep Score (entire record): ', sleep_score.median()),
    ('Mean Sleep Score ', sleep_score.mean()),
)
for label, value in summary_rows:
    print(label, value)

Median Heart Rate Score (entire record):  64.0
Mean Heart Rate Score  66.4573055028463
Median Sleep Score (entire record):  74.0
Mean Sleep Score  73.40037950664137


In [6]:
#keep only the nights inside the target range: after 2019-09-01, up to and including 2019-11-05
in_target_range = (sleep_df['just_date'] > '2019-09-01') & (sleep_df['just_date'] <= '2019-11-05')

# .copy() makes the filtered frame independent of the full one, so adding a column
# below does not trigger pandas' SettingWithCopyWarning.
sleep_df = sleep_df[in_target_range].copy()

# Short month-day label for each night (e.g. '11-05'), using the vectorized datetime accessor.
sleep_df["text_date"] = sleep_df['just_date'].dt.strftime('%m-%d')
sleep_df.head()

Unnamed: 0,sleep_log_entry_id,timestamp,overall_score,composition_score,revitalization_score,duration_score,deep_sleep_in_minutes,resting_heart_rate,restlessness,fan,just_date,text_date
17,24527041386,2019-11-05 06:41:30,78,21,20,37,78,61,0.070404,fan_1,2019-11-05,11-05
18,24510142444,2019-11-04 07:56:00,75,21,14,40,76,66,0.082863,fan_1,2019-11-04,11-04
19,24497283872,2019-11-03 07:16:00,72,19,13,36,77,62,0.097887,fan_1,2019-11-03,11-03
20,24487227269,2019-11-02 05:52:30,77,19,13,30,45,66,0.056,fan_1,2019-11-02,11-02
21,24479779403,2019-11-01 07:38:30,63,17,20,26,0,66,0.092084,fan_1,2019-11-01,11-01


In [7]:
# Re-check the column types after filtering; text_date is the newly added object (string) column
sleep_df.dtypes

sleep_log_entry_id                int64
timestamp                datetime64[ns]
overall_score                     int64
composition_score                 int64
revitalization_score              int64
duration_score                    int64
deep_sleep_in_minutes             int64
resting_heart_rate                int64
restlessness                    float64
fan                              object
just_date                datetime64[ns]
text_date                        object
dtype: object

In [8]:
#smaller chart with only two data types and just the means

import bokeh.plotting as bk
from bokeh.models import Span, Label
from datetime import datetime as dt

import time

# Render Bokeh output inline in the notebook
output_notebook()

# create  a ColumnDataSource
sleep_cds = ColumnDataSource(sleep_df)

#create a grouped CDS
# Passing a GroupBy makes Bokeh store the per-group describe() statistics, which is where
# columns such as 'resting_heart_rate_mean' and 'overall_score_mean' come from.
group = sleep_df.groupby('just_date')
grouped_cds = ColumnDataSource(group)

# One formatted date string per grouped row. This is derived from the grouped source's own
# date column so its length matches the other columns; building it from sleep_df (one row
# per night per fan) gives a column of a different length than the per-date statistics.
grouped_dates = pd.to_datetime(grouped_cds.data['just_date'])
grouped_cds.add(list(grouped_dates.strftime('%Y-%m-%d')), 'event_date_formatted')

# Specify the selection tools to be made available
TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'

# Create the figure
fig = figure(plot_height=650, plot_width=1100, x_axis_label='Date', x_axis_type='datetime', y_axis_label='Average Resting Heart Rate (BPM)',
             title='The Cardiac Nats: How a Playoff Race Affects Sports Fans',  toolbar_location='below', tools=TOOLS)


#Add the lines for the overall average sleep and heart scores
# NOTE(review): these baselines are hard-coded and do not match the statistics printed
# earlier for the entire record (heart rate median 64.0 / mean 66.46, sleep score
# median 74.0 / mean 73.40) -- confirm where 63 and 76 come from.
HEART_RATE_BASELINE = 63
SLEEP_SCORE_BASELINE = 76
baseline_kwargs = dict(source=grouped_cds, line_width=2, line_dash="dashed")

fig.line(x='just_date', y=HEART_RATE_BASELINE, alpha=0.3, line_color='red',
         legend='6 Month Heart Rate Average', **baseline_kwargs)

fig.line(x='just_date', y=SLEEP_SCORE_BASELINE, alpha=0.5, line_color='purple',
         legend='6 Month Sleep Score Average', **baseline_kwargs)


# Add the lines for the playoffs
# Both series plot the per-date means held in the grouped data source.
per_date_kwargs = dict(source=grouped_cds, line_width=5)

fig.line(x='just_date', y='resting_heart_rate_mean', alpha=0.3, color='red', line_color='red',
         legend='Sept/Oct Average Resting Heart Rate', **per_date_kwargs)

#then the sleep overall scores
fig.line(x='just_date', y='overall_score_mean', alpha=0.2, line_color='purple',
         legend='Sept/Oct Average Sleep Quality', **per_date_kwargs)





#Add the notable games

def _utc_midnight_ms(date_string):
    """Return milliseconds since the Unix epoch for midnight UTC on date_string ('YYYY-MM-DD').

    Bokeh positions values on a datetime axis as milliseconds since the epoch and treats
    naive datetimes (such as the just_date column) as UTC. time.mktime() interprets its
    input in the machine's local timezone instead, which shifts every marker by the local
    UTC offset relative to the data points, so the conversion is pinned to UTC here.
    """
    return pd.Timestamp(date_string, tz='UTC').timestamp() * 1000


def _mark_dates(plot, date_strings, line_color, line_width, line_alpha):
    """Add one dashed vertical Span to plot for each date and return the Span locations.

    Parameters:
        plot: the Bokeh figure to annotate.
        date_strings: iterable of 'YYYY-MM-DD' dates to mark.
        line_color, line_width, line_alpha: styling shared by every Span in this group.

    Returns:
        A list of marker locations (ms since epoch), in the same order as date_strings.
    """
    locations = [_utc_midnight_ms(date_string) for date_string in date_strings]
    for location in locations:
        plot.add_layout(Span(location=location, dimension='height', line_color=line_color,
                             line_dash='dashed', line_width=line_width, line_alpha=line_alpha))
    return locations


# Markers are added in the same order as before: comeback game, clinch, completed sweep.
comeback_game, clinch, complete_sweep = _mark_dates(
    fig, ['2019-09-03', '2019-09-24', '2019-09-16'], 'green', 1, 0.6)

#add the wild card game
(wild_card,) = _mark_dates(fig, ['2019-10-01'], 'darkblue', 2, 0.6)

#add the NLDS
nlds_1, nlds_2, nlds_3, nlds_4, nlds_5 = _mark_dates(
    fig, ['2019-10-03', '2019-10-04', '2019-10-06', '2019-10-07', '2019-10-09'], 'blue', 2, 0.6)

#add the NLCS
nlcs_1, nlcs_2, nlcs_3, nlcs_4 = _mark_dates(
    fig, ['2019-10-11', '2019-10-12', '2019-10-14', '2019-10-15'], 'red', 2, 0.6)

#now add the world series
ws_1, ws_2, ws_3, ws_4, ws_5, ws_6, ws_7 = _mark_dates(
    fig,
    ['2019-10-22', '2019-10-23', '2019-10-25', '2019-10-26', '2019-10-27', '2019-10-29', '2019-10-30'],
    'orange', 2, 0.7)

#mark the parade
(parade,) = _mark_dates(fig, ['2019-11-02'], 'purple', 1, 0.7)

#add the legend
# Clicking a legend entry hides the corresponding line.
fig.legend.location = "top_left"
fig.legend.click_policy="hide"

#add the labels
# Each series name is anchored horizontally at its first game's marker and at a fixed
# vertical screen position.
series_labels = (
    ('NLDS', nlds_1),
    ('NLCS', nlcs_1),
    ('World Series', ws_1),
)
for series_name, first_game_x in series_labels:
    fig.add_layout(Label(x=first_game_x, y=325, y_units='screen', text=series_name,
                         text_font_size='11pt', text_font_style='bold'))


# Format the tooltip
# The @-fields name columns of the grouped data source (per-date means).
tooltips = [('Sleep Score', '@overall_score_mean'), ('Mean Resting Heart Rate','@resting_heart_rate_mean')]

# Add the HoverTool to the figure
# TOOLS does not include a hover tool, so this is the only one on the figure; the earlier
# fig.select(...) lookup always came back empty and its result was never used.
fig.add_tools(HoverTool(tooltips=tooltips))


#Create the output file

output_file("heart_and_sleep.html", title="Sleep Score and Heart Rate Over Time")

# Visualize
show(fig)

