### This notebook reads in the heart rate JSON files for the day of a single game (two people) and builds a detailed heart-rate timeline of that game.

In [1]:
#import the needed modules -- we have some extra bokeh packages in case we want to apply more stylings.
import json
import pandas as pd
from pandas.io.json import json_normalize
import csv
import os
import glob
import numpy
import matplotlib as pyplot
import bokeh
import seaborn as sns
import matplotlib.pyplot as plt
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import FuncTickFormatter
from bokeh.models import HoverTool

In [2]:
# Load Person 1's heart-rate samples for the game day.
# The JSON files carry the day they were sampled in their name, so the glob picks
# only the day we want. Sorted because glob returns matches in no guaranteed order.
files = sorted(glob.glob('heartrate_data/*2019-10-01*'))

if not files:
    # pd.concat would otherwise fail with a vague "No objects to concatenate".
    raise ValueError("No heart rate files matched 'heartrate_data/*2019-10-01*'")

file_list = [pd.read_json(filename) for filename in files]

# ignore_index=True gives the combined frame one unique 0..n-1 index. Keeping each
# file's own index would repeat labels whenever more than one file matches, and any
# later label-based alignment against this frame would then pair rows wrongly.
heart_df = pd.concat(file_list, axis=0, ignore_index=True, sort=False)
heart_df.head()

Unnamed: 0,dateTime,value
0,2019-10-01 04:00:04,"{'bpm': 65, 'confidence': 3}"
1,2019-10-01 04:00:09,"{'bpm': 66, 'confidence': 3}"
2,2019-10-01 04:00:19,"{'bpm': 65, 'confidence': 3}"
3,2019-10-01 04:00:24,"{'bpm': 65, 'confidence': 2}"
4,2019-10-01 04:00:34,"{'bpm': 67, 'confidence': 2}"


In [3]:
# Same routine for Person 2's data, which lives in its own sub-directory.
# The JSON files carry the day they were sampled in their name, so the glob picks
# only the day we want. Sorted because glob returns matches in no guaranteed order.
files2 = sorted(glob.glob('heartrate_data/heartrate_2/*2019-10-01*'))

if not files2:
    # pd.concat would otherwise fail with a vague "No objects to concatenate".
    raise ValueError("No heart rate files matched 'heartrate_data/heartrate_2/*2019-10-01*'")

file_list = [pd.read_json(filename) for filename in files2]

# ignore_index=True gives the combined frame one unique 0..n-1 index. Keeping each
# file's own index would repeat labels whenever more than one file matches, and any
# later label-based alignment against this frame would then pair rows wrongly.
heart_df2 = pd.concat(file_list, axis=0, ignore_index=True, sort=False)
heart_df2.head()

Unnamed: 0,dateTime,value
0,2019-10-01 04:00:05,"{'bpm': 78, 'confidence': 3}"
1,2019-10-01 04:00:10,"{'bpm': 77, 'confidence': 3}"
2,2019-10-01 04:00:15,"{'bpm': 80, 'confidence': 3}"
3,2019-10-01 04:00:20,"{'bpm': 84, 'confidence': 2}"
4,2019-10-01 04:00:25,"{'bpm': 85, 'confidence': 3}"


In [4]:
# Split the nested `value` dict ({'bpm': ..., 'confidence': ...}) into flat
# `value.bpm` / `value.confidence` columns.

# pandas 1.0 exposed json_normalize at the top level and deprecated the
# pandas.io.json spelling; prefer the new name and fall back for older installs.
_normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize

# Round-tripping through JSON records turns every row into a plain dict that
# json_normalize can flatten. Side effect: dateTime comes back as epoch milliseconds.
json_struct = json.loads(heart_df.to_json(orient="records"))
heart_df_flat = _normalize(json_struct)

json_struct2 = json.loads(heart_df2.to_json(orient="records"))
heart_df_flat2 = _normalize(json_struct2)

heart_df_flat2.head()

Unnamed: 0,dateTime,value.bpm,value.confidence
0,1569902405000,78,3
1,1569902410000,77,3
2,1569902415000,80,3
3,1569902420000,84,2
4,1569902425000,85,3


In [5]:
# The flattened frames hold dateTime as epoch milliseconds, so instead of using them
# directly we copy just the bpm / confidence columns back onto the original frames.
#
# .values assigns by row position. The flattened rows were produced from these same
# frames in the same order, so this stays correct even when the index labels of the
# two frames differ or repeat; assigning the Series itself aligns on labels and
# would silently pair the wrong rows in that case.
heart_df['bpm'] = heart_df_flat['value.bpm'].values
heart_df['confidence'] = heart_df_flat['value.confidence'].values

heart_df2['bpm'] = heart_df_flat2['value.bpm'].values
heart_df2['confidence'] = heart_df_flat2['value.confidence'].values
heart_df2.head()

Unnamed: 0,dateTime,value,bpm,confidence
0,2019-10-01 04:00:05,"{'bpm': 78, 'confidence': 3}",78,3
1,2019-10-01 04:00:10,"{'bpm': 77, 'confidence': 3}",77,3
2,2019-10-01 04:00:15,"{'bpm': 80, 'confidence': 3}",80,3
3,2019-10-01 04:00:20,"{'bpm': 84, 'confidence': 2}",84,2
4,2019-10-01 04:00:25,"{'bpm': 85, 'confidence': 3}",85,3


In [6]:
# Add a calendar-date column (date only, no time of day) to both frames; this makes
# it easier to group the samples by day or to join them to other per-day data.
heart_df['just_date'] = heart_df['dateTime'].dt.date
heart_df2['just_date'] = heart_df2['dateTime'].dt.date

# Preview only: set_index returns a new frame and the result is not assigned, so
# 'dateTime' remains a regular column on heart_df2 (later cells select it by name).
heart_df2.set_index('dateTime')

Unnamed: 0_level_0,value,bpm,confidence,just_date
dateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-10-01 04:00:05,"{'bpm': 78, 'confidence': 3}",78,3,2019-10-01
2019-10-01 04:00:10,"{'bpm': 77, 'confidence': 3}",77,3,2019-10-01
2019-10-01 04:00:15,"{'bpm': 80, 'confidence': 3}",80,3,2019-10-01
2019-10-01 04:00:20,"{'bpm': 84, 'confidence': 2}",84,2,2019-10-01
2019-10-01 04:00:25,"{'bpm': 85, 'confidence': 3}",85,3,2019-10-01
2019-10-01 04:00:30,"{'bpm': 83, 'confidence': 3}",83,3,2019-10-01
2019-10-01 04:00:35,"{'bpm': 81, 'confidence': 3}",81,3,2019-10-01
2019-10-01 04:00:50,"{'bpm': 80, 'confidence': 3}",80,3,2019-10-01
2019-10-01 04:00:55,"{'bpm': 81, 'confidence': 3}",81,3,2019-10-01
2019-10-01 04:01:00,"{'bpm': 82, 'confidence': 3}",82,3,2019-10-01


In [7]:
# Working names for the game plot. No down-sampling is applied at the moment: both
# names simply refer to the full frames. If the plot gets too heavy, thin the data
# by filtering on the index, e.g. heart_df[heart_df.index % 50 == 0] keeps every
# 50th row.
wc_df, wc_df2 = heart_df, heart_df2
wc_df2.shape

(10702, 5)

In [8]:

# Restrict both frames to the window of interest. `between` includes both end
# points, and the timestamps are compared as stored (no time-zone conversion).
start_date = pd.Timestamp('2019-10-01 17:00:00')
end_date = pd.Timestamp('2019-10-02 23:59:59')

wc_df = wc_df.loc[lambda frame: frame['dateTime'].between(start_date, end_date)]
wc_df2 = wc_df2.loc[lambda frame: frame['dateTime'].between(start_date, end_date)]
wc_df2.tail()

Unnamed: 0,dateTime,value,bpm,confidence,just_date
10697,2019-10-02 03:59:10,"{'bpm': 96, 'confidence': 2}",96,2,2019-10-02
10698,2019-10-02 03:59:25,"{'bpm': 96, 'confidence': 2}",96,2,2019-10-02
10699,2019-10-02 03:59:35,"{'bpm': 95, 'confidence': 2}",95,2,2019-10-02
10700,2019-10-02 03:59:40,"{'bpm': 98, 'confidence': 2}",98,2,2019-10-02
10701,2019-10-02 03:59:50,"{'bpm': 97, 'confidence': 2}",97,2,2019-10-02


In [18]:
import bokeh.plotting as bk
from bokeh.models import Span, Label
from datetime import datetime as dt

import time


def add_milestone(plot, when, line_color='black', line_width=1, line_alpha=0.6, label=None):
    """Draw a dashed vertical marker on ``plot`` at ``when``, optionally with a text label.

    Parameters
    ----------
    plot : bokeh figure
        Figure the annotations are added to.
    when : datetime.datetime
        Naive wall-clock time of the milestone.
    line_color, line_width, line_alpha
        Styling of the vertical line.
    label : dict or None
        Keyword arguments for the bokeh ``Label`` (``text`` and ``y`` in screen units,
        plus any text styling). Font size and style default to '9pt' / 'bold'.
        ``None`` draws the line without a label.

    Returns
    -------
    float
        The x position used for the marker, in milliseconds since the epoch.
    """
    # NOTE(review): time.mktime reads `when` in the local time zone of the machine
    # running the notebook, so marker positions move if the notebook is run under a
    # different time-zone setting. The milestone times below were offset by hand to
    # line up with the plotted data -- confirm before changing either.
    location = time.mktime(when.timetuple()) * 1000

    plot.add_layout(Span(location=location,
                         dimension='height', line_color=line_color,
                         line_dash='dashed', line_width=line_width, line_alpha=line_alpha))

    if label is not None:
        label_style = {'text_font_size': '9pt', 'text_font_style': 'bold'}
        label_style.update(label)
        plot.add_layout(Label(x=location, y_units='screen', **label_style))

    return location


output_notebook()

#establish the plot and the tools
TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
p = figure(plot_height=600, plot_width=1000,title="Heart Rate by Time", y_axis_type="linear",x_axis_type='datetime', tools = TOOLS)

#draw the two different fan heart rate lines
p.line(wc_df['dateTime'], wc_df.bpm, legend="Fan 1", line_color="purple", line_alpha=0.6, line_width = 1)
p.line(wc_df2['dateTime'], wc_df2.bpm, legend="Fan 2", line_color="darkblue", line_alpha =0.6,  line_width = 1)


#add the game milestone markers -- note that Bokeh isn't handling the time zone offset in the same way pandas is
#because it is a fairly small number of markers, they are offset manually here
#one entry per marker, in the order they are added to the plot
MILESTONES = [
    # arrival marker (line only, no label)
    dict(when=dt(2019, 10, 1, 12, 42, 0)),
    dict(when=dt(2019, 10, 1, 15, 5, 0), line_color='blue', line_width=3, line_alpha=0.4,
         label=dict(text='Play Ball!', y=450, text_align='right', text_font_size='14pt',
                    text_color='blue', text_font_style='italic', text_alpha=0.8)),
    dict(when=dt(2019, 10, 1, 15, 19, 0),
         label=dict(text='Grandal Home Run', y=500)),
    dict(when=dt(2019, 10, 1, 15, 27, 0),
         label=dict(text='Thames Home Run', y=375)),
    dict(when=dt(2019, 10, 1, 20, 4, 0), line_color='green',
         label=dict(text='Turner Home Run', y=415)),
    dict(when=dt(2019, 10, 1, 21, 52, 0), line_color='green',
         label=dict(text='Strasburg Enters Game', y=490)),
    dict(when=dt(2019, 10, 1, 22, 49, 0), line_color='green',
         label=dict(text='Zimmerman Single', y=465)),
    dict(when=dt(2019, 10, 1, 22, 57, 0), line_color='red', line_width=2,
         label=dict(text='Soto Single', y=510)),
]

for milestone in MILESTONES:
    add_milestone(p, **milestone)


#create the legends and axis labels
p.legend.location = "top_left"

p.xaxis.axis_label = 'Time (UTC)'
p.yaxis.axis_label = 'Heart Rate (Beats per Minute)'
show(p)

#output_file("wildcard_game.html", title="Wildcard Game Time Plot")


#### Outstanding Tasks:

 * add the tooltip
 * add additional datasets
 * change the x-labels to local time instead of UTC
 * it may be more stable to dump the final dataframe to a csv and then create a new notebook to render from the csv.