### This notebook reads in all the heart rate json files then creates a deep detail of a single game.

In [1]:
#import the needed modules -- we have some extra bokeh packages in case we want to apply more stylings.
import json
import pandas as pd
from pandas.io.json import json_normalize
import csv
import os
import glob
import numpy
import matplotlib as pyplot
import bokeh
import seaborn as sns
import matplotlib.pyplot as plt
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import FuncTickFormatter
from bokeh.models import HoverTool

In [2]:
#fetch the sleep data
files = glob.glob('heartrate_data/*2019-10-01*') #the json files bear the name of the day they were sampled, so just import the ones we want

file_list = []

for filename in files:
    df = pd.read_json(filename)
    file_list.append(df)

heart_df = pd.concat(file_list, axis=0, ignore_index=False, sort=False)
heart_df.head()

Unnamed: 0,dateTime,value
0,2019-10-01 04:00:04,"{'bpm': 65, 'confidence': 3}"
1,2019-10-01 04:00:09,"{'bpm': 66, 'confidence': 3}"
2,2019-10-01 04:00:19,"{'bpm': 65, 'confidence': 3}"
3,2019-10-01 04:00:24,"{'bpm': 65, 'confidence': 2}"
4,2019-10-01 04:00:34,"{'bpm': 67, 'confidence': 2}"


In [3]:
#split out the json string into columns

json_struct = json.loads(heart_df.to_json(orient="records"))    
heart_df_flat = pd.io.json.json_normalize(json_struct) #use pd.io.json
heart_df_flat.head()

Unnamed: 0,dateTime,value.bpm,value.confidence
0,1569902404000,65,3
1,1569902409000,66,3
2,1569902419000,65,3
3,1569902424000,65,2
4,1569902434000,67,2


In [4]:
#the json flattening somehow breaks the datetime column, so we will take the value.bpm and value.confidence columns and join them back to the original frame


heart_df['bpm']=heart_df_flat['value.bpm']
heart_df['confidence']=heart_df_flat['value.confidence']
heart_df.head()

Unnamed: 0,dateTime,value,bpm,confidence
0,2019-10-01 04:00:04,"{'bpm': 65, 'confidence': 3}",65,3
1,2019-10-01 04:00:09,"{'bpm': 66, 'confidence': 3}",66,3
2,2019-10-01 04:00:19,"{'bpm': 65, 'confidence': 3}",65,3
3,2019-10-01 04:00:24,"{'bpm': 65, 'confidence': 2}",65,2
4,2019-10-01 04:00:34,"{'bpm': 67, 'confidence': 2}",67,2


In [5]:
#set the index on the just_date column; this will make it easier for us to plot or to join to the other dataframe
heart_df['just_date'] = heart_df['dateTime'].dt.date
heart_df.head()
heart_df.set_index('dateTime')

Unnamed: 0_level_0,value,bpm,confidence,just_date
dateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-10-01 04:00:04,"{'bpm': 65, 'confidence': 3}",65,3,2019-10-01
2019-10-01 04:00:09,"{'bpm': 66, 'confidence': 3}",66,3,2019-10-01
2019-10-01 04:00:19,"{'bpm': 65, 'confidence': 3}",65,3,2019-10-01
2019-10-01 04:00:24,"{'bpm': 65, 'confidence': 2}",65,2,2019-10-01
2019-10-01 04:00:34,"{'bpm': 67, 'confidence': 2}",67,2,2019-10-01
2019-10-01 04:00:39,"{'bpm': 70, 'confidence': 3}",70,3,2019-10-01
2019-10-01 04:00:44,"{'bpm': 69, 'confidence': 2}",69,2,2019-10-01
2019-10-01 04:00:59,"{'bpm': 69, 'confidence': 2}",69,2,2019-10-01
2019-10-01 04:01:04,"{'bpm': 67, 'confidence': 2}",67,2,2019-10-01
2019-10-01 04:01:09,"{'bpm': 64, 'confidence': 3}",64,3,2019-10-01


In [9]:
#because this dataset is so huge, it can be hard to work with. We're going to make a new dataframe from every 25th value in the set

wc_df = heart_df #[heart_df.index % 50 == 0]  # Selects every 3rd raw starting from 0
wc_df.shape

(9495, 5)

In [14]:

start_date = pd.Timestamp('2019-10-01 17:00:00')
end_date = pd.Timestamp('2019-10-02 23:59:59')

wc_df = wc_df[wc_df['dateTime'].between(start_date, end_date)]
wc_df.tail()

Unnamed: 0,dateTime,value,bpm,confidence,just_date
7775,2019-10-01 23:59:33,"{'bpm': 106, 'confidence': 3}",106,3,2019-10-01
7776,2019-10-01 23:59:38,"{'bpm': 105, 'confidence': 2}",105,2,2019-10-01
7777,2019-10-01 23:59:43,"{'bpm': 106, 'confidence': 2}",106,2,2019-10-01
7778,2019-10-01 23:59:48,"{'bpm': 108, 'confidence': 2}",108,2,2019-10-01
7779,2019-10-01 23:59:55,"{'bpm': 111, 'confidence': 2}",111,2,2019-10-01


In [37]:
import bokeh.plotting as bk
from bokeh.models import Span, Label
from datetime import datetime as dt

import time


#output_notebook()

TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
p = figure(plot_height=600, plot_width=1000,title="Heart Rate by Time", y_axis_type="linear",x_axis_type='datetime', tools = TOOLS)

p.line(wc_df['dateTime'], wc_df.bpm, legend="Heart Rate", line_color="purple", line_width = 3)

#add the game milestones markers-- note that Bokeh isn't handling the time zone offset in the same way pandas is
#because it is a fairly small number of markers, we'll just offet them manually

game_arrive = time.mktime(dt(2019, 10, 1, 12, 42, 0).timetuple())*1000
game_arrive_mark = Span(location=game_arrive,
                              dimension='height', line_color='black',
                              line_dash='dashed', line_width=1)
p.add_layout(game_arrive_mark)

game_start = time.mktime(dt(2019, 10, 1, 15, 5, 0).timetuple())*1000
game_start_mark = Span(location=game_start,
                              dimension='height', line_color='green',
                              line_dash='dashed', line_width=1)
p.add_layout(game_start_mark)



grandal_homer = time.mktime(dt(2019, 10, 1, 15, 19, 0).timetuple())*1000
grandal_homer_mark = Span(location=grandal_homer,
                              dimension='height', line_color='black',
                              line_dash='dashed', line_width=2)
p.add_layout(grandal_homer_mark)


grandal_label = Label(x=grandal_homer, y=500, y_units='screen', text='Grandal Home Run')
p.add_layout(grandal_label)


thames_homer = time.mktime(dt(2019, 10, 1, 15, 27, 0).timetuple())*1000
thames_homer_mark = Span(location=thames_homer,
                              dimension='height', line_color='black',
                              line_dash='dashed', line_width=2)
p.add_layout(thames_homer_mark)
thames_label = Label(x=thames_homer, y=375, y_units='screen', text='Thames Home Run')
p.add_layout(thames_label)


turner_homer = time.mktime(dt(2019, 10, 1, 16, 4, 0).timetuple())*1000
turner_homer_mark = Span(location=turner_homer,
                              dimension='height', line_color='green',
                              line_dash='dashed', line_width=2)
p.add_layout(turner_homer_mark)
turner_label = Label(x=turner_homer, y=400, y_units='screen', text='Turner Home Run')
p.add_layout(turner_label)

soto_single = time.mktime(dt(2019, 10, 1, 17, 50, 0).timetuple())*1000
soto_single_mark = Span(location=soto_single,
                              dimension='height', line_color='green',
                              line_dash='dashed', line_width=5)
p.add_layout(soto_single_mark)
soto_label = Label(x=soto_single, y=450, y_units='screen', text='Soto Single')
p.add_layout(soto_label)


p.legend.location = "top_left"

p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Heart Rate'
show(p)

output_file("wildcard_game.html", title="Wildcard Game Time Plot")


#### Outstanding Tasks:

 * add the tooltip
 * drop in the markers for remaining game events; improve the labels
 * improve the stylings; resize the chart
 * it may be more stable to dump the final dataframe to a csv and then create a new notebook to render from the csv.