In [1]:
%matplotlib widget
import os,sys,io, time
import pandas as pd
#import numpy as np, matplotlib as mpl, matplotlib.pyplot as plt

import bokeh.plotting
import bokeh.io
import bokeh.models
from bokeh.io import output_notebook
bokeh.io.output_notebook()

In [2]:
def file_age(filepath):
    return time.time() - os.path.getmtime(filepath)


In [3]:
# get the Virginia COVID Case data from https://data.virginia.gov/Government/VDH-COVID-19-PublicUseDataset-Cases/bre9-aqqr

df_name = "VA_vdh_casedata.csv"
if 1 or file_age(df_name) > 86400:
    !wget -O $df_name 'https://data.virginia.gov/api/views/bre9-aqqr/rows.csv?accessType=DOWNLOAD'

--2020-11-20 12:44:55--  https://data.virginia.gov/api/views/bre9-aqqr/rows.csv?accessType=DOWNLOAD
Resolving data.virginia.gov (data.virginia.gov)... 52.206.140.199, 52.206.140.205, 52.206.68.26
Connecting to data.virginia.gov (data.virginia.gov)|52.206.140.199|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘VA_vdh_casedata.csv’

VA_vdh_casedata.csv     [   <=>              ]   1.54M  2.50MB/s    in 0.6s    

2020-11-20 12:44:57 (2.50 MB/s) - ‘VA_vdh_casedata.csv’ saved [1610236]



In [4]:
df=pd.read_csv(df_name)
df["date"] = pd.to_datetime(df['Report Date'])

df.tail()

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date
33112,11/20/2020,51800,Suffolk,Western Tidewater,2582,153,80,2020-11-20
33113,11/20/2020,51810,Virginia Beach,Virginia Beach,9716,500,112,2020-11-20
33114,11/20/2020,51820,Waynesboro,Central Shenandoah,502,16,4,2020-11-20
33115,11/20/2020,51830,Williamsburg,Peninsula,275,15,8,2020-11-20
33116,11/20/2020,51840,Winchester,Lord Fairfax,847,50,4,2020-11-20


In [5]:

df = df.sort_values(by=['Locality', 'VDH Health District', 'date'])
display(df.head())

df['TC_diff']= df.groupby('Locality')['Total Cases'].diff().fillna(0)
df['TC_sum14']= df.groupby('Locality')['Total Cases'].diff(14).fillna(0)

display(df.tail())



Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date
0,03/17/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-17
133,03/18/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-18
266,03/19/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-19
399,03/20/2020,51001,Accomack,Eastern Shore,1,0,0,2020-03-20
532,03/21/2020,51001,Accomack,Eastern Shore,1,0,0,2020-03-21


Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date,TC_diff,TC_sum14
32546,11/16/2020,51199,York,Peninsula,759,26,9,2020-11-16,14.0,98.0
32679,11/17/2020,51199,York,Peninsula,767,26,9,2020-11-17,8.0,102.0
32812,11/18/2020,51199,York,Peninsula,775,26,9,2020-11-18,8.0,107.0
32945,11/19/2020,51199,York,Peninsula,783,26,9,2020-11-19,8.0,113.0
33078,11/20/2020,51199,York,Peninsula,806,26,9,2020-11-20,23.0,120.0


In [6]:
# subset for York and normalize per capita
dfy = df[df['Locality']=='York'].copy()
dfy['per100k_14daysum']=dfy['TC_sum14']*100000/67782  

In [7]:
dfy

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date,TC_diff,TC_sum14,per100k_14daysum
94,03/17/2020,51199,York,Peninsula,1,0,1,2020-03-17,0.0,0.0,0.000000
227,03/18/2020,51199,York,Peninsula,1,1,1,2020-03-18,0.0,0.0,0.000000
360,03/19/2020,51199,York,Peninsula,1,1,1,2020-03-19,0.0,0.0,0.000000
493,03/20/2020,51199,York,Peninsula,3,1,1,2020-03-20,2.0,0.0,0.000000
626,03/21/2020,51199,York,Peninsula,3,1,1,2020-03-21,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
32546,11/16/2020,51199,York,Peninsula,759,26,9,2020-11-16,14.0,98.0,144.581157
32679,11/17/2020,51199,York,Peninsula,767,26,9,2020-11-17,8.0,102.0,150.482429
32812,11/18/2020,51199,York,Peninsula,775,26,9,2020-11-18,8.0,107.0,157.859019
32945,11/19/2020,51199,York,Peninsula,783,26,9,2020-11-19,8.0,113.0,166.710926


In [8]:
ph = dfy.plot(y='per100k_14daysum',x='date',title="York County Number of new cases per 100,000 persons \nwithin the last 14 days")

ph

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'York County Number of new cases per 100,000 persons \nwithin the last 14 days'}, xlabel='date'>

In [9]:
ph = dfy.plot(y='TC_diff',x='date',title="York County Cases, 14 day sum, per 100K")
ph

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'York County Cases, 14 day sum, per 100K'}, xlabel='date'>

In [10]:
TOOLTIPS = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("radius", "@radius"),
]


#p=bokeh.plotting.figure( tooltips=TOOLTIPS, x_axis_type='datetime')
p=bokeh.plotting.figure( x_axis_type='datetime',y_range=(0,250),title="York County Number of new cases per 100,000 persons within the last 14 days")
p.add_tools(bokeh.models.HoverTool())

#hover = p.select(dict(type=bokeh.models.HoverTool))


#hover(tooltips=TOOLTIPS,
#)

p.add_layout(bokeh.models.BoxAnnotation(bottom=0,top=5, fill_alpha=0.4, fill_color='olive'))
p.add_layout(bokeh.models.BoxAnnotation(bottom=5,top=20, fill_alpha=0.4, fill_color='green'))
p.add_layout(bokeh.models.BoxAnnotation(bottom=20,top=50, fill_alpha=0.4, fill_color='yellow'))
p.add_layout(bokeh.models.BoxAnnotation(bottom=50,top=200, fill_alpha=0.4, fill_color='orange'))
p.add_layout(bokeh.models.BoxAnnotation(bottom=200, fill_alpha=0.4, fill_color='red'))



p.line(dfy['date'],dfy['per100k_14daysum'])
#p.title()

#?p.line

In [11]:
bokeh.plotting.show(p)

In [14]:
bokeh.plotting.output_file('YorkCountyCovidMetric_plot.html', mode='inline')
bokeh.plotting.save(p)


'/Users/drf/2020/Home/School/YCSD_covid_metrics/YorkCountyCovidMetric_plot.html'