# Time Series of YCSD Covid Case Metric

This notebook examines the time series of the York County COVID Cases ostensibly used by YCSD to make decisions about school reopenings.

* YCSD presents their metrics on https://ycsd.yorkcountyschools.org/domain/1313
* VDH presents this number for each locality on https://www.vdh.virginia.gov/coronavirus/coronavirus/covid-19-in-virginia-locality/ and on https://www.vdh.virginia.gov/coronavirus/key-measures/pandemic-metrics/school-metrics/ under the localities tab
* VDH shares the data at https://data.virginia.gov/Government/VDH-COVID-19-PublicUseDataset-Cases/bre9-aqqr
* I'm sharing this notebook on GitHub at https://github.com/drf5n/YCSD_covid_metrics

-- David Forrest


In [1]:
%matplotlib widget
import os,sys,io, time, pathlib
import pandas as pd
#import numpy as np, matplotlib as mpl, matplotlib.pyplot as plt

import bokeh.plotting
import bokeh.io
import bokeh.models
from bokeh.io import output_notebook
bokeh.io.output_notebook()

In [2]:
def file_age(filepath):
    """Return how many seconds ago ``filepath`` was last modified.

    Parameters
    ----------
    filepath : str or path-like
        Path of the file to inspect.

    Returns
    -------
    float
        Seconds elapsed since the file's modification time, or
        ``float("inf")`` when the file does not exist. Reporting a missing
        file as infinitely old lets callers write ``file_age(path) > limit``
        as a single "needs refresh" check that also covers the first run,
        before anything has been downloaded.
    """
    try:
        modified_at = os.path.getmtime(filepath)
    except FileNotFoundError:
        # No cached copy yet: treat it as older than any threshold.
        return float("inf")
    return time.time() - modified_at


In [6]:
# get the Virginia COVID Case data from https://data.virginia.gov/Government/VDH-COVID-19-PublicUseDataset-Cases/bre9-aqqr

df_name = "VA_vdh_casedata.csv"
if file_age(df_name) > 86400:
    !wget -O $df_name 'https://data.virginia.gov/api/views/bre9-aqqr/rows.csv?accessType=DOWNLOAD'
    pathlib.Path(df_name).touch()

In [7]:
# Load the cached CSV and append a 'date' column holding the 'Report Date'
# values parsed into pandas datetimes.
df = pd.read_csv(df_name).assign(
    date=lambda frame: pd.to_datetime(frame['Report Date'])
)

# Preview the last rows of the loaded table.
df.tail()

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date
33777,11/25/2020,51800,Suffolk,Western Tidewater,2678,159,80,2020-11-25
33778,11/25/2020,51810,Virginia Beach,Virginia Beach,10387,526,113,2020-11-25
33779,11/25/2020,51820,Waynesboro,Central Shenandoah,580,18,5,2020-11-25
33780,11/25/2020,51830,Williamsburg,Peninsula,285,17,8,2020-11-25
33781,11/25/2020,51840,Winchester,Lord Fairfax,915,55,4,2020-11-25


In [8]:

# Order the rows by locality, then district, then date, so the row-to-row
# differences below run chronologically within each locality.
sort_keys = ['Locality', 'VDH Health District', 'date']
df = df.sort_values(by=sort_keys)
display(df.head())

# 'Total Cases' grouped per locality; both differences are taken from it.
cases_by_locality = df.groupby('Locality')['Total Cases']

# Change in 'Total Cases' versus the previous row, and versus the row 14
# positions earlier, within the same locality. Rows with no earlier
# counterpart would be NaN and are set to 0 instead.
df['TC_diff'] = cases_by_locality.diff(periods=1).fillna(0)
df['TC_sum14'] = cases_by_locality.diff(periods=14).fillna(0)

display(df.tail())



Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date
0,03/17/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-17
133,03/18/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-18
266,03/19/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-19
399,03/20/2020,51001,Accomack,Eastern Shore,1,0,0,2020-03-20
532,03/21/2020,51001,Accomack,Eastern Shore,1,0,0,2020-03-21


Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date,TC_diff,TC_sum14
33211,11/21/2020,51199,York,Peninsula,817,27,9,2020-11-21,11.0,120.0
33344,11/22/2020,51199,York,Peninsula,825,27,9,2020-11-22,8.0,118.0
33477,11/23/2020,51199,York,Peninsula,848,27,9,2020-11-23,23.0,138.0
33610,11/24/2020,51199,York,Peninsula,859,27,9,2020-11-24,11.0,146.0
33743,11/25/2020,51199,York,Peninsula,867,27,9,2020-11-25,8.0,149.0


In [9]:
# Take an independent copy of York's rows and express the 14-row case change
# as a rate per 100,000.
is_york = df['Locality'] == 'York'
dfy = df[is_york].copy()

# NOTE(review): 67782 is presumably York County's population -- confirm the source.
dfy['per100k_14daysum'] = dfy['TC_sum14'] * 100000 / 67782

In [10]:
# Display the York subset, including the derived TC_diff, TC_sum14 and
# per100k_14daysum columns.
dfy

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date,TC_diff,TC_sum14,per100k_14daysum
94,03/17/2020,51199,York,Peninsula,1,0,1,2020-03-17,0.0,0.0,0.000000
227,03/18/2020,51199,York,Peninsula,1,1,1,2020-03-18,0.0,0.0,0.000000
360,03/19/2020,51199,York,Peninsula,1,1,1,2020-03-19,0.0,0.0,0.000000
493,03/20/2020,51199,York,Peninsula,3,1,1,2020-03-20,2.0,0.0,0.000000
626,03/21/2020,51199,York,Peninsula,3,1,1,2020-03-21,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
33211,11/21/2020,51199,York,Peninsula,817,27,9,2020-11-21,11.0,120.0,177.038152
33344,11/22/2020,51199,York,Peninsula,825,27,9,2020-11-22,8.0,118.0,174.087516
33477,11/23/2020,51199,York,Peninsula,848,27,9,2020-11-23,23.0,138.0,203.593874
33610,11/24/2020,51199,York,Peninsula,859,27,9,2020-11-24,11.0,146.0,215.396418


In [11]:
# Line plot of the per-100k 14-day metric against date.
metric_title = "York County Number of new cases per 100,000 persons \nwithin the last 14 days"
ph = dfy.plot(x='date', y='per100k_14daysum', title=metric_title)

# Bare name as the last expression so the notebook renders the axes.
ph

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'York County Number of new cases per 100,000 persons \nwithin the last 14 days'}, xlabel='date'>

In [12]:
# Plot TC_diff, the day-over-day change in York's 'Total Cases'.
# The title used to read "14 day sum, per 100K", which describes the
# per100k_14daysum column rather than the TC_diff series drawn here.
ph = dfy.plot(y='TC_diff', x='date', title="York County new cases per day")
ph

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'York County Cases, 14 day sum, per 100K'}, xlabel='date'>

In [13]:
# Hover read-out: the row's date and its per-100k 14-day metric value.
TOOLTIPS = [
    ("date:", "@date{%F}"),
    ("cases/14d/100k:", "@per100k_14daysum"),
]

# Datetime x axis; the y axis is pinned to 0-250.
p = bokeh.plotting.figure(
    x_axis_type='datetime',
    y_range=(0, 250),
    title="York County Number of new cases per 100,000 persons within the last 14 days",
)

# Mark "$x" and "@date" as datetime fields so the "%F" format in TOOLTIPS is
# applied as a date format.
hover_formatters = {
    "$x": "datetime",
    "@date": "datetime",
}
hth = bokeh.models.HoverTool(
    tooltips=TOOLTIPS,
    formatters=hover_formatters,
    mode='vline',
)

print(hth)
print(hth.formatters)
p.add_tools(hth)

# Shaded horizontal bands by metric value. The final band has no 'top', so
# it is created without an upper limit, exactly as a bare bottom=200 call.
bands = [
    dict(bottom=0, top=5, fill_color='olive'),
    dict(bottom=5, top=20, fill_color='green'),
    dict(bottom=20, top=50, fill_color='yellow'),
    dict(bottom=50, top=200, fill_color='orange'),
    dict(bottom=200, fill_color='red'),
]
for band in bands:
    p.add_layout(bokeh.models.BoxAnnotation(fill_alpha=0.4, **band))

# Draw the metric as a line, reading both columns straight from dfy.
p.line(x='date', y='per100k_14daysum', source=dfy)

HoverTool(id='1035', ...)
{'$x': 'datetime', '@date': 'datetime'}


In [14]:
# Display the bokeh figure `p`.
bokeh.plotting.show(p)

In [15]:
# Output file names for the two exported copies of the figure.
html_path = 'YorkCountyCovidMetric_plot.html'
png_path = "YorkCountyCovidMetric_plot.png"

# Point bokeh's file output at the HTML path, then write the figure there.
bokeh.plotting.output_file(html_path, mode='inline')
bokeh.plotting.save(p)

# PNG export needs geckodriver (available in the conda env "py3plot").
bokeh.io.export_png(p, filename=png_path)



'/Users/drf/2020/Home/School/YCSD_covid_metrics/YorkCountyCovidMetric_plot.png'

In [None]:
# Convert notebooks to Python scripts. The *.ipynb glob matches every notebook
# in the current directory, not only this one.
! jupyter nbconvert --to script *.ipynb

[NbConvertApp] Converting notebook AllCountyCovidMetric.ipynb to script
