# Time Series of YCSD Covid Case Metric

This notebook examines the time series of the York County COVID Cases ostensibly used by YCSD to make decisions about school reopenings.

* YCSD presents their metrics on https://ycsd.yorkcountyschools.org/domain/1313
* VDH presents this number for the localities on https://www.vdh.virginia.gov/coronavirus/coronavirus/covid-19-in-virginia-locality/ and on https://www.vdh.virginia.gov/coronavirus/key-measures/pandemic-metrics/school-metrics/ under the localities tab
* VDH shares the data at https://data.virginia.gov/Government/VDH-COVID-19-PublicUseDataset-Cases/bre9-aqqr
* I'm sharing this notebook on GitHub at https://github.com/drf5n/YCSD_covid_metrics

-- David Forrest


In [1]:
%matplotlib widget
import os,sys,io, time, pathlib
import pandas as pd
#import numpy as np, matplotlib as mpl, matplotlib.pyplot as plt

import bokeh.plotting
import bokeh.io
import bokeh.models
from bokeh.io import output_notebook
bokeh.io.output_notebook()

In [2]:
def file_age(filepath):
    """Return the number of seconds since ``filepath`` was last modified.

    Parameters
    ----------
    filepath : str or path-like
        Path of the file to inspect.

    Returns
    -------
    float
        Seconds elapsed since the file's modification time, or
        ``float('inf')`` when the file does not exist. Treating a missing
        file as infinitely old lets "is the cache stale?" comparisons
        succeed on a first run instead of raising ``FileNotFoundError``.
    """
    try:
        modified_at = os.path.getmtime(filepath)
    except FileNotFoundError:
        return float("inf")
    return time.time() - modified_at


In [3]:
# get the Virginia COVID Case data from https://data.virginia.gov/Government/VDH-COVID-19-PublicUseDataset-Cases/bre9-aqqr

df_name = "VA_vdh_casedata.csv"
if file_age(df_name) > 86400/2:
    !wget -O $df_name 'https://data.virginia.gov/api/views/bre9-aqqr/rows.csv?accessType=DOWNLOAD'
    pathlib.Path(df_name).touch()

In [4]:
# Read the cached CSV and append a parsed datetime column ("date") built from
# the text values in 'Report Date'.
df = pd.read_csv(df_name).assign(
    date=lambda frame: pd.to_datetime(frame['Report Date'])
)

# Show the last few rows as a quick sanity check of the load.
df.tail()

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date
34309,11/29/2020,51800,Suffolk,Western Tidewater,2762,160,80,2020-11-29
34310,11/29/2020,51810,Virginia Beach,Virginia Beach,10815,527,113,2020-11-29
34311,11/29/2020,51820,Waynesboro,Central Shenandoah,622,19,5,2020-11-29
34312,11/29/2020,51830,Williamsburg,Peninsula,289,17,8,2020-11-29
34313,11/29/2020,51840,Winchester,Lord Fairfax,1019,59,4,2020-11-29


In [5]:

# Order the rows so that each locality's reports are contiguous and in date
# order; the row-based differences below depend on this ordering.
df = df.sort_values(by=['Locality', 'VDH Health District', 'date'])
display(df.head())

# 'Total Cases' grouped by locality, reused for both differences.
cumulative_cases = df.groupby('Locality')['Total Cases']

# Change from the previous row of the same locality (0 for the first row).
df['TC_diff'] = cumulative_cases.diff().fillna(0)
# Change relative to 14 rows earlier in the same locality (0 where there is no
# such row). This equals a 14-day sum only if there is one row per locality per
# day -- assumption, confirm against the data.
df['TC_sum14'] = cumulative_cases.diff(periods=14).fillna(0)

display(df.tail())



Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date
0,03/17/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-17
133,03/18/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-18
266,03/19/2020,51001,Accomack,Eastern Shore,0,0,0,2020-03-19
399,03/20/2020,51001,Accomack,Eastern Shore,1,0,0,2020-03-20
532,03/21/2020,51001,Accomack,Eastern Shore,1,0,0,2020-03-21


Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date,TC_diff,TC_sum14
33743,11/25/2020,51199,York,Peninsula,867,27,9,2020-11-25,8.0,149.0
33876,11/26/2020,51199,York,Peninsula,875,27,9,2020-11-26,8.0,156.0
34009,11/27/2020,51199,York,Peninsula,877,27,9,2020-11-27,2.0,153.0
34142,11/28/2020,51199,York,Peninsula,889,27,9,2020-11-28,12.0,154.0
34275,11/29/2020,51199,York,Peninsula,908,27,9,2020-11-29,19.0,163.0


In [6]:
# Keep only the York rows (as an independent copy) and scale the 14-row case
# change to a rate per 100,000.
YORK_POPULATION = 68280  # NOTE(review): presumably York County's population -- confirm source and year
is_york = df['Locality'] == 'York'
dfy = df.loc[is_york].copy()
dfy['per100k_14daysum'] = dfy['TC_sum14'] * 100000 / YORK_POPULATION

In [7]:
# Display the 30 most recent York rows.
dfy.tail(n=30)

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths,date,TC_diff,TC_sum14,per100k_14daysum
30418,10/31/2020,51199,York,Peninsula,658,21,9,2020-10-31,8.0,42.0,61.511424
30551,11/01/2020,51199,York,Peninsula,658,22,9,2020-11-01,0.0,42.0,61.511424
30684,11/02/2020,51199,York,Peninsula,661,22,9,2020-11-02,3.0,44.0,64.440539
30817,11/03/2020,51199,York,Peninsula,665,22,9,2020-11-03,4.0,45.0,65.905097
30950,11/04/2020,51199,York,Peninsula,668,22,9,2020-11-04,3.0,43.0,62.975981
31083,11/05/2020,51199,York,Peninsula,670,22,9,2020-11-05,2.0,44.0,64.440539
31216,11/06/2020,51199,York,Peninsula,686,22,9,2020-11-06,16.0,60.0,87.873462
31349,11/07/2020,51199,York,Peninsula,697,22,9,2020-11-07,11.0,68.0,99.589924
31482,11/08/2020,51199,York,Peninsula,707,23,9,2020-11-08,10.0,70.0,102.519039
31615,11/09/2020,51199,York,Peninsula,710,23,9,2020-11-09,3.0,73.0,106.912712


In [8]:
# Matplotlib line plot of the per-100k metric against the report date.
ph = dfy.plot(
    x='date',
    y='per100k_14daysum',
    title="York County Number of new cases per 100,000 persons \nwithin the last 14 days",
)

# Leave the axes object as the cell's value so it is shown in the output.
ph

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'York County Number of new cases per 100,000 persons \nwithin the last 14 days'}, xlabel='date'>

In [9]:
# Plot the row-to-row change in 'Total Cases' for York (column TC_diff).
# The title describes that column; the earlier title ("14 day sum, per 100K")
# described a different series than the one plotted here.
ph = dfy.plot(y='TC_diff',x='date',title="York County new cases per day")
ph

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'York County Cases, 14 day sum, per 100K'}, xlabel='date'>

In [10]:
# Hover read-out: the date column formatted with %F, and the per-100k value.
TOOLTIPS = [
    ("date:", "@date{%F}"),
    ("cases/14d/100k:","@per100k_14daysum"),
]

# Figure with a datetime x axis and a fixed 0-250 y range.
p = bokeh.plotting.figure(
    x_axis_type='datetime',
    y_range=(0,250),
    title="York County Number of new cases per 100,000 persons within the last 14 days",
)

# Hover tool in vertical-line mode; "@date" is declared as a datetime field so
# the %F format in TOOLTIPS can be applied to it.
hth = bokeh.models.HoverTool(
    tooltips=TOOLTIPS,
    formatters={"$x": "datetime", "@date": "datetime"},
    mode='vline',
)

print(hth)
print(hth.formatters)
p.add_tools(hth)

# Shaded horizontal bands behind the curve -- presumably the metric's
# threshold categories; confirm against the YCSD/VDH definitions. The last
# band sets no upper bound, so it is left to the annotation's default.
for band in (
    dict(bottom=0, top=5, fill_color='olive'),
    dict(bottom=5, top=20, fill_color='green'),
    dict(bottom=20, top=50, fill_color='yellow'),
    dict(bottom=50, top=200, fill_color='orange'),
    dict(bottom=200, fill_color='red'),
):
    p.add_layout(bokeh.models.BoxAnnotation(fill_alpha=0.4, **band))

# Draw the metric, reading both columns from the York data frame.
p.line(x='date', y='per100k_14daysum', source=dfy)

HoverTool(id='1035', ...)
{'$x': 'datetime', '@date': 'datetime'}


In [11]:
# Render the bokeh figure built above.
bokeh.io.show(p)

In [12]:
# Write a standalone HTML copy of the figure with the BokehJS resources
# inlined. The filename and resources are passed straight to save() instead of
# calling output_file(), which changes bokeh's global output state and would
# make a later show(p) in this session write the HTML file as well.
from bokeh.resources import INLINE

bokeh.io.save(
    p,
    filename='YorkCountyCovidMetric_plot.html',
    resources=INLINE,
    title="York County COVID case metric",
)

# needs geckodriver  -- have it in conda env py3plot
bokeh.io.export_png(p, filename="YorkCountyCovidMetric_plot.png")



'/Users/drf/2020/Home/School/YCSD_covid_metrics/YorkCountyCovidMetric_plot.png'

In [13]:
# Export notebooks as Python scripts via a shell command. The *.ipynb glob
# matches every notebook in the current working directory, not just this one.
! jupyter nbconvert --to script *.ipynb

[NbConvertApp] Converting notebook AllCountyCovidMetric.ipynb to script
[NbConvertApp] Writing 9575 bytes to AllCountyCovidMetric.py
[NbConvertApp] Converting notebook YorkCountyCovidMetric.ipynb to script
[NbConvertApp] Writing 4238 bytes to YorkCountyCovidMetric.py
