In [1]:
from datetime import date, datetime, timedelta
from kaleido.scopes.plotly import PlotlyScope
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sodapy import Socrata
from typing import List, Tuple

In [2]:
# Unauthenticated client (no app token) for the Virginia open-data portal.
client = Socrata('data.virginia.gov', None)
# Request rows newest-first (then by FIPS code) so that later steps which
# depend on row order do not rely on the API's unspecified default ordering.
# NOTE(review): `limit` caps the number of rows returned and the dataset grows
# daily -- confirm 50000 still covers the full history.
results = client.get(
    'bre9-aqqr',
    limit=50000,
    order='report_date DESC, fips ASC',
)



In [3]:
# Build a DataFrame from the records returned by the API call above.
df = pd.DataFrame.from_records(results)

In [4]:
df.columns

Index(['report_date', 'fips', 'locality', 'vdh_health_district', 'total_cases',
       'hospitalizations', 'deaths'],
      dtype='object')

In [5]:
df.head()

Unnamed: 0,report_date,fips,locality,vdh_health_district,total_cases,hospitalizations,deaths
0,2020-11-01T00:00:00.000,51001,Accomack,Eastern Shore,1227,98,19
1,2020-11-01T00:00:00.000,51003,Albemarle,Thomas Jefferson,1612,96,24
2,2020-11-01T00:00:00.000,51005,Alleghany,Alleghany,154,13,4
3,2020-11-01T00:00:00.000,51007,Amelia,Piedmont,150,17,6
4,2020-11-01T00:00:00.000,51009,Amherst,Central Virginia,626,25,6


In [6]:
df.dtypes

report_date            object
fips                   object
locality               object
vdh_health_district    object
total_cases            object
hospitalizations       object
deaths                 object
dtype: object

In [4]:
# The API returns full ISO-8601 timestamps such as '2020-11-01T00:00:00.000',
# so the format has to include the time part; a date-only format leaves
# "T00:00:00.000" unconverted and strict parsers (pandas >= 2.0) raise on it.
df['report_date'] = pd.to_datetime(
    df['report_date'], format='%Y-%m-%dT%H:%M:%S.%f'
    )
# Counts arrive as strings (object dtype); cast so they can be compared,
# differenced and binned numerically.
df['total_cases'] = df['total_cases'].astype(int)

In [5]:
# Rows for Chesterfield only. Take an explicit copy: a column is added to this
# frame later, and assigning into a filtered slice of df would otherwise raise
# SettingWithCopyWarning.
c_field = df[df['locality'] == 'Chesterfield'].copy()

In [9]:
c_field

Unnamed: 0,report_date,fips,locality,vdh_health_district,total_cases,hospitalizations,deaths
20,2020-11-01,51041,Chesterfield,Chesterfield,7302,426,120
153,2020-10-31,51041,Chesterfield,Chesterfield,7274,424,120
286,2020-10-30,51041,Chesterfield,Chesterfield,7212,421,119
419,2020-10-29,51041,Chesterfield,Chesterfield,7162,418,118
552,2020-10-28,51041,Chesterfield,Chesterfield,7111,414,113
...,...,...,...,...,...,...,...
29945,2020-03-21,51041,Chesterfield,Chesterfield,7,1,0
30078,2020-03-20,51041,Chesterfield,Chesterfield,5,1,0
30211,2020-03-19,51041,Chesterfield,Chesterfield,6,3,0
30344,2020-03-18,51041,Chesterfield,Chesterfield,5,2,0


In [10]:
c_field[['report_date', 'total_cases']].sort_values(by='report_date')

Unnamed: 0,report_date,total_cases
30477,2020-03-17,4
30344,2020-03-18,5
30211,2020-03-19,6
30078,2020-03-20,5
29945,2020-03-21,7
...,...,...
552,2020-10-28,7111
419,2020-10-29,7162
286,2020-10-30,7212
153,2020-10-31,7274


In [10]:
# Daily new cases = a day's cumulative total minus the previous day's total.
# Sort newest-first explicitly so diff(periods=-1) always subtracts the
# chronologically previous row instead of relying on the API's row order.
c_field = c_field.sort_values(by='report_date', ascending=False)
# .assign() builds a new frame rather than writing into a slice of df, which
# avoids SettingWithCopyWarning and keeps the cell safe to re-run.
# The oldest row has no predecessor, so its NaN difference is filled with 0.
c_field = c_field.assign(
    new_cases=c_field['total_cases'].diff(periods=-1).fillna(0).astype(int)
)

In [11]:
c_field

Unnamed: 0,report_date,fips,locality,vdh_health_district,total_cases,hospitalizations,deaths,new_cases
20,2020-11-06,51041,Chesterfield,Chesterfield,7481,438,120,36
153,2020-11-05,51041,Chesterfield,Chesterfield,7445,435,120,38
286,2020-11-04,51041,Chesterfield,Chesterfield,7407,434,120,46
419,2020-11-03,51041,Chesterfield,Chesterfield,7361,431,120,41
552,2020-11-02,51041,Chesterfield,Chesterfield,7320,426,120,18
...,...,...,...,...,...,...,...,...
30610,2020-03-21,51041,Chesterfield,Chesterfield,7,1,0,2
30743,2020-03-20,51041,Chesterfield,Chesterfield,5,1,0,-1
30876,2020-03-19,51041,Chesterfield,Chesterfield,6,3,0,1
31009,2020-03-18,51041,Chesterfield,Chesterfield,5,2,0,1


In [13]:
import plotly.figure_factory as ff
import numpy as np

In [14]:
# Single-day snapshot: every locality's row reported on 2020-10-28.
oct_28 = df[df['report_date'] == "2020-10-28"]

In [15]:
oct_28

Unnamed: 0,report_date,fips,locality,vdh_health_district,total_cases,hospitalizations,deaths
532,2020-10-28,51001,Accomack,Eastern Shore,1214,97,19
533,2020-10-28,51003,Albemarle,Thomas Jefferson,1585,93,24
534,2020-10-28,51005,Alleghany,Alleghany,112,11,4
535,2020-10-28,51007,Amelia,Piedmont,147,17,6
536,2020-10-28,51009,Amherst,Central Virginia,600,25,6
...,...,...,...,...,...,...,...
660,2020-10-28,51800,Suffolk,Western Tidewater,2300,136,78
661,2020-10-28,51810,Virginia Beach,Virginia Beach,7926,432,106
662,2020-10-28,51820,Waynesboro,Central Shenandoah,397,11,4
663,2020-10-28,51830,Williamsburg,Peninsula,216,14,8


In [16]:
# Per-locality cumulative case totals and FIPS codes for the one-day snapshot.
values = oct_28['total_cases']
fips = oct_28['fips']

# 11 evenly spaced bin edges running from the smallest to the largest total
# (a complex step in np.mgrid means "number of points", end value included).
endpts = list(np.mgrid[min(values):max(values):11j])
# NOTE(review): this hand-written palette is not passed to create_choropleth
# below (px.colors.sequential.tempo is used instead) -- confirm whether it is
# still needed anywhere before removing it.
colorscale = ["#030512","#1d1d3b","#323268","#3d4b94","#3e6ab0", "#4989bc","#60a7c7","#85c5d3","#b7e0e4","#eafcfd"]
# County-level choropleth of Virginia, coloured by binned total cases.
fig = ff.create_choropleth(
    fips=fips,
    values=values,
    scope=['Virginia'],
    colorscale=px.colors.sequential.tempo,
    binning_endpoints=endpts,
    show_state_data=True,
    title_text='October 28, 2020',
    title_y=0.95,
    round_legend_values=True,
    legend_title='Total COVID-19 Cases',
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
)
# Clear the layout template before rendering.
fig.layout.template = None
fig.show()

In [19]:
# Rows for Arlington only.
is_arlington = df['locality'] == 'Arlington'
arl = df.loc[is_arlington]

In [30]:
arl[['report_date', 'total_cases']]

Unnamed: 0,report_date,total_cases
6,2020-11-01,4764
139,2020-10-31,4714
272,2020-10-30,4686
405,2020-10-29,4647
538,2020-10-28,4630
...,...,...
29931,2020-03-21,22
30064,2020-03-20,17
30197,2020-03-19,17
30330,2020-03-18,14


In [28]:
# Largest reported cumulative total for each (locality, report_date) pair.
df.groupby(['locality', 'report_date'])['total_cases'].max()

locality  report_date
Accomack  2020-03-17       0
          2020-03-18       0
          2020-03-19       0
          2020-03-20       1
          2020-03-21       1
                        ... 
York      2020-10-28     644
          2020-10-29     646
          2020-10-30     650
          2020-10-31     658
          2020-11-01     658
Name: total_cases, Length: 30590, dtype: int32

In [23]:
df

Unnamed: 0,report_date,fips,locality,vdh_health_district,total_cases,hospitalizations,deaths
0,2020-11-01,51001,Accomack,Eastern Shore,1227,98,19
1,2020-11-01,51003,Albemarle,Thomas Jefferson,1612,96,24
2,2020-11-01,51005,Alleghany,Alleghany,154,13,4
3,2020-11-01,51007,Amelia,Piedmont,150,17,6
4,2020-11-01,51009,Amherst,Central Virginia,626,25,6
...,...,...,...,...,...,...,...
30585,2020-03-17,51800,Suffolk,Western Tidewater,0,0,0
30586,2020-03-17,51810,Virginia Beach,Virginia Beach,4,0,0
30587,2020-03-17,51820,Waynesboro,Central Shenandoah,0,0,0
30588,2020-03-17,51830,Williamsburg,Peninsula,1,0,0


In [31]:
from covid import calc_7_day_avg

In [44]:
def locality_line(localities: List[str], df=df):
    '''Plot cumulative total cases over time, one line per locality.

    Parameters
    ----------
    localities : list of str
        Locality names to plot. Each name is matched exactly against the
        'locality' column of ``df``.
    df : pandas.DataFrame, optional
        Frame with 'locality', 'report_date' and 'total_cases' columns.
        Defaults to the notebook-level ``df`` as bound when this function
        was defined.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    '''
    fig = go.Figure()

    for locality in localities:
        # Sort by date so each line is drawn chronologically regardless of
        # the row order of the incoming frame.
        locality_df = (
            df[df['locality'] == locality].sort_values(by='report_date')
        )

        fig.add_trace(go.Scatter(x=locality_df['report_date'],
                                 y=locality_df['total_cases'],
                                 name=locality)
                      )

    fig.update_layout(title_text='COVID-19 in VA',
                      title_x=0.5)
    fig.update_xaxes(title_text='Date')
    fig.update_yaxes(title_text='Total Cases')
    fig.show()

In [56]:
# Compare cumulative case counts across a hand-picked set of localities.
locality_line([
    'Alexandria',
    'Arlington',
    'Chesterfield',
    'Fairfax',
    'Fairfax City',
    'Falls Church',
    'Loudoun',
    'Newport News',
])

In [55]:
# Alphabetical list of every distinct locality name in the data.
sorted(df['locality'].unique())

['Accomack',
 'Albemarle',
 'Alexandria',
 'Alleghany',
 'Amelia',
 'Amherst',
 'Appomattox',
 'Arlington',
 'Augusta',
 'Bath',
 'Bedford',
 'Bland',
 'Botetourt',
 'Bristol',
 'Brunswick',
 'Buchanan',
 'Buckingham',
 'Buena Vista City',
 'Campbell',
 'Caroline',
 'Carroll',
 'Charles City',
 'Charlotte',
 'Charlottesville',
 'Chesapeake',
 'Chesterfield',
 'Clarke',
 'Colonial Heights',
 'Covington',
 'Craig',
 'Culpeper',
 'Cumberland',
 'Danville',
 'Dickenson',
 'Dinwiddie',
 'Emporia',
 'Essex',
 'Fairfax',
 'Fairfax City',
 'Falls Church',
 'Fauquier',
 'Floyd',
 'Fluvanna',
 'Franklin City',
 'Franklin County',
 'Frederick',
 'Fredericksburg',
 'Galax',
 'Giles',
 'Gloucester',
 'Goochland',
 'Grayson',
 'Greene',
 'Greensville',
 'Halifax',
 'Hampton',
 'Hanover',
 'Harrisonburg',
 'Henrico',
 'Henry',
 'Highland',
 'Hopewell',
 'Isle of Wight',
 'James City',
 'King George',
 'King William',
 'King and Queen',
 'Lancaster',
 'Lee',
 'Lexington',
 'Loudoun',
 'Louisa',
 'Lune