In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sodapy import Socrata

In [2]:
client = Socrata('data.cdc.gov', None)
# Use None for public data set.
results = client.get('9mfq-cb36', limit=20_000)
df = pd.DataFrame.from_records(results)
df['submission_date'] = pd.to_datetime(df['submission_date'], format='%Y-%m-%d')
df['new_case'] = df['new_case'].astype(float).astype(int)
df['tot_cases'] = df['tot_cases'].astype(int)



In [3]:
df.tail()

Unnamed: 0,submission_date,state,tot_cases,new_case,tot_death,new_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,pnew_case,conf_death,prob_death,pnew_death
16555,2020-10-19,PW,0,0,0,0.0,2020-10-20T14:59:27.967,,,,,0,,,0
16556,2020-10-20,PW,0,0,0,0.0,2020-10-21T13:34:31.370,,,,,0,,,0
16557,2020-10-21,PW,0,0,0,0.0,2020-10-22T13:34:46.095,,,,,0,,,0
16558,2020-10-22,PW,0,0,0,0.0,2020-10-23T13:44:31.471,,,,,0,,,0
16559,2020-10-23,PW,0,0,0,0.0,2020-10-24T13:26:44.189,,,,,0,,,0


In [4]:
df.columns

Index(['submission_date', 'state', 'tot_cases', 'new_case', 'tot_death',
       'new_death', 'created_at', 'consent_cases', 'consent_deaths',
       'conf_cases', 'prob_cases', 'pnew_case', 'conf_death', 'prob_death',
       'pnew_death'],
      dtype='object')

In [5]:
df.dtypes

submission_date    datetime64[ns]
state                      object
tot_cases                   int32
new_case                    int32
tot_death                  object
new_death                  object
created_at                 object
consent_cases              object
consent_deaths             object
conf_cases                 object
prob_cases                 object
pnew_case                  object
conf_death                 object
prob_death                 object
pnew_death                 object
dtype: object

In [6]:
str(df[df['submission_date'] == '2020-10-01'].query('state == "CO"').submission_date)

'253   2020-10-01\nName: submission_date, dtype: datetime64[ns]'

In [7]:
oct_18_2020 = df.query('submission_date == "2020-10-18T00:00:00.000"')

In [8]:
fig = go.Figure(data=go.Choropleth(
    locations=oct_18_2020['state'],
    z=oct_18_2020['tot_cases'].astype(int),
    locationmode='USA-states',
    # colorscale='blues',
    colorscale=px.colors.sequential.matter,
    colorbar_title='Total Cases'
    )
)

fig.update_layout(
    title_text='Total Coronavirus Cases Reported as of October 18, 2020',
    title_x=0.5,
    geo_scope='usa',
)
fig.show()

In [9]:
fig = go.Figure(data=go.Choropleth(
    locations=oct_18_2020['state'],
    z=oct_18_2020['new_case'],
    locationmode='USA-states',
    # colorscale='reds',
    colorscale=px.colors.sequential.Reds,
    colorbar_title='Total Cases'
    )
)

fig.update_layout(
    title_text='New Coronavirus Cases - October 18, 2020',
    title_x=0.5,
    geo_scope='usa',
)
fig.show()

In [10]:
type(oct_18_2020['new_case'].iloc[0])

numpy.int32

In [11]:
oct_18_2020[oct_18_2020['state'] == 'WI']

Unnamed: 0,submission_date,state,tot_cases,new_case,tot_death,new_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,pnew_case,conf_death,prob_death,pnew_death
7170,2020-10-18,WI,175227,0,1588,0.0,2020-10-19T14:06:44.391,Agree,Agree,166186.0,9041,0,1574.0,14,0


In [12]:
df.columns

Index(['submission_date', 'state', 'tot_cases', 'new_case', 'tot_death',
       'new_death', 'created_at', 'consent_cases', 'consent_deaths',
       'conf_cases', 'prob_cases', 'pnew_case', 'conf_death', 'prob_death',
       'pnew_death'],
      dtype='object')

In [13]:
type(df['submission_date'][0])

pandas._libs.tslibs.timestamps.Timestamp

In [14]:
df['submission_date'] = pd.to_datetime(df['submission_date'], format='%Y-%m-%d')

In [15]:
df.dtypes

submission_date    datetime64[ns]
state                      object
tot_cases                   int32
new_case                    int32
tot_death                  object
new_death                  object
created_at                 object
consent_cases              object
consent_deaths             object
conf_cases                 object
prob_cases                 object
pnew_case                  object
conf_death                 object
prob_death                 object
pnew_death                 object
dtype: object

In [16]:
jan_22_2020 = df.query('submission_date == "2020-01-22"')

In [17]:
fig = go.Figure(data=go.Choropleth(
    locations=jan_22_2020['state'],
    z=jan_22_2020['new_case'],
    locationmode='USA-states',
    colorscale='reds',
    colorbar_title='Total Cases'
    )
)

fig.update_layout(
    title_text='New Coronavirus Cases - January 22, 2020',
    title_x=0.5,
    geo_scope='usa',
)
print(fig)
# fig.show()

Figure({
    'data': [{'colorbar': {'title': {'text': 'Total Cases'}},
              'colorscale': [[0.0, 'rgb(255,245,240)'], [0.125,
                             'rgb(254,224,210)'], [0.25, 'rgb(252,187,161)'],
                             [0.375, 'rgb(252,146,114)'], [0.5, 'rgb(251,106,74)'],
                             [0.625, 'rgb(239,59,44)'], [0.75, 'rgb(203,24,29)'],
                             [0.875, 'rgb(165,15,21)'], [1.0, 'rgb(103,0,13)']],
              'locationmode': 'USA-states',
              'locations': array(['CO', 'FL', 'AZ', 'SC', 'CT', 'NE', 'KY', 'WY', 'IA', 'NM', 'ND', 'WA',
                                  'RMI', 'TN', 'AS', 'MA', 'PA', 'NYC', 'OH', 'AL', 'VA', 'MI', 'MS',
                                  'IL', 'CA', 'WI', 'TX', 'PR', 'NJ', 'LA', 'OK', 'NC', 'NV', 'ID', 'GA',
                                  'NY', 'MD', 'IN', 'AR', 'OR', 'UT', 'MN', 'MO', 'DE', 'WV', 'RI', 'SD',
                                  'DC', 'NH', 'KS', 'ME', 'MT', 'HI', 'AK', 

In [18]:
mar_14_2020 = df.query('submission_date == "2020-03-14"')

In [19]:
fig = go.Figure(data=go.Choropleth(
    locations=mar_14_2020['state'],
    z=mar_14_2020['new_case'],
    locationmode='USA-states',
    colorscale='reds',
    colorbar_title='Total Cases'
    )
)

print(fig)

Figure({
    'data': [{'colorbar': {'title': {'text': 'Total Cases'}},
              'colorscale': [[0.0, 'rgb(255,245,240)'], [0.125,
                             'rgb(254,224,210)'], [0.25, 'rgb(252,187,161)'],
                             [0.375, 'rgb(252,146,114)'], [0.5, 'rgb(251,106,74)'],
                             [0.625, 'rgb(239,59,44)'], [0.75, 'rgb(203,24,29)'],
                             [0.875, 'rgb(165,15,21)'], [1.0, 'rgb(103,0,13)']],
              'locationmode': 'USA-states',
              'locations': array(['CO', 'FL', 'AZ', 'SC', 'CT', 'NE', 'KY', 'WY', 'IA', 'NM', 'ND', 'WA',
                                  'RMI', 'TN', 'AS', 'MA', 'PA', 'NYC', 'OH', 'AL', 'VA', 'MI', 'MS',
                                  'IL', 'CA', 'WI', 'TX', 'PR', 'NJ', 'LA', 'OK', 'NC', 'NV', 'ID', 'GA',
                                  'NY', 'MD', 'IN', 'AR', 'OR', 'UT', 'MN', 'MO', 'DE', 'WV', 'RI', 'SD',
                                  'DC', 'NH', 'KS', 'ME', 'MT', 'HI', 'AK', 

In [20]:
df.set_index('submission_date')

Unnamed: 0_level_0,state,tot_cases,new_case,tot_death,new_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,pnew_case,conf_death,prob_death,pnew_death
submission_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-22,CO,0,0,0,0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
2020-01-23,CO,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
2020-01-24,CO,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
2020-01-25,CO,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
2020-01-26,CO,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-19,PW,0,0,0,0.0,2020-10-20T14:59:27.967,,,,,0,,,0
2020-10-20,PW,0,0,0,0.0,2020-10-21T13:34:31.370,,,,,0,,,0
2020-10-21,PW,0,0,0,0.0,2020-10-22T13:34:46.095,,,,,0,,,0
2020-10-22,PW,0,0,0,0.0,2020-10-23T13:44:31.471,,,,,0,,,0


In [21]:
mar = df.query('submission_date == "2020-03-14"')

In [22]:
fig = go.Figure(data=go.Choropleth(
    locations=mar['state'],
    z=mar['new_case'],
    locationmode='USA-states',
    colorscale='reds',
    colorbar_title='Total Cases'
    )
)

fig.update_layout(
    title_text='New Coronavirus Cases - March 14, 2020',
    title_x=0.5,
    geo_scope='usa',
)

fig.show()

In [23]:
mar.dtypes

submission_date    datetime64[ns]
state                      object
tot_cases                   int32
new_case                    int32
tot_death                  object
new_death                  object
created_at                 object
consent_cases              object
consent_deaths             object
conf_cases                 object
prob_cases                 object
pnew_case                  object
conf_death                 object
prob_death                 object
pnew_death                 object
dtype: object

In [24]:
mar['new_case'] = mar['new_case'].astype(float).astype(int)

In [25]:
mar.dtypes

submission_date    datetime64[ns]
state                      object
tot_cases                   int32
new_case                    int32
tot_death                  object
new_death                  object
created_at                 object
consent_cases              object
consent_deaths             object
conf_cases                 object
prob_cases                 object
pnew_case                  object
conf_death                 object
prob_death                 object
pnew_death                 object
dtype: object

In [26]:
mar['tot_cases'] = mar['tot_cases'].astype(int)

In [27]:
mar[['state', 'new_case', 'tot_cases']]

Unnamed: 0,state,new_case,tot_cases
52,CO,0,49
328,FL,24,55
604,AZ,2,12
880,SC,0,12
1156,CT,0,7
1432,NE,0,10
1708,KY,4,15
1984,WY,1,2
2260,IA,0,16
2536,NM,2,7


In [28]:
df.query('submission_date == "2020-10-01"')

Unnamed: 0,submission_date,state,tot_cases,new_case,tot_death,new_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,pnew_case,conf_death,prob_death,pnew_death
253,2020-10-01,CO,71218,682,2054,3.0,2020-10-02T13:40:08.052,Agree,Agree,66242.0,4976.0,18,1696.0,358.0,1
529,2020-10-01,FL,700602,2551,14444,127.0,2020-10-02T13:40:08.052,Not agree,Not agree,,,532,,,4
805,2020-10-01,AZ,219212,705,5674,24.0,2020-10-02T13:40:08.052,Agree,Agree,214608.0,4604.0,31,5401.0,273.0,5
1081,2020-10-01,SC,148323,381,3400,22.0,2020-10-02T13:40:08.052,Agree,Agree,143787.0,4536.0,217,3203.0,197.0,5
1357,2020-10-01,CT,57742,192,4511,3.0,2020-10-02T13:40:08.052,Agree,Agree,55306.0,2436.0,15,3613.0,898.0,0
1633,2020-10-01,NE,46185,621,493,15.0,2020-10-02T13:40:08.052,Not agree,Not agree,,,0,,,0
1909,2020-10-01,KY,69728,888,1191,17.0,2020-10-02T13:40:08.052,Agree,Agree,60875.0,8853.0,231,1180.0,11.0,0
2185,2020-10-01,WY,6083,135,53,3.0,2020-10-02T13:40:08.052,Agree,Agree,5170.0,913.0,11,53.0,0.0,0
2461,2020-10-01,IA,90009,1179,1360,14.0,2020-10-02T13:40:08.052,Not agree,Not agree,,,142,,,0
2737,2020-10-01,NM,29661,226,882,5.0,2020-10-02T13:40:08.052,,Not agree,,,0,,,0


In [29]:
cases = df.query('submission_date == "2020-10-03"')
fig = go.Figure(data=go.Choropleth(
    locations=cases['state'],
    z=cases['new_case'],
    locationmode='USA-states',
    colorscale='reds',
    zmin=0,
    zmax=cases['new_case'].max() + 1,
    colorbar_title='Total Cases'
    )
)

fig.update_layout(
    title_text=f'New COVID-19 Cases Oct 3, 2020',
    title_x=0.5,
    geo_scope='usa',
)
fig.data[0].update(zmin=0.2)
fig.show()

In [30]:
df.query('submission_date == "2020-01-31"').sort_values('new_case', ascending=False)

Unnamed: 0,submission_date,state,tot_cases,new_case,tot_death,new_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,pnew_case,conf_death,prob_death,pnew_death
6633,2020-01-31,CA,3,1,0,0.0,2020-03-26T16:22:39.452,Not agree,Not agree,,,,,,
6357,2020-01-31,IL,2,1,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
9,2020-01-31,CO,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
12153,2020-01-31,WV,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Not agree,,,,,,
9117,2020-01-31,ID,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
9393,2020-01-31,GA,0,0,0,0.0,2020-03-26T16:22:39.452,Not agree,Not agree,,,,,,
9669,2020-01-31,NY,0,0,0,0.0,2020-03-26T16:22:39.452,Not agree,Not agree,,,,,,
9945,2020-01-31,MD,0,0,0,0.0,2020-03-26T16:22:39.452,,Agree,,,,,,
10221,2020-01-31,IN,0,0,0,0.0,2020-03-26T16:22:39.452,Not agree,Agree,,,,,,
10497,2020-01-31,AR,0,0,0,0.0,2020-03-26T16:22:39.452,Not agree,Not agree,,,,,,


In [31]:
virginia = df[df['state'] == 'VA']

In [32]:
fig = go.Figure()
fig.add_trace(go.Bar(x=virginia['submission_date'], y=virginia['new_case'], name='New Cases'))
fig.update_layout(title_text='COVID-19 in Virginia', 
                  title_x=0.5)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='New Cases')
fig.show()

In [33]:
virginia

Unnamed: 0,submission_date,state,tot_cases,new_case,tot_death,new_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,pnew_case,conf_death,prob_death,pnew_death
5520,2020-01-22,VA,0,0,0,0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
5521,2020-01-23,VA,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
5522,2020-01-24,VA,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
5523,2020-01-25,VA,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
5524,2020-01-26,VA,0,0,0,0.0,2020-03-26T16:22:39.452,Agree,Agree,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5791,2020-10-19,VA,167754,926,3485,28.0,2020-10-20T14:59:27.967,Agree,Agree,157213.0,10541,152,3236.0,249,1
5792,2020-10-20,VA,168772,1018,3515,30.0,2020-10-21T13:34:31.370,Agree,Agree,157998.0,10774,233,3266.0,249,0
5793,2020-10-21,VA,170104,1332,3524,9.0,2020-10-22T13:34:46.095,Agree,Agree,159060.0,11044,270,3274.0,250,1
5794,2020-10-22,VA,171284,1180,3539,15.0,2020-10-23T13:44:31.471,Agree,Agree,160004.0,11280,236,3293.0,246,-4


In [34]:
va_new_cases = tuple(virginia['new_case'])

In [35]:
va_7_day_avg = []
for x in range(len(va_new_cases)):
    if x < 7:
        va_7_day_avg.append(round(sum(va_new_cases[:x+1]) / (x + 1)))
    else:
        va_7_day_avg.append(round(sum(va_new_cases[x - 7:x + 1]) / 7))

In [37]:
v_select = virginia[['submission_date', 'tot_cases', 'new_case']]

In [38]:
v_select['avg_7_day'] = va_7_day_avg

In [39]:
v_select[40:80]

Unnamed: 0,submission_date,tot_cases,new_case,avg_7_day
5560,2020-03-02,0,0,0
5561,2020-03-03,0,0,0
5562,2020-03-04,0,0,0
5563,2020-03-05,0,0,0
5564,2020-03-06,0,0,0
5565,2020-03-07,0,0,0
5566,2020-03-08,2,2,0
5567,2020-03-09,5,3,1
5568,2020-03-10,8,3,1
5569,2020-03-11,15,7,2


In [40]:
v_select

Unnamed: 0,submission_date,tot_cases,new_case,avg_7_day
5520,2020-01-22,0,0,0
5521,2020-01-23,0,0,0
5522,2020-01-24,0,0,0
5523,2020-01-25,0,0,0
5524,2020-01-26,0,0,0
...,...,...,...,...
5791,2020-10-19,167754,926,1169
5792,2020-10-20,168772,1018,1138
5793,2020-10-21,170104,1332,1213
5794,2020-10-22,171284,1180,1192


In [41]:
fig = go.Figure()
fig.add_trace(go.Bar(x=v_select['submission_date'], y=v_select['new_case'], name='New Cases'))
fig.add_trace(go.Scatter(x=v_select['submission_date'], y=v_select['avg_7_day'], name='7 Day Avg'))
fig.update_layout(title_text='COVID-19 in Virginia', 
                  title_x=0.5)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='New Cases')
fig.show()

In [42]:
from covid import state_line



In [43]:
state_line('TX')

In [44]:
state_line('IL')

In [45]:
total_cases = df.groupby('submission_date', as_index=False)['tot_cases'].sum()

In [46]:
total_cases

Unnamed: 0,submission_date,tot_cases
0,2020-01-22,1
1,2020-01-23,1
2,2020-01-24,2
3,2020-01-25,2
4,2020-01-26,5
...,...,...
271,2020-10-19,8188585
272,2020-10-20,8249011
273,2020-10-21,8312667
274,2020-10-22,8387047


In [47]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=total_cases['submission_date'], y=total_cases['tot_cases'], line_color='red'))
fig.update_layout(title_text='Total COVID-19 Cases', 
                  title_x=0.5)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Total Cases')
fig.show()

In [48]:
new_cases = df.groupby('submission_date', as_index=False)['new_case'].sum()

In [49]:
fig = go.Figure()
fig.add_trace(go.Bar(x=new_cases['submission_date'], y=new_cases['new_case'], marker_color='blue'))
fig.update_layout(title_text='New COVID-19 Cases',
                  title_x=0.5)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Total Cases')
fig.show()

In [50]:
def calc_7_day_avg(column):
    averages = []
    for x in range(len(column)):
        if x < 7:
            averages.append(round(sum(column[:x+1]) / (x + 1)))
        else:
            averages.append(round(sum(column[x - 7:x + 1]) / 7))
    return averages

In [51]:
avg_7_day = calc_7_day_avg(new_cases['new_case'])

In [52]:
new_cases['avg_7_day'] = avg_7_day

In [96]:
fig = go.Figure()
fig.add_trace(go.Bar(x=new_cases['submission_date'], y=new_cases['new_case'], marker_color='blue', name='New Cases'))
fig.add_trace(go.Scatter(x=new_cases['submission_date'], y=new_cases['avg_7_day'], name='7 Day Avg'))
fig.update_layout(title_text='New COVID-19 Cases Nationwide',
                  title_x=0.5)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Total Cases')
fig.show()

In [54]:
day_total_new_cases = df[df['submission_date'] == '2020-10-23']

In [55]:
day_total_new_cases['new_case'].sum()

82929

In [74]:
# START OF FIGURING OUT SLIDERS!!!

from covid import date_range
from datetime import date, datetime, timedelta
d_range = date_range(date(2020, 1, 22), (date.today() - timedelta(days=1)))

In [105]:
fig = go.Figure()

for dt in d_range:
    cases = df.query(f'submission_date == "{dt}"')
    fig.add_trace(
        go.Choropleth(
            locations=cases['state'],
            z=cases['new_case'],
            locationmode='USA-states',
            name='',
            visible=False,
            colorscale='reds',
            zmin=0,
            zmax=cases['new_case'].max() + 1,
            colorbar_title='Total Cases'
            )
    )

fig.data[-1].visible = True

steps = []
for i in range(len(fig.data)):
    step = dict(
        method='update',
        label=d_range[i],
        args=[{'visible': [False] * len(fig.data)},
              {'title': f"New COVID-19 Cases - {datetime.strptime(d_range[i], '%Y-%m-%d').strftime('%B %d, %Y')}"}],
    )
    step['args'][0]['visible'][i] = True
    steps.append(step)

sliders = [dict(
    active=275,
    steps=steps,
    tickcolor='white',
    font={'color': 'white'}
)]

fig.update_layout(
    title_text=f"New COVID-19 Cases - {datetime.strptime(d_range[-1], '%Y-%m-%d').strftime('%B %d, %Y')}",
    title_x=0.5,
    geo_scope='usa',
    sliders=sliders
)

fig.show()