In [1]:
import numpy as np
import pandas as pd
import plotly as py

In [72]:
# Load the per-state COVID history export and preview its first rows.
states_history_path = "data/states_covid_history.csv"
df = pd.read_csv(states_history_path)
print(df.head(n=5))


         date state dataQualityGrade    death  deathConfirmed  deathIncrease  \
0  2021-02-05    AK                A    279.0             NaN              0   
1  2021-02-05    AL                A   8449.0          6697.0             84   
2  2021-02-05    AR               A+   5050.0          4032.0             41   
3  2021-02-05    AS              NaN      0.0             NaN              0   
4  2021-02-05    AZ               A+  13948.0         12399.0            196   

   deathProbable  hospitalized  hospitalizedCumulative  hospitalizedCurrently  \
0            NaN        1219.0                  1219.0                   44.0   
1         1752.0       43005.0                 43005.0                 1671.0   
2         1018.0       13902.0                 13902.0                  808.0   
3            NaN           NaN                     NaN                    NaN   
4         1549.0       54309.0                 54309.0                 3167.0   

            ...             tota

In [73]:
# Count missing values per column to see which fields are usable.
df.isna().sum()

date                                    0
state                                   0
dataQualityGrade                     1330
death                                 871
deathConfirmed                      10226
deathIncrease                           0
deathProbable                       12330
hospitalized                         7342
hospitalizedCumulative               7342
hospitalizedCurrently                3345
hospitalizedIncrease                    0
inIcuCumulative                     15661
inIcuCurrently                       8819
negative                             3892
negativeIncrease                        0
negativeTestsAntibody               17732
negativeTestsPeopleAntibody         18211
negativeTestsViral                  14712
onVentilatorCumulative              17920
onVentilatorCurrently               10931
positive                              189
positiveCasesViral                   5454
positiveIncrease                        0
positiveScore                     

In [75]:
# Keep only the columns the state map needs.
map_col_list = ["date", "state", "deathIncrease", "positiveIncrease"]
# Take an explicit copy so that later column assignments operate on an
# independent frame rather than on a selection of the full table; without it
# pandas emits SettingWithCopyWarning when the `date` column is reassigned.
df = df[map_col_list].copy()

In [76]:
# Preview the trimmed frame (first five rows).
print(df.head(n=5))

         date state  deathIncrease  positiveIncrease
0  2021-02-05    AK              0               165
1  2021-02-05    AL             84              1496
2  2021-02-05    AR             41              1824
3  2021-02-05    AS              0                 0
4  2021-02-05    AZ            196              3826


In [77]:
# Number of rows in the trimmed per-state history.
print(df.shape[0])

19093


In [79]:
# Parse the date column into datetimes. `assign` builds a new frame instead of
# setting an attribute on a column subset of the original table, which is what
# triggers pandas' SettingWithCopyWarning.
df = df.assign(date=pd.to_datetime(df["date"]))

# Latest row per state: sort chronologically, then keep each state's last row.
df_recent = df.sort_values("date").drop_duplicates(subset=["state"], keep="last")

In [80]:
# One row per state/territory is expected after de-duplication.
print(df_recent.shape[0])

56


In [81]:
# Confirm the latest-per-state snapshot has no missing values.
df_recent.isna().sum()

date                0
state               0
deathIncrease       0
positiveIncrease    0
dtype: int64

In [82]:
# Persist the latest-per-state snapshot, omitting the index column.
recent_csv_path = './data/states_covid_recent.csv'
df_recent.to_csv(recent_csv_path, index=False)

In [83]:
# plotly components
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [96]:
# Choropleth of each state's most recent daily increase in positive cases.
map_data = {
    'type': 'choropleth',
    'locationmode': 'USA-states',
    'locations': df_recent['state'],
    'z': df_recent['positiveIncrease'],
    'colorscale': 'Reds',
    # Thin black outlines keep neighbouring states distinguishable.
    'marker_line_color': 'black',
    'marker_line_width': 0.3,
}

# Restrict the base map to the United States.
lyt = {'geo': {'scope': 'usa'}}

map_fig = go.Figure(data=[map_data], layout=lyt)
# Centre the title above the map.
map_fig.update_layout(title={'text': 'COVID19 in the States', 'x': 0.5})
map_fig.show()

In [97]:
# Load the daily aggregate file and preview its first rows.
daily_path = "data/daily.csv"
daily = pd.read_csv(daily_path)
print(daily.head(n=5))

       date  states    positive     negative  pending  hospitalizedCurrently  \
0  20210205      56  26586775.0  119000944.0  11546.0                86373.0   
1  20210204      56  26455629.0  118405439.0  11914.0                88668.0   
2  20210203      56  26331722.0  117886913.0  12077.0                91440.0   
3  20210202      56  26214762.0  117399234.0  10590.0                92880.0   
4  20210201      56  26097146.0  117037923.0  11957.0                93536.0   

   hospitalizedCumulative  inIcuCurrently  inIcuCumulative  \
0                822320.0         17284.0          42626.0   
1                819380.0         17918.0          42472.0   
2                815978.0         18147.0          42323.0   
3                812003.0         18388.0          42148.0   
4                808718.0         18572.0          41998.0   

   onVentilatorCurrently                    ...                     \
0                 5596.0                    ...                      
1     

In [98]:
# Count missing values per column in the daily aggregate table.
daily.isna().sum()

date                          0
states                        0
positive                      1
negative                     48
pending                      51
hospitalizedCurrently        64
hospitalizedCumulative       51
inIcuCurrently               73
inIcuCumulative              72
onVentilatorCurrently        72
onVentilatorCumulative       79
dateChecked                   0
death                        28
hospitalized                 51
totalTestResults              0
lastModified                  0
recovered                   390
total                         0
posNeg                        0
deathIncrease                 0
hospitalizedIncrease          0
negativeIncrease              0
positiveIncrease              0
totalTestResultsIncrease      0
hash                          0
dtype: int64

In [103]:
# `date` is loaded as an integer such as 20210205. Passing integers straight to
# pd.to_datetime treats them as nanoseconds since the epoch and yields 1970
# timestamps, so parse them explicitly as YYYYMMDD. The dtype guard keeps the
# cell safe to re-run once the column has already been converted.
if not pd.api.types.is_datetime64_any_dtype(daily["date"]):
    daily["date"] = pd.to_datetime(daily["date"].astype(str), format="%Y%m%d")

# Sort chronologically and keep the last row for each distinct `states` value.
# NOTE(review): in the loaded preview `states` is a number (56), not a state
# code, so this keeps one row per distinct count rather than one row per
# state — confirm that is the intended de-duplication key.
daily_recent = daily.sort_values("date").drop_duplicates(subset=["states"], keep="last")

In [100]:
# Build and show a USA choropleth from the `daily` table, using the same trace
# settings as the per-state map earlier in the notebook.
# NOTE(review): this cell reads `daily` (every row), not `daily_recent`.
map_data = dict (
    type = 'choropleth',
    # NOTE(review): the printed `daily.head()` shows `states` as a number (56),
    # not a state abbreviation, so these locations likely will not match any
    # state under locationmode='USA-states' — confirm the intended data source.
    locations = daily['states'],
    locationmode='USA-states',
    colorscale = 'Reds',
    marker_line_color = 'black', marker_line_width = 0.3,
    # Colour values come from the `positive` column of `daily`.
    z=daily['positive'])
# Restrict the base map to the United States.
lyt = dict(geo=dict(scope='usa'))
map_fig = go.Figure(data=[map_data], layout = lyt)
# Centre the title above the map.
map_fig.update_layout( title_text = 'COVID19 in the States', title_x = 0.5) 
# py.offline.plot(map)
map_fig.show()