In [4]:
import numpy as np
import pandas as pd
import plotly.express as px
from datetime import datetime
import dateutil.parser
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def data_parser(time_in):
    """
    Process dates in dataframe to string format
    """
    return dateutil.parser.parse(time_in).strftime('%m/%d')

In [34]:
df_data = pd.read_csv("states-daily.csv")  # daily U.S. data from https://covidtracking.com/
df_data.sort_values(by='date', ascending=True, inplace=True)

df_states = pd.read_csv("nst-est2019-popchg2010_2019.csv")  # census population data
df_state_names = pd.read_csv("state_names.csv")  # state names

# map state names to abbreviations
d_abb_to_name = {}
for i in range(0, df_state_names.shape[0]):
    d_abb_to_name[df_state_names['Abbreviation'].values[i]] = df_state_names['State'].values[i]
    
# get population by state
d_pop = {}
for v in d_abb_to_name.keys():
    d_pop[v] = df_states.loc[df_states['NAME'] == d_abb_to_name[v]][['POPESTIMATE2019']].values[0][0]

# for each state, get the slice of dataframe as appropriate, and then convert dates
ddf_states = {}
for name, group in df_data.groupby("state"):
    ddf_states[name] = group
    ddf_states[name]['date_parsed'] = ddf_states[name].loc[:, ('dateChecked')].apply(lambda x: data_parser(x))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Visualize Per Million
It's more instructive to look at the data in terms of cases per million. We also look at tests per million.

In [35]:
def viz_pos_vs_test(ddf_states_in, d_pop_in, states_in):
    date = ddf_states_in[states_in[0]]['date_parsed']
    pos1 = ddf_states_in[states_in[0]]['positive'].values
    pos2 = ddf_states_in[states_in[1]]['positive'].values
    neg1 = np.nan_to_num(ddf_states_in[states_in[0]]['negative'].values)
    neg2 = np.nan_to_num(ddf_states_in[states_in[1]]['negative'].values)

    test1 = pos1+neg1
    test2 = pos2+neg2
    
    pos1_pm = pos1/(d_pop_in[states_in[0]]/1e6)
    pos2_pm = pos2/(d_pop_in[states_in[1]]/1e6)    
    test1_pm = test1/(d_pop_in[states_in[0]]/1e6)
    test2_pm = test2/(d_pop_in[states_in[1]]/1e6)


    fig = go.Figure()
    #fig = make_subplots(rows=2, cols=1)

    fig.add_trace(go.Scatter(
        x=date,
        y=pos1_pm,
        name = "cases per million ({})".format(states_in[0]),
        connectgaps=True # override default to connect the gaps
    ))
    fig.add_trace(go.Scatter(
        x=date,
        y=pos2_pm,
        name="cases per million ({})".format(states_in[1]),
        connectgaps=True # override default to connect the gaps
    ))

    # Add figure title
    fig.update_layout(
        title_text="Positive Cases of COVID19 (Per Million)"
    )

    # Set x-axis title
    fig.update_xaxes(title_text="date")

    fig.show()


    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=date,
        y=test1_pm,
        name = "tests per million ({})".format(states_in[0]),
        connectgaps=True # override default to connect the gaps
    ))
    fig.add_trace(go.Scatter(
        x=date,
        y=test2_pm,
        name="tests per million ({})".format(states_in[1]),
        connectgaps=True # override default to connect the gaps
    ))

    # Add figure title
    fig.update_layout(
        title_text="Tests of COVID19 (Per Million)"
    )

    # Set x-axis title
    fig.update_xaxes(title_text="date")
    return fig



pairs = [('NY', 'WA')]
for curr_pair in pairs:    
    print("\n\nMaking figures for pair {}".format(curr_pair))
    curr_fig = viz_pos_vs_test(ddf_states_in=ddf_states, d_pop_in=d_pop, states_in=curr_pair)
    curr_fig.show()




Making figures for pair ('NY', 'WA')


In [32]:
pairs = [('NY', 'CA')]
for curr_pair in pairs:    
    print("\n\nMaking figures for pair {}".format(curr_pair))
    curr_fig = viz_pos_vs_test(ddf_states_in=ddf_states, d_pop_in=d_pop, states_in=curr_pair)
    curr_fig.show()



Making figures for pair ('NY', 'CA')


In [33]:
pairs = [('NY', 'GA')]
for curr_pair in pairs:    
    print("\n\nMaking figures for pair {}".format(curr_pair))
    curr_fig = viz_pos_vs_test(ddf_states_in=ddf_states, d_pop_in=d_pop, states_in=curr_pair)
    curr_fig.show()



Making figures for pair ('NY', 'GA')
