### Comparison of Covid-19 Infections and Deaths per 100K people, by State

In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from IPython.display import display
import scrapbook as sb

# States whose curves are drawn in the line charts further down
states = [
    'Alaska',
    'Connecticut',
    'Colorado',
    'Wisconsin',
    'New York',
    'South Dakota',
    'Montana',
]

In [None]:
# Parameters that will be changed by the bmonreporter report-creating script.
# This cell must have a "parameters" tag.
# Both are left as None: this notebook reads no BMON data, and none of the
# visible cells reference either name.
server_web_address = None     # irrelevant, no BMON data used
org_id = None  # irrelevant, no BMON data used

In [None]:
# The report generator script needs to know the title of this report and where
# to sort it relative to other reports.  Glue that information to this notebook
# under the names 'title' and 'sort_order'.
sb.glue('title', 'COVID per 100K People, by State')
sb.glue('sort_order', 100)

In [None]:
# Download the New York Times state-level file, parse its 'date' column as
# datetimes, and make that column the index.
dfs = pd.read_csv(
    'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv',
    parse_dates=['date'],
).set_index('date')

In [None]:
# Read the state population table and build a {state name: population} lookup.
# (A read of a local 'state-population.csv' was previously used in place of the
# hosted copy below.)
df_pop = pd.read_csv('https://raw.githubusercontent.com/alanmitchell/an-data/master/demographic/state-population.csv')
state_to_pop = {
    name: population
    for name, population in zip(df_pop['state'], df_pop['population'])
}

In [None]:
days_to_avg = 7   # rolling-window length, in rows (one row per day per state)

def new_per_capita_avg(df):
    """Rolling-average daily new cases and deaths per 100K people for one state.

    `df` holds the rows for a single state and must have `cases` and `deaths`
    columns.  Row-over-row differences of those columns are divided by the
    state's population in units of 100,000 (population comes from
    `state_to_pop`) and smoothed with a `days_to_avg`-row rolling mean.  Rows
    are differenced in the order given; this function does not sort them.

    The state name is taken from the `state` column when the frame has one,
    otherwise from the `name` attribute that pandas groupby sets on each group
    it hands to an applied function.

    Returns a DataFrame indexed like `df` with columns
    `new_deaths_p_100k_avg` and `new_cases_p_100k_avg`.  When the frame is
    empty or the state has no entry in `state_to_pop`, both columns are all
    NaN so the caller can drop them.  Any other problem (for example a missing
    `cases` column) is raised instead of being hidden as NaN.
    """
    columns = ['new_deaths_p_100k_avg', 'new_cases_p_100k_avg']

    if df.empty:
        state = None
    elif 'state' in df.columns:
        state = df['state'].iloc[0]
    else:
        # Grouping column not passed through; fall back to the group label.
        state = getattr(df, 'name', None)

    population = state_to_pop.get(state)
    if population is None:
        # Unknown population: keep the caller's index so the NaN rows line up
        # with the input rows.
        return pd.DataFrame(np.nan, index=df.index, columns=columns)

    pop_100k = population / 100000.
    new_deaths = df['deaths'].diff() / pop_100k
    new_cases = df['cases'].diff() / pop_100k
    return pd.DataFrame({
        'new_deaths_p_100k_avg': new_deaths.rolling(days_to_avg).mean(),
        'new_cases_p_100k_avg': new_cases.rolling(days_to_avg).mean(),
    })

# Run the per-state calculation one group at a time.  Iterating the groupby
# (instead of calling .apply) always hands the function the full sub-frame,
# 'state' column included, regardless of how .apply treats grouping columns.
# The stacked result has two index levels: the state name and each frame's own
# index.  The plotting cells read that second level as a column called
# 'level_1', so both levels are named explicitly before being turned into
# columns.  Rows with NaN (warm-up rows of the rolling mean, and states with no
# population entry) are then dropped.
df_results = (
    pd.concat({state: new_per_capita_avg(group) for state, group in dfs.groupby('state')})
    .rename_axis(['state', 'level_1'])
    .reset_index()
    .dropna()
)

All values shown in the graphs and tables below are 7-day rolling averages.

A selection of states are shown in the graphs below.
You can click on State names in the Legend to hide and then show again various states.
You can also click and drag a rectangle on the graph to zoom in on a particular region.

In [None]:
# Keep only the rows for the selected states, then draw one line per state.
df_state_subset = df_results[df_results['state'].isin(states)]
fig = px.line(df_state_subset, x='level_1', y='new_cases_p_100k_avg', color='state')

# Titles and font sizes, written as nested layout properties.
fig.update_layout(
    title=dict(text='New Confirmed Cases per 100,000 People', font=dict(size=30)),
    xaxis=dict(title=dict(text='Date', font=dict(size=20))),
    yaxis=dict(title=dict(text='New Confirmed Cases per 100K People', font=dict(size=18))),
)
fig.show()

In [None]:
# Same chart as the new-cases figure, but for the deaths column; reuses the
# state subset built in the previous plotting cell.
fig = px.line(df_state_subset, x='level_1', y='new_deaths_p_100k_avg', color='state')

# Titles and font sizes, written as nested layout properties.
fig.update_layout(
    title=dict(text='New Deaths per 100,000 People', font=dict(size=30)),
    xaxis=dict(title=dict(text='Date', font=dict(size=20))),
    yaxis=dict(title=dict(text='New Deaths per 100K People', font=dict(size=20))),
)
fig.show()

The tables below show new cases and new deaths per 100,000 people, sorted from the highest-incidence states
to the lowest.

In [None]:
# Number of most-recent rows averaged for the tables (same value as the
# rolling-average window used for the graphs).
days_to_avg = 7
# Report the latest date present in the data rather than the date of the last
# row, so the label does not depend on the row order of the downloaded file.
print(f'Last Day of Data: {dfs.index.max().strftime("%b %d, %Y")}')
print(f'Number of Days in Average: {days_to_avg}')
def new_per_capita(df):
    """Average daily new cases and deaths per 100K people over a state's latest rows.

    `df` holds the rows for a single state and must have `cases` and `deaths`
    columns.  Row-over-row differences are taken in the order given (no
    sorting is done here), the last `days_to_avg` of them are averaged, and the
    averages are divided by the state's population in units of 100,000
    (population comes from `state_to_pop`).

    The state name is taken from the `state` column when the frame has one,
    otherwise from the `name` attribute that pandas groupby sets on each group
    it hands to an applied function.

    Returns a Series with entries 'New Deaths per 100K People' and
    'New Cases per 100K People'.  Both are NaN when the frame is empty or the
    state has no entry in `state_to_pop`; any other problem (for example a
    missing `cases` column) is raised instead of being hidden as NaN.
    """
    labels = ['New Deaths per 100K People', 'New Cases per 100K People']

    if df.empty:
        state = None
    elif 'state' in df.columns:
        state = df['state'].iloc[0]
    else:
        # Grouping column not passed through; fall back to the group label.
        state = getattr(df, 'name', None)

    population = state_to_pop.get(state)
    if population is None:
        return pd.Series(np.nan, index=labels)

    pop_100k = population / 100000.
    # .iloc makes the "last N rows" slice explicitly positional.
    recent_deaths = df['deaths'].diff().iloc[-days_to_avg:]
    recent_cases = df['cases'].diff().iloc[-days_to_avg:]
    return pd.Series({
        'New Deaths per 100K People': recent_deaths.mean() / pop_100k,
        'New Cases per 100K People': recent_cases.mean() / pop_100k,
    })

# Build one summary row per state.  Iterating the groupby (instead of calling
# .apply) always hands the function the full sub-frame, 'state' column
# included.  Each returned Series becomes a row labelled by its state; states
# whose values are NaN (no population entry) are dropped.
df_results2 = (
    pd.DataFrame.from_dict(
        {state: new_per_capita(group) for state, group in dfs.groupby('state')},
        orient='index',
    )
    .rename_axis('state')
    .dropna()
)

# Cases table, highest first.
display(df_results2.sort_values('New Cases per 100K People', ascending=False)[['New Cases per 100K People']])
# Deaths table, highest first; left as the cell's final expression so the
# notebook renders it as the cell output.
df_results2.sort_values('New Deaths per 100K People', ascending=False)[['New Deaths per 100K People']]

Developed by Alan Mitchell, alan@analysisnorth.com, using [data compiled by the New York Times](https://github.com/nytimes/covid-19-data).
This page is regenerated nightly using the most recent NY Times-compiled data.

Also see the NY Times [COVID US Tracking page](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html).