In [None]:
# Set up our notebook tools
import pandas as pd
import plotly.express as px
import numpy

def deltas(dataframe, column, category):
    """Return the row-to-row change in ``column`` within each ``category``.

    Rows are processed in the order they appear in ``dataframe``, so the
    caller is expected to have sorted it (e.g. by date) beforehand.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding both ``column`` and ``category``.
    column : str
        Name of the numeric column to difference.
    category : str
        Name of the column whose values define the independent groups.

    Returns
    -------
    pandas.Series
        Named ``"column-" + column`` and indexed like ``dataframe``. Each
        value is the current row minus the previous row of the same
        category; the first row of a category is measured against 0, and
        negative changes are clamped to 0.
    """
    values = dataframe[column]
    # Previous value seen for the same category; 0 when there is none yet.
    previous = dataframe.groupby(category, sort=False)[column].shift(1, fill_value=0)
    # One vectorized pass instead of a per-row loop with label-based writes:
    # this also works when the index has duplicate labels and when the
    # column holds floats.
    return (values - previous).clip(lower=0).rename("column-" + column)

# Load the state-level CSV straight from the NYT GitHub repository
df = pd.read_csv(
    "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
)
# States compared throughout the rest of the notebook
states = [
    "Virginia",
    "Ohio",
    "Georgia",
    "West Virginia",
]

In [None]:
# Cast our dates to bona fide datetimes
df['date'] = pd.to_datetime(df['date'])
# Sort the dataframe by date
df_sorted = df.sort_values(by=['date'])

# Narrow the dataset to states we're interested in.
# `@states` makes query() read the list itself rather than a hand-built
# chain of `state == "..."` clauses, so names containing quotes and an
# empty `states` list are handled (the latter yields an empty frame).
states_query = 'state in @states'
my_states = df_sorted.query(states_query)

In [None]:
# Plot the `cases` column against date, one line per state
px.line(
    my_states,
    x='date',
    y='cases',
    color='state',
    title="COVID 19 Cases For Select US States vs Date",
)

In [None]:
# Per-state change between consecutive rows for cases and for deaths
cases_delta = deltas(my_states, "cases", "state")
deaths_delta = deltas(my_states, "deaths", "state")

# Attach both delta series as new columns in a single step
my_states_with_deltas = my_states.assign(
    cases_delta=cases_delta,
    deaths_delta=deaths_delta,
)
my_states_with_deltas.tail(8)

In [None]:
# Scatter the `cases_delta` column against date with an OLS trendline per state
px.scatter(
    my_states_with_deltas,
    x='date',
    y='cases_delta',
    color='state',
    title='COVID 19 Change in Cases Per Day For Select US States vs Date',
    trendline='ols',
)

In [None]:
# Hard-coded reference figures for each state of interest
facts_by_state = {
    'Virginia': {'population': 8518000, 'density': 202.6},
    'Ohio': {'population': 11690000, 'density': 282.3},
    'Georgia': {'population': 10520000, 'density': 149.0},
    'West Virginia': {'population': 1806000, 'density': 77.1},
}

# Add each state's population expressed in millions of people
for state, facts in facts_by_state.items():
    facts['population_per_million'] = facts['population'] / 1_000_000