In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [48]:
def get_pop_data():
    """Load county populations and append a few hand-entered localities.

    Reads ``./../county_pop.csv`` and keeps the ``STNAME``, ``CTYNAME``,
    ``CENSUS2010POP`` and ``COUNTY`` columns, renamed to ``state``,
    ``county``, ``pop`` and ``county_num``.  The substrings " County",
    " Municipality" and " Parish" are removed from every county name.
    Four manually specified rows, all with ``county_num == 99``, are
    concatenated after the rows read from the file.

    Returns:
        pandas.DataFrame with columns ``state``, ``county``, ``pop`` and
        ``county_num``.  The row index is not reset by the concatenation,
        so the labels of the manual rows repeat labels of the file rows.
    """
    suffixes = ('County', 'Municipality', 'Parish')

    def _short_name(name):
        # Remove each suffix in turn, trimming whitespace after every removal.
        for suffix in suffixes:
            name = name.replace(f' {suffix}', '').strip()
        return name

    census = pd.read_csv('./../county_pop.csv')
    populations = census[['STNAME', 'CTYNAME', 'CENSUS2010POP', 'COUNTY']].rename(
        columns={
            'STNAME': 'state',
            'CTYNAME': 'county',
            'CENSUS2010POP': 'pop',
            'COUNTY': 'county_num',
        }
    )
    populations['county'] = populations['county'].apply(_short_name)

    # Localities added by hand: (state, county, pop).
    manual_rows = [
        ('Missouri', 'Kansas City', 491918),
        ('New York', 'New York City', 8399000),
        ('Missouri', 'Joplin', 50657),
        ('New Mexico', 'Doña Ana', 218195),
    ]
    extras = pd.DataFrame(manual_rows, columns=['state', 'county', 'pop'])
    extras['county_num'] = 99  # marker value shared by every manual row

    return pd.concat([populations, extras])

def get_covid_data():
    """Read the county-level COVID table from ``./../us-counties.csv``.

    Returns:
        pandas.DataFrame with the file's contents as parsed by
        ``pandas.read_csv`` using its default options.
    """
    covid_frame = pd.read_csv('./../us-counties.csv')
    return covid_frame

def get_county_data():
    """Attach a population figure to every county-level COVID row.

    The COVID table from ``get_covid_data()`` is indexed by
    ``(state, county)`` and left-joined with the population table from
    ``get_pop_data()`` on the same key.  COVID rows with no matching
    population entry keep ``NaN`` in ``pop``.

    Returns:
        pandas.DataFrame indexed by ``(state, county)`` containing the COVID
        columns plus ``pop``; the helper column ``county_num`` is dropped.
    """
    county_covid = get_covid_data()
    county_pop = get_pop_data()

    # Rows with county_num == 0 are the state-level summary rows (get_data()
    # selects exactly those rows for state populations).  They must not take
    # part in a county-level join: a summary row whose (state, county) key
    # equals a real county's key would match the same COVID rows a second
    # time, duplicating them with the state's population attached and
    # inflating every sum computed from the result.
    county_level_pop = county_pop[county_pop.county_num != 0]

    county = county_covid.set_index(
        ['state', 'county']
    ).join(
        county_level_pop.set_index(['state', 'county'])
    )
    county = county.drop(columns=['county_num'])
    return county

def add_per_capita(df):
    """Return a copy of ``df`` with per-capita and per-thousand rates added.

    Args:
        df: DataFrame with ``cases``, ``deaths`` and ``pop`` columns.

    Returns:
        A new DataFrame (the input is left untouched) with four extra
        columns, in this order: ``cases_per_capita``, ``deaths_per_capita``,
        ``cases_per_k`` and ``deaths_per_k``.  The ``*_per_k`` columns are the
        per-capita values multiplied by 1000.
    """
    # assign() works on a copy and evaluates the keyword arguments in order,
    # so the per-thousand columns can read the per-capita ones defined above.
    return df.assign(
        cases_per_capita=lambda frame: frame['cases'] / frame['pop'],
        deaths_per_capita=lambda frame: frame['deaths'] / frame['pop'],
        cases_per_k=lambda frame: frame['cases_per_capita'] * 1000.0,
        deaths_per_k=lambda frame: frame['deaths_per_capita'] * 1000.0,
    )

def add_pct_changes(df, func, cols, new_names, min_change=-0.998):
    """Append row-to-row change columns computed separately for each entity.

    Args:
        df: DataFrame whose last index level orders the observations (the
            callers in this notebook use ``date``).  Any leading index
            levels identify the entity, e.g. ``state`` or
            ``(state, county)``.
        func: Name of the method that computes the change, e.g.
            ``'pct_change'`` or ``'diff'``.  With a multi-level index it is
            looked up on the GroupBy object, otherwise on the DataFrame.
        cols: List of source column names.
        new_names: Names for the resulting columns, in the same order as
            ``cols``.
        min_change: Results less than or equal to this value are replaced by
            ``NaN``.  Defaults to the previously hard-coded ``-0.998``.

    Returns:
        ``df`` joined with the new columns; ``df`` itself is not modified.
    """
    values = df[cols]
    if df.index.nlevels > 1:
        # Compute within each entity so the first row of one state/county is
        # never compared with the last row of the previous one.  The result
        # keeps the original row order and index.
        entity_levels = list(range(df.index.nlevels - 1))
        chg = getattr(values.groupby(level=entity_levels, sort=False), func)()
    else:
        chg = getattr(values, func)()
    chg.columns = new_names

    # Blank implausible values cell by cell: a bad value in one column must
    # not erase the valid value sitting next to it in the same row.  mask()
    # also avoids the np.NaN alias, which no longer exists in NumPy 2.0.
    chg = chg.mask(chg <= min_change)
    return df.join(chg)
        
def add_day_since_outbreak_numbers(df):
    """Number each entity's rows 0, 1, 2, ... in their order of appearance.

    Rows are grouped by every index level except the last one; within each
    group the running row count is stored in ``day_since_first_case``.

    Args:
        df: DataFrame with a named multi-level index whose leading levels
            identify the entity.

    Returns:
        A copy of ``df`` with the extra ``day_since_first_case`` column.
    """
    result = df.copy()
    entity_keys = result.index.names[:-1]
    # Turn the index levels into columns so they can be used as group keys.
    flattened = result.reset_index()
    running_count = flattened.groupby(entity_keys).cumcount()
    # Assign positionally: the flattened frame has a fresh 0..n-1 index that
    # does not align with the original index.
    result['day_since_first_case'] = running_count.tolist()
    return result


def get_data():
    """Build national, state and county COVID frames with derived metrics.

    County rows come from ``get_county_data()``.  State figures are the sum
    of the county rows per ``(state, date)`` joined with the state-level
    population rows (``county_num == 0``) of ``get_pop_data()``.  National
    figures are the sum of the county rows per ``date`` with a fixed
    population.  Every frame then receives, in this order: per-capita
    columns, percent changes of cases/deaths, percent changes of those
    percent changes, day-over-day differences, and a per-entity day counter.

    Returns:
        list: ``[country, state, county]`` DataFrames indexed by
        ``(Country, date)``, ``(state, date)`` and ``(state, county, date)``
        respectively.
    """
    # National population used for the country-level per-capita figures.
    # NOTE(review): presumably a 2019 US population estimate -- confirm source.
    us_population = 328200000.0

    # (method name, source columns, new column names), applied in order.
    change_steps = [
        ('pct_change',
         ['cases', 'deaths'],
         ['cases_pct_change', 'death_pct_change']),
        ('pct_change',
         ['cases_pct_change', 'death_pct_change'],
         ['cases_pct_change_pct_change', 'death_pct_change_pct_change']),
        ('diff',
         ['cases', 'deaths'],
         ['new_cases', 'new_deaths']),
    ]

    def _enrich(frame):
        # Run the full derived-column pipeline on one frame.
        frame = add_per_capita(frame)
        for func, cols, new_names in change_steps:
            frame = add_pct_changes(frame, func, cols, new_names)
        return add_day_since_outbreak_numbers(frame)

    county = get_county_data()
    pop = get_pop_data()
    counts = county[['date', 'cases', 'deaths']]

    state = counts.groupby(['state', 'date']).sum()
    state = state.join(pop[pop.county_num == 0].set_index('state'))
    state = state.drop(columns=['county_num', 'county'])

    country = counts.groupby(['date']).sum()
    country['pop'] = us_population
    country['Country'] = 'United States'
    country = country.reset_index().set_index(['Country', 'date'])

    # The joined county frame keeps the row order of the CSV, which is not
    # guaranteed to be grouped by county.  The change columns compare
    # neighbouring rows, so each county's rows must be contiguous and in
    # date order.  Assumes `date` values sort chronologically (e.g. ISO
    # formatted strings) -- confirm against the input file.
    county = (
        county
        .reset_index()
        .set_index(['state', 'county', 'date'])
        .sort_index()
    )

    return [_enrich(frame) for frame in (country, state, county)]
    


In [49]:
# get_data() returns a three-element list ordered [country, state, county];
# unpack it into one frame per aggregation level.
country, state, county = get_data()


In [50]:
# Preview only the first rows rather than rendering the whole frame.
country.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,cases,deaths,pop,cases_per_capita,deaths_per_capita,cases_per_k,deaths_per_k,cases_pct_change,death_pct_change,cases_pct_change_pct_change,death_pct_change_pct_change,new_cases,new_deaths,day_since_first_case
Country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
United States,2020-01-21,1,0,328200000.0,3.046923e-09,0.0,3e-06,0.0,,,,,,,0
United States,2020-01-22,1,0,328200000.0,3.046923e-09,0.0,3e-06,0.0,0.0,,,,0.0,0.0,1
United States,2020-01-23,1,0,328200000.0,3.046923e-09,0.0,3e-06,0.0,0.0,,,,0.0,0.0,2
United States,2020-01-24,2,0,328200000.0,6.093845e-09,0.0,6e-06,0.0,1.0,,inf,,1.0,0.0,3
United States,2020-01-25,3,0,328200000.0,9.140768e-09,0.0,9e-06,0.0,0.5,,-0.5,,1.0,0.0,4
United States,2020-01-26,5,0,328200000.0,1.523461e-08,0.0,1.5e-05,0.0,0.666667,,0.333333,,2.0,0.0,5
United States,2020-01-27,5,0,328200000.0,1.523461e-08,0.0,1.5e-05,0.0,0.0,,,,0.0,0.0,6
United States,2020-01-28,5,0,328200000.0,1.523461e-08,0.0,1.5e-05,0.0,0.0,,,,0.0,0.0,7
United States,2020-01-29,5,0,328200000.0,1.523461e-08,0.0,1.5e-05,0.0,0.0,,,,0.0,0.0,8
United States,2020-01-30,6,0,328200000.0,1.828154e-08,0.0,1.8e-05,0.0,0.2,,inf,,1.0,0.0,9


In [51]:
# Draw one figure per metric, with one line per selected state.
states_im_interested_in = ['California', 'Idaho', 'Utah', 'New York', 'Montana', 'Texas']
df = state.reset_index()
# isin() keeps the selected states without reusing the name `state`
# (the state-level frame) as a loop variable.
df = df[df.state.isin(states_im_interested_in)]
cols = ['cases', 'cases_per_k', 'new_cases', 'cases_pct_change', 'cases_pct_change_pct_change']
for col in cols:
    fig = go.Figure()
    for s in df.state.unique():
        # Filter once per state and reuse the rows for both axes.
        state_rows = df[df.state == s]
        fig.add_trace(go.Scatter(x=state_rows.day_since_first_case, y=state_rows[col],
                            mode='lines+markers',
                            name=s))

    # Label every figure so the five charts can be told apart at a glance.
    fig.update_layout(
        title=col,
        xaxis_title='day_since_first_case',
        yaxis_title=col,
    )
    fig.show()
