In [3]:
import sys
# Add the parent directory to the module search path. Presumably this is what
# lets the project-local `load` module be imported in the next cell -- TODO
# confirm against the repository layout.
sys.path.append("../")

In [4]:
import load
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd

%matplotlib inline

# New Cases Data

In [5]:
# Read the USA case data through the project-local `load` helper.
# NOTE(review): the cells below treat each column as one region labelled
# "<county>, <state>" and each row as one time step of a cumulative count --
# confirm against `load.load_confirmed_by_region`.
cases_df = load.load_confirmed_by_region("../data/usa/data_cases.csv")

In [35]:
def compute_rolling_mean_minus_var(cases_df, days=7):
    """Return rolling mean minus rolling variance of new cases per region.

    Every column of ``cases_df`` is handled as one region's case series.
    Zero and NaN entries are removed from each series and the remaining
    values are re-indexed from 0, so regions are aligned by "days since
    first case" rather than by the original row labels.

    Args:
        cases_df: DataFrame with one column per region, rows ordered in time.
            Presumably cumulative counts -- TODO confirm against the loader.
        days: Length of the rolling window, in rows.

    Returns:
        DataFrame with the same columns and a fresh 0..n-1 index. Row 0 holds
        the first complete ``days``-row window of the differenced series.
        Regions with shorter histories are NaN where they have no data.
    """
    # Re-base every region so that position 0 is its first non-zero value.
    cases_since_start_df = pd.DataFrame({
        region: pd.Series(cases_df[region].replace(0, np.nan).dropna().values)
        for region in cases_df.columns
    })

    # diff() has nothing to subtract from on the first row, so discard it.
    new_cases_since_start_df = cases_since_start_df.diff().iloc[1:, :]

    windows = new_cases_since_start_df.rolling(days)
    rolling_df = windows.mean() - windows.var()

    # A window of `days` rows is first complete at position `days - 1`, so only
    # the first `days - 1` rows are undefined. Slicing at `days` would also
    # throw away the first valid window.
    rolling_df = rolling_df.iloc[max(days - 1, 0):, :]
    return rolling_df.reset_index(drop=True)
    
def average_by_state(rolling_df):
    """Average the per-region series within each state, in long format.

    The state is taken from each column label as the text after the first
    comma, stripped of surrounding whitespace.

    Args:
        rolling_df: DataFrame with one column per region (labelled
            "<region>, <state>") and one row per day.

    Returns:
        DataFrame with three columns: "state", "Days (since first case)" and
        "7-day Rolling(Mean) - Rolling(Var)", one row per (state, day) pair.
    """
    state_labels = [name.split(",")[1].strip() for name in rolling_df.columns]

    # One row per region, one column per day, plus the state used for grouping.
    per_region = rolling_df.T.assign(state=state_labels)
    state_means = per_region.groupby("state").mean()

    # Wide (state x day) -> long (state, day, value).
    long_df = state_means.stack().reset_index()
    day_col = long_df.columns[1]
    value_col = long_df.columns[2]
    return long_df.rename(columns={
        day_col: "Days (since first case)",
        value_col: "7-day Rolling(Mean) - Rolling(Var)",
    })

In [14]:
# Per-region rolling mean minus rolling variance, using the function's default
# window (days=7), for every column of cases_df.
rolling_df = compute_rolling_mean_minus_var(cases_df)

# State Average: Rolling(Mean) - Rolling(Var) 

In [33]:
# Collapse the per-region series into one averaged series per state, in long
# format (state, day, value) ready for plotting.
states_df = average_by_state(rolling_df)

In [53]:
# One line per state: the state-averaged rolling mean minus rolling variance,
# plotted against days since first case.
fig = px.line(
    data_frame=states_df,
    x="Days (since first case)",
    y="7-day Rolling(Mean) - Rolling(Var)",
    color="state",
).update_layout(title_text="Average State Dispersion")

fig.show()

# Top 20 Counties (by cumulative cases)

In [45]:
def stack_counties(rolling_df):
    """Reshape a wide per-county frame into long format for plotting.

    Args:
        rolling_df: DataFrame with one column per county and one row per day.

    Returns:
        DataFrame with three columns: "Days (since first case)" (the original
        row index), "county" (the original column label) and
        "7-day Rolling(Mean) - Rolling(Var)" (the cell value).
    """
    long_df = rolling_df.stack().reset_index()

    # After stack().reset_index() the columns are, by position:
    # row index, column label, value.
    day_col = long_df.columns[0]
    county_col = long_df.columns[1]
    value_col = long_df.columns[2]

    renamed = long_df.rename(columns={
        day_col: "Days (since first case)",
        county_col: "county",
        value_col: "7-day Rolling(Mean) - Rolling(Var)",
    })
    return renamed

In [46]:
# Rank counties by the value in the last row of cases_df, ascending, so the
# largest counts sit at the tail, then keep the final 20.
# sort_values() places NaN last by default, so counties with no value in the
# last row are dropped first; otherwise they would be selected as "top" counties.
latest_counts = cases_df.iloc[-1, :].dropna().sort_values()
top_20_counties = list(latest_counts.index[-20:])
top_20_rolling_df = compute_rolling_mean_minus_var(cases_df[top_20_counties])
# Long format (day, county, value) for plotting.
top_20_counties_df = stack_counties(top_20_rolling_df)

In [51]:
# One line per county for the 20 counties with the largest latest counts.
fig = px.line(
    data_frame=top_20_counties_df,
    x="Days (since first case)",
    y="7-day Rolling(Mean) - Rolling(Var)",
    color="county",
).update_layout(title_text="Dispersion: Top 20 Counties (by cumulative cases)")

fig.show()

# Median 20 Counties (by cumulative cases)

In [57]:
# Rank counties by the value in the last row of cases_df (ascending). Counties
# with NaN there are dropped so they neither shift the median position nor end
# up in the selection (sort_values() would otherwise place them last).
latest_counts = cases_df.iloc[-1, :].dropna().sort_values()
median = len(latest_counts) // 2
# Take the 20 counties centred on the median rank. The lower bound is clamped
# at 0: with fewer than 20 counties `median - 10` would be negative and the
# slice would wrap around to the tail instead of starting at the beginning.
window_start = max(median - 10, 0)
median_20_counties = list(latest_counts.index[window_start: median + 10])
median_20_rolling_df = compute_rolling_mean_minus_var(cases_df[median_20_counties])
# Long format (day, county, value) for plotting.
median_20_counties_df = stack_counties(median_20_rolling_df)

In [58]:
# One line per county for the 20 counties around the median latest count.
fig = px.line(
    data_frame=median_20_counties_df,
    x="Days (since first case)",
    y="7-day Rolling(Mean) - Rolling(Var)",
    color="county",
).update_layout(title_text="Dispersion: Median 20 Counties (by cumulative cases)")

fig.show()