In [None]:
import pathlib
from datetime import datetime
from typing import Optional

import matplotlib.pyplot as plt
import pandas as pd
import requests
import us


# Analysis with data using public API

In [None]:
def get_kinsa_state_data(state_abreviation: str) -> Optional[pd.DataFrame]:
    """
    Fetch Kinsa data for one state from the public API.

    Parameters
    ----------
    state_abreviation : str
        Value interpolated into the ``{state_abreviation}_data.json`` URL
        (callers in this notebook pass ``us`` state abbreviations).

    Returns
    -------
    Optional[pd.DataFrame]
        A frame built from the payload's ``data`` records and ``columns``
        names, with the ``date`` column parsed to datetimes, or ``None`` when
        the endpoint answers with anything other than HTTP 200.
    """
    response = requests.get(f"https://static.kinsahealth.com/{state_abreviation}_data.json")
    if response.status_code != 200:
        # Callers check for None and skip the state, so a missing payload is
        # reported as "no data" instead of being raised.
        return None

    payload = response.json()
    df = pd.DataFrame.from_records(data=payload['data'], columns=payload['columns'])
    df['date'] = pd.to_datetime(df['date'])
    return df

In [None]:
def get_prod_snapshot_number():
    """
    Return the snapshot identifier referenced by the site's ``data_url.json``.

    Downloads ``data_url.json`` from the ``develop`` branch of the
    covid-projections repository and returns the second-to-last
    ``/``-separated piece of its ``data_url`` value.
    """
    covid_projections_data_url = "https://raw.githubusercontent.com/covid-projections/covid-projections/develop/src/assets/data/data_url.json"
    pointer = requests.get(covid_projections_data_url).json()
    url_pieces = pointer['data_url'].split("/")
    # NOTE(review): presumably data_url ends with a trailing slash, which makes
    # [-2] the snapshot folder name — confirm against the JSON file.
    return url_pieces[-2]


def get_can_rt_df() -> pd.DataFrame:
    """
    Load the state-level OBSERVED_INTERVENTION timeseries for the current snapshot.

    The snapshot number comes from ``get_prod_snapshot_number``; the CSV is read
    straight from ``data.covidactnow.org`` and its ``date`` column is converted
    to datetimes before the frame is returned.
    """
    snapshot = get_prod_snapshot_number()
    timeseries_url = f"https://data.covidactnow.org/snapshot/{snapshot}/us/states.OBSERVED_INTERVENTION.timeseries.csv"
    frame = pd.read_csv(timeseries_url)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame
# Download the timeseries once for the whole notebook: later cells pass this
# frame to the helper functions, and one of them adds a `state_abbr` column to it.
rt_df = get_can_rt_df()

In [None]:
def plot_kinsa_vs_rt(state_identifier: str, rt_df: pd.DataFrame, ax=None):
    """
    Overlay Kinsa ``atypical_ili`` and ``RtIndicator`` for one state.

    ``atypical_ili`` is drawn against the primary y-axis and ``RtIndicator``
    against a secondary y-axis of the same panel, titled with the state name.

    Parameters
    ----------
    state_identifier : str
        Anything ``us.states.lookup`` can resolve (name, abbreviation, ...).
    rt_df : pd.DataFrame
        Rt timeseries, forwarded to ``get_combined_kinsa_rt_df``.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, pandas creates a new figure for the
        first series and the second series is drawn on that same axes.

    Returns
    -------
    None
        Nothing is drawn when there is no combined data for the state.
    """
    state = us.states.lookup(state_identifier)
    combined_df = get_combined_kinsa_rt_df(state_identifier, rt_df)
    # An empty merge cannot be plotted (pandas raises), so treat it like "no data".
    if combined_df is None or combined_df.empty:
        return

    kwargs = dict(use_index=True, title=state.name)
    if ax is not None:
        kwargs["ax"] = ax
    # DataFrame.plot returns the axes it drew on. Reusing them guarantees the
    # Rt line lands on the same panel; without this, omitting `ax` made the
    # second call open a separate figure and the overlay was lost.
    primary_ax = combined_df.plot(x='date', y=["atypical_ili"], **kwargs)
    kwargs["ax"] = primary_ax
    combined_df.plot(x="date", y=["RtIndicator"], secondary_y=True, **kwargs)
    
def get_combined_kinsa_rt_df(state_identifier: str, rt_df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Join per-date averaged Kinsa data with the Rt timeseries of one state.

    Parameters
    ----------
    state_identifier : str
        Anything ``us.states.lookup`` can resolve (name, abbreviation, ...).
    rt_df : pd.DataFrame
        Rt timeseries; must provide ``stateName`` and ``date`` columns.

    Returns
    -------
    Optional[pd.DataFrame]
        The merge (on ``date``) of the per-date mean of the Kinsa rows with
        the Rt rows whose ``stateName`` equals the state's name, or ``None``
        when no Kinsa data could be fetched for the state.
    """
    state = us.states.lookup(state_identifier)
    df = get_kinsa_state_data(state.abbr)
    if df is None:
        return None

    # Rows with any missing value are dropped before averaging per date.
    # numeric_only=True keeps the mean restricted to numeric columns: recent
    # pandas versions raise on non-numeric columns instead of silently
    # skipping them as older versions did.
    kinsa_state_df = df.dropna().groupby("date").mean(numeric_only=True)

    state_rt_df = rt_df[rt_df.stateName == state.name].sort_values('date')
    # After the groupby, 'date' is the index of the Kinsa frame and a regular
    # column of the Rt frame.
    return kinsa_state_df.merge(state_rt_df, on='date')

# Plot atypical_ili against daily cases, all 50 states: 

In [None]:
# One panel per state, laid out n_cols across with as many rows as that needs.
n_cols = 5
n_rows = -(-len(us.STATES) // n_cols)  # ceiling division
# squeeze=False keeps `ax` two-dimensional whatever the grid size.
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
panels = ax.ravel()  # row-major: states fill the grid left to right, top to bottom

for axij, state in zip(panels, us.STATES):
    axij.set_title(state.name)
    combined_df = get_combined_kinsa_rt_df(state.abbr, rt_df)
    # No Kinsa payload, or no dates shared with the Rt data: leave the panel
    # blank. Plotting an empty frame would raise inside pandas.
    if combined_df is None or combined_df.empty:
        continue

    combined_df.plot(x='date', y=["atypical_ili"], ax=axij)
    # Daily cases are the first difference of the cumulative count. Only that
    # column is aggregated, so unrelated columns are never summed.
    daily_cases = combined_df.groupby("date")[['cumulativeInfected']].sum().diff(1)
    daily_cases.plot(y="cumulativeInfected", label='dailyCases', secondary_y=True, ax=axij)
    axij.legend().remove()

# Hide grid cells that did not receive a state.
for unused_ax in panels[len(us.STATES):]:
    unused_ax.set_visible(False)

fig.set_size_inches(18.5, 30.5, forward=True)
fig.subplots_adjust(hspace=1.7)


# Plot atypical_ili against rt values, all 50 states

In [None]:
# One panel per state, laid out n_cols across with as many rows as that needs.
n_cols = 5
n_rows = -(-len(us.STATES) // n_cols)  # ceiling division
# squeeze=False keeps `ax` two-dimensional whatever the grid size.
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
panels = ax.ravel()  # row-major: states fill the grid left to right, top to bottom

for axij, state in zip(panels, us.STATES):
    axij.set_title(state.name)
    combined_df = get_combined_kinsa_rt_df(state.abbr, rt_df)
    # No Kinsa payload, or no dates shared with the Rt data: leave the panel
    # blank. Plotting an empty frame would raise inside pandas.
    if combined_df is None or combined_df.empty:
        continue

    # atypical_ili on the left axis, RtIndicator on a secondary right axis.
    combined_df.plot(x='date', y=["atypical_ili"], ax=axij)
    combined_df.plot(x='date', y=["RtIndicator"], secondary_y=True, ax=axij)
    axij.legend().remove()

# Hide grid cells that did not receive a state.
for unused_ax in panels[len(us.STATES):]:
    unused_ax.set_visible(False)

fig.set_size_inches(18.5, 30.5, forward=True)
fig.subplots_adjust(hspace=1.7)


# Find the time shift needed to align the Rt and atypical_ili series

In [None]:
from pyseir.inference.run_infer_rt import RtInferenceEngine 

In [None]:
# Collect one (state name, shift) pair per state, then build the frame in one go.
timeshift_rows = []
for state in us.STATES:
    combined_df = get_combined_kinsa_rt_df(state.abbr, rt_df)
    has_overlap = combined_df is not None and not combined_df.empty
    # States without usable combined data keep a missing shift.
    shift_in_days = (
        RtInferenceEngine.align_time_series(
            series_a=combined_df["atypical_ili"],
            series_b=combined_df["RtIndicator"],
        )
        if has_overlap
        else None
    )
    timeshift_rows.append((state.name, shift_in_days))

timeshifts = pd.DataFrame(
    data=timeshift_rows,
    columns=["stateName", "rt_atypical_ili_shift_days"],
)

In [None]:
# Histogram of the numeric columns of `timeshifts`; `stateName` holds text, so
# the shift-in-days column is the one expected to be drawn.
timeshifts.hist()


# Analysis with state-level aggregated Kinsa data (one-off)


Kinsa provided a CSV file with state-level data, which is loaded and shown below. Preliminary analysis indicates that this data does not deviate significantly
from the county-level data from the Kinsa API once that data is aggregated to the state level.

In [None]:
# One-off export supplied by Kinsa. The location is machine-specific: adjust
# the path if the file lives somewhere other than ~/Downloads.
# expanduser() resolves "~" explicitly instead of relying on the reader to do it.
kinsa_state_data_file_location = pathlib.Path("~/Downloads/atypical_ili_export_States.csv").expanduser()
# skiprows=1 discards the first line of the file, so the column names are read
# from the second line.
kinsa_state_df = pd.read_csv(kinsa_state_data_file_location, skiprows=1)
# Parse dates explicitly so a malformed value raises here rather than later.
kinsa_state_df["date"] = pd.to_datetime(kinsa_state_df["date"])

In [None]:
# Preview the first rows of the one-off Kinsa export to check the parsed columns.
kinsa_state_df.head()

In [None]:
# Resolve each distinct state name once instead of once per row, and tolerate
# names that `us` cannot resolve: they get a missing abbreviation rather than
# raising AttributeError on `None.abbr`.
abbr_by_state_name = {}
for state_name in rt_df.stateName.dropna().unique():
    matched_state = us.states.lookup(state_name)
    abbr_by_state_name[state_name] = matched_state.abbr if matched_state is not None else None
rt_df["state_abbr"] = rt_df.stateName.map(abbr_by_state_name)

# Inner join: keeps only (date, state) pairs present in both the Rt data and
# the Kinsa export, whose `region` column is matched against the abbreviation.
combined_state_df = rt_df.merge(kinsa_state_df, left_on=['date', 'state_abbr'], right_on=['date', 'region'])

In [None]:
# Inspect the merged Rt/Kinsa frame to sanity-check the join on date and state abbreviation.
combined_state_df

In [None]:
# One panel per state, laid out n_cols across with as many rows as that needs.
n_cols = 5
n_rows = -(-len(us.STATES) // n_cols)  # ceiling division
# squeeze=False keeps `ax` two-dimensional whatever the grid size.
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
panels = ax.ravel()  # row-major: states fill the grid left to right, top to bottom

for axij, state in zip(panels, us.STATES):
    axij.set_title(state.name)
    state_df = combined_state_df[combined_state_df.state_abbr == state.abbr]
    # Boolean filtering always yields a DataFrame (never None), so emptiness is
    # the real "no data for this state" signal. Plotting an empty frame raises.
    if state_df.empty:
        continue

    # atypical_ili_median on the left axis, RtIndicator on a secondary right axis.
    state_df.plot(x='date', y=["atypical_ili_median"], ax=axij)
    state_df.plot(x='date', y=["RtIndicator"], ax=axij, secondary_y=True)
    axij.legend().remove()

# Hide grid cells that did not receive a state.
for unused_ax in panels[len(us.STATES):]:
    unused_ax.set_visible(False)

fig.set_size_inches(18.5, 30.5, forward=True)
fig.subplots_adjust(hspace=1.7)
fig