In [9]:
from pathlib import Path
import pandas as pd
import os
from delphi_nhsn.constants import SIGNALS_MAP, PRELIM_SIGNALS_MAP
from delphi_epidata import Epidata
from epiweeks import Week
# Directory holding the exported CSVs: a `receiving` folder that is a sibling
# of the current working directory (abspath('') resolves to the cwd).
CSV_PATH = Path(os.path.abspath('')).parent / 'receiving'
# Names of every signal: the keys of the regular map followed by the prelim map.
SIGNALS = [*SIGNALS_MAP.keys(), *PRELIM_SIGNALS_MAP.keys()]

In [10]:
def generate_df(geo, pathogen, prelim=False, csv_path=None):
    """Load every exported CSV for one geo/pathogen into a single DataFrame.

    Files are selected with the glob
    ``*{geo}_[<prelim>_]confirmed_admissions_{pathogen}.csv``. The signal name
    and time value are parsed from each file name, which is split on ``_``:
    the second token becomes ``time_value`` and everything from the fourth
    token onward (minus the ``.csv`` extension) becomes ``signal``.

    Parameters
    ----------
    geo : str
        Geo-type token expected in the file name (e.g. ``"state"``).
    pathogen : str
        Pathogen token expected at the end of the file name (e.g. ``"covid"``).
    prelim : bool or str, default False
        Falsy selects files without a prelim token. ``True`` selects files
        carrying the ``prelim_`` token. A non-empty string is used verbatim
        as the token (``f"{prelim}_"``).
    csv_path : path-like, optional
        Directory to search. Defaults to the module-level ``CSV_PATH``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all matching files, with ``signal`` and
        ``time_value`` (string) columns added, ``geo_id`` lower-cased, and the
        ``se`` / ``sample_size`` columns removed.

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    root = Path(CSV_PATH if csv_path is None else csv_path)

    # `prelim=True` used to be interpolated as the literal text "True_",
    # which can never match an exported file name.
    if not prelim:
        prelim_token = ""
    elif prelim is True:
        prelim_token = "prelim_"
    else:
        prelim_token = f"{prelim}_"

    pattern = f"*{geo}_{prelim_token}confirmed_admissions_{pathogen}.csv"
    print(pattern)

    frames = []
    for csv_file in root.glob(pattern):
        name_parts = csv_file.name.split('_')
        frame = pd.read_csv(csv_file)
        frame['signal'] = "_".join(name_parts[3:]).replace('.csv', '')
        frame['time_value'] = name_parts[1]
        frame["geo_id"] = frame["geo_id"].str.lower()
        frames.append(frame)

    if not frames:
        # pd.concat([]) would raise a ValueError with an unhelpful message;
        # keep the exception type but say what was searched for.
        raise ValueError(f"No CSV files matching {pattern!r} found in {root}")

    return pd.concat(frames).drop(columns=['se', 'sample_size'])

In [11]:
def get_hhs_df(geo, pathogen, start_day="20200808", end_day="20240426"):
    """Fetch daily HHS confirmed admissions and sum them per epiweek.

    Queries the ``hhs`` source of the Epidata covidcast endpoint for the
    signal ``confirmed_admissions_{pathogen}_1d`` across all locations of the
    requested geo type, then sums the daily values within each
    ``epiweeks.Week``.

    Parameters
    ----------
    geo : str
        Geo type passed to the API (e.g. ``"state"``).
    pathogen : str
        Pathogen token used to build the signal name (e.g. ``"covid"``).
    start_day, end_day : str, optional
        Inclusive ``YYYYMMDD`` bounds of the requested range. The defaults
        reproduce the previously hard-coded window.

    Returns
    -------
    pandas.DataFrame
        Columns ``geo_id``, ``signal``, ``time_value`` (an ``epiweeks.Week``)
        and ``val`` (sum of the daily values in that week).

    Raises
    ------
    RuntimeError
        If the API response carries no ``epidata`` rows.
    """
    response = Epidata.covidcast(
        "hhs",
        f"confirmed_admissions_{pathogen}_1d",
        time_type="day",
        geo_type=geo,
        time_values=Epidata.range(start_day, end_day),
        geo_value="*",
        as_of=None,
    )

    rows = response.get("epidata")
    if not rows:
        # Surface the API's own diagnostics instead of a bare KeyError.
        raise RuntimeError(
            "Epidata returned no rows for "
            f"confirmed_admissions_{pathogen}_1d ({geo}): "
            f"result={response.get('result')}, message={response.get('message')}"
        )

    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the raw response frame.
    daily = (
        pd.DataFrame.from_dict(rows)
        .rename(columns={"geo_value": "geo_id", "value": "val"})
        [["geo_id", "time_value", "val", "signal"]]
        .copy()
    )
    daily["time_value"] = pd.to_datetime(daily["time_value"], format="%Y%m%d")
    daily["week_time"] = daily["time_value"].apply(lambda day: Week.fromdate(day))

    # NOTE(review): if start_day/end_day fall mid-week, the first/last week is
    # summed over fewer than seven days -- confirm whether those boundary
    # weeks should be trimmed before comparing against weekly data.
    weekly = (
        daily.groupby(["geo_id", "signal", "week_time"])["val"]
        .sum()
        .reset_index()
        .rename(columns={"week_time": "time_value"})
    )
    return weekly


In [12]:
# State-level COVID admissions read from the locally exported CSV files.
state_df = generate_df(geo="state", pathogen="covid")
# The matching HHS series pulled from the Epidata API, aggregated per epiweek.
hhs_state_df = get_hhs_df(geo="state", pathogen="covid")

*state_confirmed_admissions_covid.csv


In [13]:
def generate_spearman(nhsn_df, hhs_df, slice_col):
    """Spearman rank correlation between NHSN and HHS values per slice.

    Both frames are left-joined (NHSN on the left) on ``geo_id`` and the
    string form of ``time_value``; the correlation between the two ``val``
    columns is then computed within each distinct value of ``slice_col``.

    Parameters
    ----------
    nhsn_df, hhs_df : pandas.DataFrame
        Frames containing at least ``geo_id``, ``time_value`` and ``val``.
        Neither frame is modified.
    slice_col : str
        Column of the joined frame to group by (e.g. ``"geo_id"`` or
        ``"time_value"``). Its values must be strings.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``slice_col`` (upper-cased) and ``correlation``.
    """
    # Normalise the join key on copies of the *arguments*. The previous code
    # read the notebook globals `state_df` / `hhs_state_df` here and wrote the
    # result back into the caller's frames.
    nhsn = nhsn_df.assign(time_value=nhsn_df["time_value"].astype(str))
    hhs = hhs_df.assign(time_value=hhs_df["time_value"].astype(str))

    joined = pd.merge(
        nhsn, hhs,
        on=["geo_id", "time_value"],
        how="left",
        suffixes=("_nhsn", "_hhs"),
    )

    # groupby(...).corr() yields one 2x2 matrix per group, indexed by
    # (slice value, column name). The off-diagonal cell is the value we want:
    # row "val_nhsn", column "val_hhs".
    corr_matrices = joined.groupby(slice_col)[["val_nhsn", "val_hhs"]].corr("spearman")
    spearmanr_df = (
        corr_matrices.xs("val_nhsn", level=1)["val_hhs"]
        .rename("correlation")
        .reset_index()
    )
    spearmanr_df[slice_col] = spearmanr_df[slice_col].str.upper()
    return spearmanr_df
    

In [15]:
import pandas as pd
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode

init_notebook_mode(connected=True)

# One Spearman coefficient per state, comparing the NHSN and HHS series.
spearmanr_by_state = generate_spearman(state_df, hhs_state_df, "geo_id")

# Build the map trace first, then wrap it in a figure.
state_trace = go.Choropleth(
    locations=spearmanr_by_state['geo_id'],  # location codes to colour
    z=spearmanr_by_state['correlation'].astype(float),  # value driving the colour
    locationmode='USA-states',  # tells plotly how to interpret `locations`
    colorscale='Viridis',
    colorbar_title="spearman rank",
)
fig = go.Figure(data=state_trace)

# Title the figure and limit the map scope to the USA.
fig.update_layout(
    title_text='Spearman Correlations between HHS and NHSN',
    geo_scope='usa',
)

fig.show()

In [16]:
import pandas as pd
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode


init_notebook_mode(connected=True)

# One Spearman coefficient per time value, computed across locations.
spearmanr_by_time = generate_spearman(state_df, hhs_state_df, "time_value")

# Create the timeplot
fig = go.Figure()
fig.add_trace(go.Scatter(x=spearmanr_by_time['time_value'], y=spearmanr_by_time['correlation'], mode='lines', name='Time Series'))
fig.update_layout(
    title='Spearman Correlations between HHS and NHSN over time',
    xaxis_title='time_value',
    yaxis_title='spearman rank',
)
# The time values are numeric-looking strings. Left to auto-detection they are
# placed on a linear numeric axis, which spaces points by their numeric
# difference instead of evenly in order. A categorical axis keeps one slot per
# value, in the order the rows appear.
fig.update_xaxes(type='category')
fig.show()