In [5]:
from pathlib import Path
import pandas as pd
import os

from delphi_nhsn.constants import SIGNALS_MAP, PRELIM_SIGNALS_MAP
from delphi_epidata import Epidata
from epiweeks import Week
CSV_PATH = Path(os.path.abspath('')).parent.joinpath('receiving')
SIGNALS = list(SIGNALS_MAP.keys()) + list(PRELIM_SIGNALS_MAP.keys())

In [35]:
def get_nhsn_df(geo, pathogen, prelim=False):
    lst = []
    geo_filter = f'*{geo}'
    pathogen_filter = f'confirmed_admissions_{pathogen}.csv'
    prelim_filter = f'prelim_' if prelim else ""
    filter = f"{geo_filter}_{prelim_filter}{pathogen_filter}"
    for files in Path(CSV_PATH).glob(filter):
        filename_parts = files.name.split('_')
        signal = "_".join(filename_parts[3:]).replace('.csv', '')
        time_value = filename_parts[1]
        df = pd.read_csv(files)
        df['signal'] = signal
        df['time_value'] = time_value
        df["geo_id"] = df["geo_id"].str.upper()
        df.time_value = df.time_value.astype(str)
        lst.append(df)
    df = pd.concat(lst)
    df = df.drop(columns=['se','sample_size'], axis=1)
    return df

In [38]:
def get_hhs_df(geo, pathogen):
    response = Epidata.covidcast("hhs", f"confirmed_admissions_{pathogen}_1d",
                      geo_type=geo, time_values=Epidata.range("20200808", "20240426"),
                              geo_value="*", as_of=None, time_type="day")
    df = pd.DataFrame.from_dict(response["epidata"])
    df.rename(columns={"geo_value": "geo_id", "value": "val", "stderr": "se"}, inplace=True)
    df = df[["geo_id", "time_value", "val", "signal"]]
    df["time_value"] = pd.to_datetime(df["time_value"], format="%Y%m%d")
    
    df["week_time"] = df["time_value"].apply(lambda x: Week.fromdate(x))
    df = df.groupby(["geo_id", "signal", "week_time"]).val.sum().reset_index()
    df["geo_id"] = df["geo_id"].str.upper()
    df.rename({"week_time": "time_value"}, axis=1, inplace=True)
    df.time_value = df.time_value.astype(str)
    return df


In [12]:
def generate_spearman(nhsn_df, hhs_df, slice_col):
    
    joined = pd.merge(nhsn_df, hhs_df, on=["geo_id", "time_value"], how="left", suffixes=["_nhsn", "_hhs"])
    
    spearmanr_df = joined.groupby(slice_col)[["val_nhsn", "val_hhs"]].corr("spearman").iloc[0::2,-1].reset_index()
    spearmanr_df.rename({"val_hhs": "correlation"}, axis=1, inplace=True)
    spearmanr_df = spearmanr_df[[slice_col, "correlation"]]
    spearmanr_df[slice_col] = spearmanr_df[slice_col].str.upper()
    return spearmanr_df
    

In [19]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

from plotly.offline import init_notebook_mode

init_notebook_mode(connected=True)


def plot_spearman_choropleth(nhsn_df, hhs_df, pathogen, download=False):
    spearmanr = generate_spearman(nhsn_df, hhs_df, "geo_id")

    fig = go.Figure(data=go.Choropleth(
        locations=spearmanr['geo_id'], # Spatial coordinates
        z = spearmanr['correlation'].astype(float), # Data to be color-coded
        locationmode = 'USA-states', # set of locations match entries in `locations`
        colorscale = 'Viridis',
        colorbar_title = "spearman rank",
    ))
    
    fig.update_layout(
        title_text = f'Spearman Correlations between HHS {pathogen} admission and NHSN {pathogen} admission',
        geo_scope='usa', # limite map scope to USA
    )
    
    if download:
        pio.write_image(fig, f'{pathogen}_spearman_choropleth.pdf', format='pdf')
    
    else:
        fig.show()


In [29]:
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode
import plotly.io as pio

init_notebook_mode(connected=True)

def plot_spearman_timeplot(nhsn_df, hhs_df, pathogen, download=False):
    # Create the timeplot
    spearmanr = generate_spearman(nhsn_df, hhs_df, "time_value")
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=spearmanr['time_value'], y=spearmanr['correlation'], mode='lines', name='Time Series'))
    fig.update_layout(title='Spearman Correlations between HHS and NHSN over time')
    fig.show()
    
    if download:
        pio.write_image(fig, f'{pathogen}_spearman_timeplot.pdf', format='pdf')


In [30]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from plotly.offline import init_notebook_mode

init_notebook_mode(connected=True)

geo_type = "state"
pathogen = "covid"
nhsn_df = get_nhsn_df(geo_type, pathogen)

def plot_choropleth_overtime(df, z_col):
    color_bar_title = f'Total Confirmed {pathogen} Cases' if z_col == "val" else "Spearman Correlation"
    layout_title = f'NHSN total confirmed {pathogen} cases' if z_col == "val" else f'Spearman Correlation between NHSN and HHS for confirmed {pathogen} '

    df["time_value"] = pd.to_datetime(df.time_value.map(lambda x: str(x)+'-0'), format="%Y%W-%w")
    
    data_slider = []
    
    for time_value in df['time_value'].unique():
        df_segmented =  df[(nhsn_df['time_value']== time_value)]
    
        data_each_yr = dict(
                            type='choropleth',
                            locations = df_segmented['geo_id'],
                            z=df_segmented[z_col].astype(float),
                            locationmode='USA-states',
                            colorscale = 'Viridis',
                            colorbar= {'title': color_bar_title},)
    
        data_slider.append(data_each_yr)
    
    steps = []
    for idx, time_value in enumerate(sorted(df['time_value'].unique())):
        step = {
                    "method":'restyle',
                    "args":[
                        {'visible': [False] * len(data_slider)}
                    ],
                    "label": f"Epiweek {Week.fromdate(pd.to_datetime(time_value))}"
                }
        step['args'][0]["visible"][idx] = True
        steps.append(step)
    
    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]
    
    layout = dict(title =layout_title, geo=dict(scope='usa',
                           projection={'type': 'albers usa'}),
                  sliders=sliders)
    
    fig = go.Figure(data=data_slider, layout=layout)
    fig.show()


*state_confirmed_admissions_covid.csv


In [28]:
from itertools import product

pathogen_mapping = {"flu": "influenza", "covid": "covid"}
for pathogen, prelim_flag in product(("covid", "flu"), (False, True)):
    nhsn_df = get_nhsn_df("state", pathogen, prelim_flag)
    hhs_df = get_hhs_df("state", pathogen_mapping[pathogen])
    plot_spearman_choropleth(nhsn_df, hhs_df, pathogen, download=True)
    plot_spearman_timeplot(nhsn_df, hhs_df, pathogen, download=True)

*state_confirmed_admissions_covid.csv
*state_prelim_confirmed_admissions_covid.csv
*state_confirmed_admissions_flu.csv
*state_prelim_confirmed_admissions_flu.csv


In [99]:
pathogen_mapping = {"flu": "influenza", "covid": "covid"}

# for pathogen, prelim_flag in product(("covid", "flu"), (False, True)):
pathogen = "flu"
prelim_flag = False
nhsn_df = get_nhsn_df("state", pathogen, prelim_flag)

hhs_df = get_hhs_df("state", pathogen_mapping[pathogen])
# hhs_df["time_value"] = pd.to_datetime(hhs_df.time_value.map(lambda x: str(x)+'-0'), format="%Y%W-%w")


joined = pd.merge(nhsn_df, hhs_df, on=["geo_id", "time_value"], how="left", suffixes=["_nhsn", "_hhs"])
spearmanr_df = joined.groupby(["geo_id", "time_value"], as_index=False)[["geo_id", "time_value", "val_nhsn", "val_hhs"]]
for name, group in spearmanr_df:
    print(group)
# spearmanr_df.rename({"val_hhs": "correlation"}, axis=1, inplace=True)
# spearmanr_df = spearmanr_df[["geo_id","time_value", "correlation"]]
# spearmanr_df["geo_id"] = spearmanr_df["geo_id"].str.upper()
# spearmanr_df
    # plot_choropleth_overtime(spearman_df, "correlation")

     geo_id time_value  val_nhsn  val_hhs
3192     AK     202032       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
6160     AK     202033       NaN      0.0
    geo_id time_value  val_nhsn  val_hhs
112     AK     202034       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
3248     AK     202035       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
6608     AK     202036       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
9576     AK     202037       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
5824     AK     202038       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
2800     AK     202039       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
5376     AK     202040       NaN      0.0
     geo_id time_value  val_nhsn  val_hhs
2296     AK     202041       NaN      0.0
      geo_id time_value  val_nhsn  val_hhs
11872     AK     202042       0.0      0.0
     geo_id time_value  val_nhsn  val_hhs
8848     AK     202043       0.0  