In [1]:
import pandas as pd
import numpy as np
import datetime
from scorepi import *
from epiweeks import Week
import matplotlib.pyplot as plt
from collections import defaultdict
import seaborn as sns
import datetime
from datetime import datetime
from datetime import timedelta
from pathlib import Path
import matplotlib as mpl
import random
from numba import njit
from scipy.stats import linregress


In [2]:
import warnings
# Installs a blanket 'ignore' filter: from here on no warning of any category
# is shown in this kernel (including pandas/numpy deprecation notices).
warnings.filterwarnings('ignore')

In [3]:
from numba import njit
@njit
def energyscore(X,y):
    """Energy score of a set of trajectories against one observed trajectory.

    X is indexed along its first axis by trajectory (X[i] is one trajectory);
    y is the observation and must be broadcast-compatible with X[i].

    Returns
        mean_i ||X[i] - y||  -  (1 / (2 N^2)) * sum_{i,j} ||X[i] - X[j]||
    where N = X.shape[0] and ||.|| is the Euclidean norm.  With N == 0 no term
    is accumulated and the initial 0 is returned.
    """
    n_traj = X.shape[0]
    score = 0

    # Accuracy term: average distance from each trajectory to the observation.
    for row in range(n_traj):
        score += np.sqrt(np.sum((X[row]-y)**2))/n_traj

    # Spread term: every ordered pair (a, b) is visited, including a == b,
    # and each contributes its distance divided by 2 N^2.
    pair_norm = 2*n_traj**2
    for a in range(n_traj):
        for b in range(n_traj):
            score -= np.sqrt(np.sum((X[a]-X[b])**2))/pair_norm

    return score


In [4]:
import matplotlib.dates as mdates
def set_date_axis_fmt(ax):
    """Configure the x-axis of `ax` for dates.

    Installs a `MonthLocator` as the major tick locator and a
    `DateFormatter('%b %y')` as the major tick formatter.  Modifies `ax`
    in place and returns None.
    """
    x_axis = ax.xaxis
    # Where the major ticks go.
    x_axis.set_major_locator(mdates.MonthLocator())
    # How each major tick is labelled.
    x_axis.set_major_formatter(mdates.DateFormatter('%b %y'))

In [5]:
def pull_surveillance_data(target='death',incidence=True):
    """Download a truth CSV from the cdcepi/Flusight-forecast-data repository.

    Parameters
    ----------
    target : str
        One of 'death', 'case' or 'hospitalization'; any other value raises
        KeyError before a request is made.
    incidence : bool
        Truthy selects the 'Incident' file, falsy the 'Cumulative' file.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, with the `location` column read as str.
    """
    mapping = {'death':'Deaths', 'case':'Cases', 'hospitalization': 'Hospitalizations'}
    s = 'Incident' if incidence else 'Cumulative'
    # '%20' is the URL-encoded space between the two parts of the file name.
    url = f"https://raw.githubusercontent.com/cdcepi/Flusight-forecast-data/master/data-truth/truth-{s}%20{mapping[target]}.csv"
    truth = pd.read_csv(url, dtype={'location':str})
    return truth


In [6]:
# Which surveillance series to pull; uncomment one of the alternatives to switch.
target = 'hospitalization'
#target = 'death'
#target = 'case'

# True -> incident counts, False -> cumulative counts.
incidence = True

observations = pull_surveillance_data(target=target, incidence=incidence)

In [9]:
# Display the downloaded truth table.
observations

Unnamed: 0,date,location,location_name,value
0,2020-01-11,01,Alabama,0
1,2020-01-11,15,Hawaii,0
2,2020-01-11,18,Indiana,0
3,2020-01-11,27,Minnesota,0
4,2020-01-11,30,Montana,0
...,...,...,...,...
10221,2023-10-14,50,Vermont,0
10222,2023-10-14,53,Washington,41
10223,2023-10-14,55,Wisconsin,3
10224,2023-10-14,54,West Virginia,3


In [10]:
# Write the truth table under ./fludat/ as truth_<inc|cum>_<target>.pq, without the index.
observations.to_parquet(f"./fludat/truth_{'inc' if incidence else 'cum'}_{target}.pq", index=False)

In [94]:
date = '2022-12-04'
models = ['NIH-FluD', 'PSI-M1', 'JHU_IDD-CovidSP', 'NIH-Flu_TS']

# One frame per file that was actually read; concatenated once at the end
# instead of re-copying `predictions` on every loop iteration.
loaded_frames = []

# --- CSV sample files ---------------------------------------------------
for model in models:
    filename = date + '-'+model + '-sample'
    # Reset per model: a model with no CSV variant on disk (possibly
    # 'JHU_IDD-CovidSP', which is read from parquet below) must not re-append
    # the frame left over from the previous model.
    pred = None
    # Walk the variants back to front and stop at the first one present, so
    # that when several exist the last entry of the list is the one used.
    for ext in reversed([".csv",".gz",".zip",".csv.gz"]):
        path = './fludat/'+filename+ext
        if not Path(path).is_file():
            continue
        # Only a missing file is tolerated; a file that exists but cannot be
        # parsed raises instead of being silently skipped.
        pred = pd.read_csv(path,dtype={'location':str},
                           parse_dates=['target_end_date'])
        pred['Model'] = model
        break
    if pred is None:
        # print rather than warnings.warn: this notebook filters all warnings.
        print(f"[load] no CSV sample file for {model} in ./fludat/ - skipped")
        continue
    loaded_frames.append(pred)

# --- Parquet sample files -----------------------------------------------
for model in ['JHU_IDD-CovidSP']:
    filename = date + '-'+model + '-sample'
    path = './fludat/'+filename+'.parquet'
    if not Path(path).is_file():
        print(f"[load] no parquet sample file for {model} in ./fludat/ - skipped")
        continue
    pred = pd.read_parquet(path)
    # Missing locations become "US" first, then the column is cast to str;
    # casting first would turn the missing values into literal strings.
    # Assigning the result back replaces a discarded `astype` call and a
    # chained `fillna(..., inplace=True)` that may not write through to `pred`.
    pred['location'] = pred['location'].fillna("US").astype(str)
    pred['target_end_date'] = pd.to_datetime(pred['target_end_date'])
    pred['Model'] = model
    loaded_frames.append(pred)

# Original per-file indices are kept (no ignore_index), as before.
predictions = pd.concat(loaded_frames) if loaded_frames else pd.DataFrame()

# --- MOBS runs ------------------------------------------------------------
# Loaded into `pred` for inspection only; deliberately not added to
# `predictions` (the concat below is left disabled).
for model in ['MOBS_NEU-GLEAM_FLU']:
    pred = pd.read_csv('./fludat/round3_selected-runs-MOBS_all-states.csv.gz',dtype={'location':str})
    pred['Model'] = model
    #predictions = pd.concat([predictions, pred])

In [93]:
# Display `pred`, i.e. whichever frame was assigned to it most recently.
pred

Unnamed: 0,country_id,n_lag,ifr_multiplier,run_id,death_hosp_multiplier,norm_weight,Hospitalized,Hospitalized_sub,Hospitalized_cum,Hospitalized_sub_cum,epiweek_enddate,scenario,Model
0,39,0,1,61101000000116551,a,0.000348,17.976902,17.976902,91.121790,17.976902,2022-12-10,A,MOBS_NEU-GLEAM_FLU
1,39,0,1,61131000000123127,a,0.000355,13.790269,13.790269,64.601667,13.790269,2022-12-10,A,MOBS_NEU-GLEAM_FLU
2,39,0,1,61101000000024194,a,0.000106,15.775615,15.775615,87.887046,15.775615,2022-12-10,A,MOBS_NEU-GLEAM_FLU
3,39,0,1,61131000000161132,a,0.000093,12.223866,12.223866,75.804124,12.223866,2022-12-10,A,MOBS_NEU-GLEAM_FLU
4,39,0,1,61131000000091925,a,0.000203,17.175736,17.175736,76.870192,17.175736,2022-12-10,A,MOBS_NEU-GLEAM_FLU
...,...,...,...,...,...,...,...,...,...,...,...,...,...
74056454,46,0,1,61114000000010075,a,0.000470,77.316026,31.750000,109.066026,63.500000,2022-09-10,D,MOBS_NEU-GLEAM_FLU
74056455,46,0,1,61114000000010052,a,0.000025,59.888269,31.750000,91.638269,63.500000,2022-09-10,D,MOBS_NEU-GLEAM_FLU
74056456,46,0,1,61114000000100469,a,0.000004,54.270752,31.750000,86.020752,63.500000,2022-09-10,D,MOBS_NEU-GLEAM_FLU
74056457,46,0,1,61114000000074158,a,0.000121,75.445061,31.750000,107.195061,63.500000,2022-09-10,D,MOBS_NEU-GLEAM_FLU


In [96]:
# Number of distinct values in pred's `run_id` column.
len(pred.run_id.unique())

277845

In [88]:
# Display the combined sample frame.
predictions

Unnamed: 0,sample,value,target_end_date,scenario_id,scenario_name,model_projection_date,location,target,age_group,Model
0,1,9742.086816,2022-12-10,A-2022-12-04,highVE_optImm,2022-12-04,US,1 wk ahead inc hosp,0-130,NIH-FluD
1,2,8818.316345,2022-12-10,A-2022-12-04,highVE_optImm,2022-12-04,US,1 wk ahead inc hosp,0-130,NIH-FluD
2,3,9943.335645,2022-12-10,A-2022-12-04,highVE_optImm,2022-12-04,US,1 wk ahead inc hosp,0-130,NIH-FluD
3,4,11067.355525,2022-12-10,A-2022-12-04,highVE_optImm,2022-12-04,US,1 wk ahead inc hosp,0-130,NIH-FluD
4,5,8985.992032,2022-12-10,A-2022-12-04,highVE_optImm,2022-12-04,US,1 wk ahead inc hosp,0-130,NIH-FluD
...,...,...,...,...,...,...,...,...,...,...
540795,96,6.022537,2023-06-03,D-2022-12-04,lowVE_pesImm,2022-12-04,US,26 wk ahead inc hosp,0-130,JHU_IDD-CovidSP
540796,97,3.679030,2023-06-03,D-2022-12-04,lowVE_pesImm,2022-12-04,US,26 wk ahead inc hosp,0-130,JHU_IDD-CovidSP
540797,98,10.048798,2023-06-03,D-2022-12-04,lowVE_pesImm,2022-12-04,US,26 wk ahead inc hosp,0-130,JHU_IDD-CovidSP
540798,99,2.329721,2023-06-03,D-2022-12-04,lowVE_pesImm,2022-12-04,US,26 wk ahead inc hosp,0-130,JHU_IDD-CovidSP
