In [1]:
import pandas as pd
import numpy as np
import datetime
from scorepi import *
from ensemble_models import *
from collections import defaultdict
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
# Display floating point values with 2 digits of precision in pandas output.
pd.set_option("display.precision", 2)

## Configurations

In [2]:
# Default: no cut-off date. A configuration cell below may override this;
# the date filters further down are only applied when max_date is set.
max_date = None

In [3]:
# Active configuration: round 5.
rd = 5                                       # round number, used in the ./dat/smh/rd{rd}/ and ./figs/smh/rd{rd}/ paths
start_week = Week(2021,18)                   # first week of the truth file name
end_week = Week(2021,43)                     # last week of the truth file name
max_date = datetime.datetime(2021, 6, 23)    # observations/predictions after this date are dropped
# One CSV per model is read from ./dat/smh/rd{rd}/<model>.csv
models = [
    "Ensemble",
    "Ensemble_LOP",
    "JHUAPL-Bucky",
    "JHU_IDD-CovidSP",
    "Karlen-pypm",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "UVA-adaptive",
]

In [4]:
# rd = 12
# start_week = Week(2022,2)
# end_week = Week(2022,13)
# max_date = datetime.datetime(2022, 3, 15)
# models = ["Ensemble","Ensemble_LOP","Ensemble_LOP_untrimmed","JHU_IDD-CovidSP","MOBS_NEU-GLEAM_COVID","NCSU-COVSIM","NotreDame-FRED","UNCC-hierbin","USC-SIkJalpha",
#           "UTA-ImmunoSEIRS","UVA-EpiHiper","UVA-adaptive"]

In [5]:
# rd = 14
# start_week = Week(2022,23)
# end_week = Week(2023,22)
# models = ["Ensemble","Ensemble_LOP","Ensemble_LOP_untrimmed","JHU_IDD-CovidSP","MOBS_NEU-GLEAM_COVID","MOBS_NEU-GLEAM_COVID_OT","NCSU-COVSIM",
#           "UNCC-hierbin","USC-SIkJalpha","USC-SIkJalpha-update","UTA-ImmunoSEIRS","UVA-adaptive"]

In [6]:
# What is being scored: the target quantity, incident vs cumulative counts,
# and the location ('US' for national, otherwise an integer location code).
target = 'death'
# target = 'hosp'
incidence = True
# target_location = 'US'
# target_location = 6
# target_location = 12
# target_location = 36
target_location = 50

# Location table without the national row; codes are converted to integers so
# they can be compared with the integer `target_location`.
locations = pd.read_csv('./dat/locations.csv',dtype={'location':str})
# .copy() so the dtype conversion below writes to an independent frame, not a filtered view
locations = locations[locations['location'] != 'US'].copy()
locations['location'] = locations['location'].astype(int)

# location code -> abbreviation, using the first row seen for each code.
# Built in a single pass instead of re-filtering the whole table once per row.
location_mapping = (
    locations.drop_duplicates(subset='location')
    .set_index('location')['abbreviation']
    .to_dict()
)

## Pull surveillance data

In [7]:
# if target == 'hosp':
#     target_obs = 'hospitalization'
# else:
#     target_obs = target
# observations = Observations(pd.read_csv(f"./dat/truth_{start_week.isoformat()}_{end_week.isoformat()}_{'inc' if incidence else 'cum'}_{target_obs}_national.csv",
#                                parse_dates=['date']), other_ind_cols=[])
# observations = observations.filter(observations[observations.t_col] <= max_date)

In [8]:
# The truth files spell the hospitalization target out in full.
target_obs = 'hospitalization' if target == 'hosp' else target

# National and state truth live in separate files that differ only by suffix.
_is_national = target_location == 'US'
_truth_path = (
    f"./dat/truth_{start_week.isoformat()}_{end_week.isoformat()}_"
    f"{'inc' if incidence else 'cum'}_{target_obs}_"
    f"{'national' if _is_national else 'state'}.csv"
)
df = pd.read_csv(_truth_path, parse_dates=['date'])
if not _is_national:
    # the state file holds every location; keep only the requested one
    df = df[df['location'] == target_location].copy()

observations = Observations(df, other_ind_cols=[])
# Optional cut-off: drop observations dated after max_date.
if max_date:
    observations = observations.filter(observations[observations.t_col] <= max_date)

In [9]:
# Target labels to keep from the prediction files: one "<k> wk ahead ..." label
# per horizon k = 1 .. len(observations).
_measure = 'inc' if incidence else 'cum'
target_prediction_list = [
    f"{horizon} wk ahead {_measure} {target}"
    for horizon in range(1, len(observations) + 1)
]

## Load predictions, filter and score each model

If a model's quantiles are not well conditioned (i.e. they are incoherent), we simply skip that model.

In [10]:
# predictions = pd.read_csv(f'./dat/smh/rd{rd}/Ensemble.csv',dtype={'location':str},parse_dates=['target_end_date'])

In [11]:
def check_coherence(predictions, location='US'):
    """Check that paired lower/upper quantiles are correctly ordered.

    The distinct quantile levels are sorted; every level below 0.5 is then
    paired with a level above 0.5 from the outside in (lowest with highest,
    second lowest with second highest, ...). For each pair, the rows of
    ``location`` are ordered by ``target_end_date`` and every upper value must
    be strictly greater than the corresponding lower value.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Needs the columns ``quantile``, ``location``, ``target_end_date`` and
        ``value``.
    location : optional
        Value of the ``location`` column whose rows are checked. Defaults to
        ``'US'``. If no row matches, the check passes vacuously.

    Raises
    ------
    RuntimeError
        If a lower quantile level has no upper partner, or if for some pair an
        upper value is not strictly greater than the lower value.
    """
    # Sort the levels explicitly: the outside-in pairing below is only
    # meaningful when the levels are ordered, and unique() keeps file order.
    levels = predictions['quantile'].dropna().drop_duplicates().sort_values().to_numpy()
    lower_levels = levels[levels < 0.5]
    upper_levels = levels[levels > 0.5][::-1]
    if len(lower_levels) > len(upper_levels):
        raise RuntimeError(
            f"{len(lower_levels)} quantile levels below 0.5 but only "
            f"{len(upper_levels)} above 0.5: cannot pair them")

    # Restrict to the requested location once instead of once per pair.
    at_location = predictions[predictions['location'] == location]
    for q_low, q_up in zip(lower_levels, upper_levels):
        low = at_location[at_location['quantile'] == q_low].sort_values(by=['target_end_date'])
        up = at_location[at_location['quantile'] == q_up].sort_values(by=['target_end_date'])
        if not all((up['value'].to_numpy() - low['value'].to_numpy()) > 0):
            raise RuntimeError(f"quantile {q_low} and {q_up} not well conditioned")

In [12]:
# Score every model in `models` at the selected location and collect one small
# (Case, WIS, Model) table per model in `agg_data_df_list`.
# Any failure for a model (missing file, no rows for the location, scoring
# error, ...) is reported and that model is skipped.
agg_data_df_list = []
for model in models:
    try:
        predictions = pd.read_csv(f'./dat/smh/rd{rd}/{model}.csv',dtype={'location':str},parse_dates=['target_end_date'])

        #filter: keep only the targets being scored, split national / state rows
        on_target = predictions['target'].isin(target_prediction_list)
        predictions_nat = predictions[(predictions['location'] == 'US') & on_target]
        # .copy() so the dtype conversion below writes to an independent frame
        predictions_state = predictions[(predictions['location'] != 'US') & on_target].copy()
        predictions_state['location'] = predictions_state['location'].astype(int)
        predictions_state = predictions_state.merge(locations, how='inner', on='location')

        #choose location
        if target_location == 'US':
            pred = predictions_nat.copy()
        else:
            pred = predictions_state[predictions_state['location'] == target_location].copy()

        #filter max date
        if max_date:
            pred = pred[pred['target_end_date'] <= max_date]

        if len(pred) == 0:
            raise RuntimeError(f"There are no predictions for model {model} at location {target_location}")

        #check if incoherent quantile, remove model
        # check_coherence(pred)

        # One Predictions object per scenario, plus the two cross-scenario aggregates.
        scenarios = list(pred['scenario_id'].drop_duplicates())
        predictions_list = [Predictions(pred[pred['scenario_id'] == scenario], t_col='target_end_date') for scenario in scenarios]
        med_ensemble_predictions = median_ensemble(predictions_list)
        ex_ensemble_predictions = extreme_ensemble(predictions_list)
        predictions_list += [med_ensemble_predictions, ex_ensemble_predictions]
        # Scenarios are labelled by the first character of their scenario_id.
        labels = ["Scenario " + scenario[0] for scenario in scenarios] + ["Median aggregate", "Extreme aggregate"]

        # Score each case against the observations. The loop variable has its
        # own name so it does not overwrite the `predictions` DataFrame.
        aggregated_scores = dict()
        for label, scenario_predictions in zip(labels, predictions_list):
            d,_ = score_utils.all_scores_from_df(observations, scenario_predictions, mismatched_allowed=False)
            aggregated_scores[label] = d

        # One row per case with its mean WIS, tagged with the model name.
        model_scores_df = pd.DataFrame({
            'Case': labels,
            'WIS': [aggregated_scores[label]['wis_mean'] for label in labels],
        })
        model_scores_df['Model'] = model
        agg_data_df_list.append(model_scores_df)
    except Exception as e:
        # Deliberate best-effort: report the problem and go on to the next model.
        print(e)
        print(model)

In [13]:
# Build the Model x Case table of WIS values, express every value relative to
# the best (smallest) WIS in the table, and add per-column ranks.
agg_data_df = pd.concat(agg_data_df_list)
agg_data_df = agg_data_df.pivot(index=['Model'], columns=['Case'])
# agg_data_df[('WIS','Scenario average')] = agg_data_df['WIS'][['Scenario A','Scenario B','Scenario C','Scenario D']].mean(axis=1)
# Fix the column order; a case missing for a model (or for every model) shows up as NaN.
agg_data_df = agg_data_df.reindex(columns=[('WIS','Median aggregate'),
                                           ('WIS','Extreme aggregate'),
                                           # ('WIS','Scenario average'),
                                           ('WIS','Scenario A'),
                                           ('WIS','Scenario B'),
                                           ('WIS','Scenario C'),
                                           ('WIS','Scenario D')])
# NaN-aware minimum: a single missing model/case must not turn the whole
# relative table into NaN.
minval = np.nanmin(agg_data_df.to_numpy())
agg_data_df = agg_data_df / minval
# Rank within each column (1 = best). The nullable integer dtype keeps missing
# entries as <NA> instead of raising on the float -> int conversion.
rank_df = agg_data_df.rank(method='min').astype('Int64').rename(columns={'WIS':'Rank'})
agg_data_df = agg_data_df.join(rank_df)
agg_data_df = agg_data_df.rename(columns={'WIS':'Relative WIS'})
agg_data_df = agg_data_df.sort_values(by=[('Rank','Median aggregate')])

In [14]:
def make_pretty(styler):
    """Apply the notebook's table look to `styler` and return it.

    Values are formatted with precision 2 and shaded with the "Blues"
    colormap over the whole table (axis=None), on a fixed 1 to 5 scale.
    """
    gradient_options = {"axis": None, "vmin": 1, "vmax": 5, "cmap": "Blues"}
    styler.format(precision=2)
    # styler.format_index(lambda v: v.strftime("%A"))
    styler.background_gradient(**gradient_options)
    return styler
def highlight_min(s, props=''):
    """Return `props` where `s` equals its NaN-ignoring minimum, '' elsewhere."""
    smallest = np.nanmin(s.values)
    is_smallest = s == smallest
    return np.where(is_smallest, props, '')
def highlight_max(s, props=''):
    """Return `props` where `s` equals its NaN-ignoring maximum, '' elsewhere."""
    largest = np.nanmax(s.values)
    is_largest = s == largest
    return np.where(is_largest, props, '')

In [15]:
# Human-readable location name used in the caption and the output path.
if target_location == 'US':
    target_loc_name = 'US'
else:
    target_loc_name = location_mapping[target_location]

In [16]:
# Style the relative-WIS columns: shared formatting/gradient, the smallest
# value of each column in green, and a descriptive caption.
s = (
    agg_data_df['Relative WIS']
    .style
    .pipe(make_pretty)
    .apply(highlight_min, props='color:green', axis=0)
    # .apply(highlight_max, props='color:', axis=0)
    .set_caption(f"Relative WIS, Round {rd}, target = {target_obs}, location = {target_loc_name}")
)

In [17]:
# CSS rules attached to the styled table; each rule is a selector plus its properties.
caption = dict(selector='caption', props='caption-side: top; font-size:1.5em;')

# for row hover use <tr> instead of <td>
cell_hover = dict(selector='td:hover', props=[('background-color', '#ffffb3')])

index_names = dict(
    selector='.index_name',
    props='font-style: italic; color: grey; font-weight:normal;',
)

headers = dict(
    selector='th:not(.index_name)',
    props='background-color: lightgrey; color: #1a1a1a;',
)

values = dict(selector='td', props='text-align: right;')

s.set_table_styles([caption, cell_hover, index_names, headers, values])
# last expression of the cell: display the styled table
s

Case,Median aggregate,Extreme aggregate,Scenario A,Scenario B,Scenario C,Scenario D
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ensemble_LOP,1.01,1.04,1.0,1.02,1.01,1.04
Karlen-pypm,1.02,1.12,1.0,1.03,1.09,1.05
USC-SIkJalpha,1.03,1.02,1.03,1.03,1.04,1.05
Ensemble,1.05,1.09,1.02,1.08,1.03,1.1
JHU_IDD-CovidSP,1.23,1.25,1.22,1.23,1.24,1.29
MOBS_NEU-GLEAM_COVID,1.35,1.4,1.38,1.28,1.41,1.36
UVA-adaptive,1.72,1.61,1.53,1.92,1.54,1.93
JHUAPL-Bucky,1.95,1.96,1.91,1.98,1.92,1.99
UNCC-hierbin,2.84,2.84,2.84,2.84,2.84,2.84


In [18]:
import imgkit
from pathlib import Path
# Write the styled table as a PNG under ./figs/smh/rd<rd>/<inc|cum>_<target>/<location>/,
# creating the directory if needed.
path = f"./figs/smh/rd{rd}/{'inc' if incidence else 'cum'}_{target}/{target_loc_name}/"
Path(path).mkdir(parents=True, exist_ok=True)
_png_options = {
    'zoom': 4,
    'width': int(1920*1.8),
    'quality': 100,
}
imgkit.from_string(s.to_html(), path + 'relative_WIS.png', options=_png_options)

Loading page (1/2)
Rendering (2/2)                                                    
Done                                                               


True