In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append('../../')

from utils.fitting.loss import Loss_Calculator

import os
import copy
import pickle
import re
from datetime import datetime, date, timedelta
from glob import glob

from utils.generic.config import read_config, make_date_key_str
from utils.generic.reichlab import *
from viz.reichlab import *
from viz import plot_ptiles
from viz.uncertainty import plot_ptiles_reichlab

In [None]:
# Build a state -> state_code lookup from the abbreviations CSV.
us_states_abbv_df = pd.read_csv('../../data/data/us_states_abbv.csv')
us_states_abbv_dict = {
    state: state_code
    for state, state_code in zip(us_states_abbv_df['state'], us_states_abbv_df['state_code'])
}

In [None]:
# Fetch location-name lookups through get_mapping (brought in by one of the
# star imports above; presumably utils.generic.reichlab — TODO confirm).
# NOTE: the second line rebinds `us_states_abbv_dict`, replacing the dict that
# the previous cell built from us_states_abbv.csv.
loc_name_to_key_dict = get_mapping(which='location_name_to_code')
us_states_abbv_dict = get_mapping(which='location_name_to_abbv')

In [None]:
# Load the pickled predictions of run 2020_1211_183006.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
predictions_pkl_filename = (
    '/scratch/users/sansiddh/covid-modelling/2020_1211_183006/predictions_dict.pkl'
)
with open(predictions_pkl_filename, mode='rb') as pkl_file:
    predictions_dict1 = pickle.load(pkl_file)

In [None]:
# Load the pickled predictions of run 2020_1212_015547.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
predictions_pkl_filename = (
    '/scratch/users/sansiddh/covid-modelling/2020_1212_015547/predictions_dict.pkl'
)
with open(predictions_pkl_filename, mode='rb') as pkl_file:
    predictions_dict2 = pickle.load(pkl_file)

In [None]:
# Load the pickled predictions of run 2020_1212_041024.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
predictions_pkl_filename = (
    '/scratch/users/sansiddh/covid-modelling/2020_1212_041024/predictions_dict.pkl'
)
with open(predictions_pkl_filename, mode='rb') as pkl_file:
    predictions_dict3 = pickle.load(pkl_file)

In [None]:
# Load the pickled predictions of run 2020_1212_021044.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
predictions_pkl_filename = (
    '/scratch/users/sansiddh/covid-modelling/2020_1212_021044/predictions_dict.pkl'
)
with open(predictions_pkl_filename, mode='rb') as pkl_file:
    predictions_dict4 = pickle.load(pkl_file)

In [None]:
# Load the pickled predictions of run 2020_1211_221217.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
predictions_pkl_filename = (
    '/scratch/users/sansiddh/covid-modelling/2020_1211_221217/predictions_dict.pkl'
)
with open(predictions_pkl_filename, mode='rb') as pkl_file:
    predictions_dict5 = pickle.load(pkl_file)

In [None]:
# Empty frame with one column per key of the first run's predictions dict;
# rows are filled in by the next cell.
df_beta = pd.DataFrame(columns=list(predictions_dict1.keys()))

In [None]:
# Row i holds the 'beta' entry of the 'm2' fit from run i+1, one value per column.
#
# Fixes relative to the copy-pasted version:
#   * rows 3 and 4 iterated over predictions_dict3.keys() while reading from
#     predictions_dict4 / predictions_dict5;
#   * values were ordered by each dict's own key order, not by df_beta's columns,
#     so a differently ordered dict would silently put values under the wrong key.
# Iterating over df_beta.columns keeps every value under its own key and raises a
# KeyError if a run is missing one of them.
for run_idx, run_predictions in enumerate(
        [predictions_dict1, predictions_dict2, predictions_dict3, predictions_dict4, predictions_dict5]):
    df_beta.loc[run_idx, :] = [run_predictions[key]['m2']['beta'] for key in df_beta.columns]

In [None]:
# Flip to one row per prediction key and one column per run.
# NOTE: not idempotent — re-running this cell flips the frame back.
df_beta = df_beta.transpose()

In [None]:
# Display df_beta joined with df_mape_wiai on their indexes (result is not stored).
# NOTE(review): `df_mape_wiai` is first assigned in a later cell of this notebook,
# so this cell appears to fail with NameError under Restart & Run All — consider
# moving it below the cell that builds df_mape_wiai.
df_beta.merge(df_mape_wiai, left_index=True, right_index=True)

In [None]:
# Prefer the run parameters stored alongside the first entry of predictions_dict1;
# fall back to the YAML config when they cannot be read.
try:
    config = predictions_dict1[list(predictions_dict1.keys())[0]]['m2']['run_params']
except Exception as err:
    # Was a bare `except:`, which also trapped KeyboardInterrupt/SystemExit and hid
    # the reason for the fallback.
    print(f'Could not read run_params from predictions_dict1 ({err!r}); using us2.yaml instead')
    config_filename = 'us2.yaml'
    config = read_config(config_filename)['fitting']

loss_comp = config['loss']['loss_compartments'][0]
data_last_date = config['split']['end_date']
# The submission date is taken as two days after the last date of the data split.
date_of_submission = (data_last_date + timedelta(days=2)).strftime('%Y-%m-%d')

# Translate the loss compartment into the compartment name used by the reichlab
# helpers. Fail loudly on an unknown compartment instead of leaving `comp`
# undefined (or stale from an earlier run of the cell).
comp_by_loss_compartment = {'deceased': 'cum_death', 'total': 'cum_case'}
if loss_comp not in comp_by_loss_compartment:
    raise ValueError(
        f'Unsupported loss compartment {loss_comp!r}; expected one of {sorted(comp_by_loss_compartment)}')
comp = comp_by_loss_compartment[loss_comp]
print(comp)
print(date_of_submission)

# Single definition of the forecast-hub checkout used by all three helpers below.
REICHLAB_PATH = '../../../covid19-forecast-hub'
list_of_models = get_list_of_models(date_of_submission, comp, reichlab_path=REICHLAB_PATH,
                                    num_submissions_filter=45)
df_all_submissions = process_all_submissions(list_of_models, date_of_submission, comp, reichlab_path=REICHLAB_PATH)
df_gt, df_gt_loss, df_gt_loss_wk = process_gt(comp, df_all_submissions, reichlab_path=REICHLAB_PATH)

In [None]:
def return_mape(predictions_dict, df_all_submissions):
    """Return the 'Wadhwani_AI' row of the MAPE table for one predictions dict.

    The predictions are formatted with `format_wiai_submission` (which_fit='m2',
    use_as_point_forecast='ensemble_mean'), combined with `df_all_submissions`,
    and compared against the ground truth via `compare_gt_pred`.

    Relies on the notebook-level names `loc_name_to_key_dict`, `comp` and
    `df_gt_loss_wk` being defined before the call.
    """
    wiai_submission = format_wiai_submission(
        predictions_dict,
        loc_name_to_key_dict,
        which_fit='m2',
        use_as_point_forecast='ensemble_mean',
        skip_percentiles=False,
    )
    all_submissions = combine_wiai_subm_with_all(df_all_submissions, wiai_submission, comp)

    # Only the MAPE table (second of the three returned frames) is needed here.
    _, mape_table, _ = compare_gt_pred(all_submissions, df_gt_loss_wk)
    return mape_table.loc['Wadhwani_AI', :]

In [None]:
# Build the comparison frames for the first run only. df_comb, df_mape and
# df_rank produced here are reused by later cells.
df_wiai_submission = format_wiai_submission(
    predictions_dict1,
    loc_name_to_key_dict,
    which_fit='m2',
    use_as_point_forecast='ensemble_mean',
    skip_percentiles=False,
)
df_all = combine_wiai_subm_with_all(df_all_submissions, df_wiai_submission, comp)
df_comb, df_mape, df_rank = compare_gt_pred(df_all, df_gt_loss_wk)

In [None]:
# One 'Wadhwani_AI' MAPE row per run, evaluated in run order.
df_mape1, df_mape2, df_mape3, df_mape4, df_mape5 = (
    return_mape(run_predictions, df_all_submissions)
    for run_predictions in (
        predictions_dict1,
        predictions_dict2,
        predictions_dict3,
        predictions_dict4,
        predictions_dict5,
    )
)

In [None]:
# Put the per-run MAPE rows side by side, then append their row-wise median as
# the last column. Every column ends up labelled 'Wadhwani_AI'.
per_run_mapes = [df_mape1, df_mape2, df_mape3, df_mape4, df_mape5]
df_mape_wiai = pd.concat(per_run_mapes, axis=1)
df_mape_wiai = pd.concat([df_mape_wiai, df_mape_wiai.median(axis=1)], axis=1)
df_mape_wiai.columns = ['Wadhwani_AI'] * df_mape_wiai.shape[1]

In [None]:
# Display the per-run MAPE columns together with the appended median column.
df_mape_wiai

In [None]:
# Replace the 'Wadhwani_AI' row of df_mape (computed from the first run) with the
# last column of df_mape_wiai, i.e. the median column appended when that frame
# was built. This mutates df_mape in place, so every later cell sees the median.
df_mape.loc['Wadhwani_AI', :] = df_mape_wiai.iloc[:, -1]

In [None]:
# Display the (now overwritten) 'Wadhwani_AI' row as a sanity check.
df_mape.loc['Wadhwani_AI', :]

In [None]:
# Recompute ranks from the updated MAPE table, then remove the excluded locations
# from both tables.
df_rank = df_mape.rank()

# errors='ignore' drops whichever of these columns exist. The previous
# `try: ... except: pass` aborted the whole drop as soon as one label was missing
# (and could leave df_mape and df_rank with different columns), while also
# swallowing every unrelated error.
locations_to_drop = ['Guam', 'Virgin Islands', 'Northern Mariana Islands']
df_mape.drop(columns=locations_to_drop, errors='ignore', inplace=True)
df_rank.drop(columns=locations_to_drop, errors='ignore', inplace=True)

In [None]:
# Recover the last data date from the submission date (submission = data end + 2 days).
x = datetime.strptime(date_of_submission, '%Y-%m-%d')
data_last_date = x - timedelta(days=2)

merged = create_performance_table(df_mape, df_rank)

print('Data last date -  {}'.format(data_last_date.strftime('%Y-%m-%d')))
print('Test period till -  {}'.format(
    (data_last_date + timedelta(days=28)).strftime('%Y-%m-%d')))
merged

In [None]:

num_models = df_mape.median(axis=1).size
print(f'Total # of models - {num_models}')

# Restrict both tables to the columns where Wadhwani_AI has a value, then take
# the per-model median across those columns.
median_mape = (
    df_mape.loc[:, df_mape.loc['Wadhwani_AI', :].notna()]
    .median(axis=1)
    .rename('median_mape')
)
median_rank = (
    df_rank.loc[:, df_rank.loc['Wadhwani_AI', :].notna()]
    .median(axis=1)
    .rename('median_rank')
)
# NOTE: rebinds `merged`, replacing the performance table from the previous cell.
merged = pd.concat([median_mape, median_rank], axis=1)

In [None]:
# Z-scores of Wadhwani_AI relative to the other models, shown as a heatmap.
df = calculate_z_score(df_mape, df_rank, model_name='Wadhwani_AI')

fig, ax = create_heatmap(df, var_name='non_param_z_score', center=0)

# Fix: the original passed `predictions_dict`, a name never defined in this
# notebook (only predictions_dict1..5 exist), so the cell raised NameError on a
# fresh kernel. predictions_dict1 matches the run used for `config` and `df_comb`.
# NOTE(review): confirm that the first run is the intended source of train error.
df_wadhwani = combine_with_train_error(predictions_dict1, df)

print(f'# -ve Z score {len(df_wadhwani[df_wadhwani["z_score"] <= 0])}')
print(f'# +ve Z score {len(df_wadhwani[df_wadhwani["z_score"] > 0])}')

print(f'# -ve non param Z score {len(df_wadhwani[df_wadhwani["non_param_z_score"] <= 0])}')
print(f'# +ve non param Z score {len(df_wadhwani[df_wadhwani["non_param_z_score"] > 0])}')

fig = create_scatter_plot_mape(df_wadhwani, annotate=True, abbv=True, abbv_dict=us_states_abbv_dict, 
                               stat_metric_to_use='non_param_z_score', log_scale=True)

In [None]:
# Rebuild the performance table (`merged` was rebound to the median summary in an
# earlier cell) and print the data / test period boundaries again.
x = datetime.strptime(date_of_submission, '%Y-%m-%d')
data_last_date = x - timedelta(days=2)

merged = create_performance_table(df_mape, df_rank)

print('Data last date -  {}'.format(data_last_date.strftime('%Y-%m-%d')))
print('Test period till -  {}'.format(
    (data_last_date + timedelta(days=28)).strftime('%Y-%m-%d')))
merged

In [None]:
# Percentile plot for Karlen-pypm in Texas, saved as a PDF for the paper.
model = 'Karlen-pypm'
state = 'Texas'
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
fig.savefig(
    f'../../../paper/plots/ptiles-{model}-{state}.pdf',
    format='pdf',
    bbox_inches='tight',
    pad_inches=0,
)

In [None]:
# Percentile plot for Wadhwani_AI in Texas, saved as a PDF for the paper.
model = 'Wadhwani_AI'
state = 'Texas'
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
fig.savefig(
    f'../../../paper/plots/ptiles-{model}-{state}.pdf',
    format='pdf',
    bbox_inches='tight',
    pad_inches=0,
)

In [None]:
# Fill 'perc_loss_mape' for every quantile row of df_comb; other rows stay NaN.
lc = Loss_Calculator()
df_comb['perc_loss_mape'] = np.nan
for i, row in df_comb.iterrows():
    if row['type'] != 'quantile':
        continue
    # The loss helper is called with one-element arrays for forecast and truth.
    df_comb.loc[i, 'perc_loss_mape'] = lc._calc_mape_perc(
        np.array([row['forecast_value']]),
        np.array([row['true_value']]),
        row['quantile'],
    )

In [None]:
# Bar chart of the quantile loss at the chosen quantile, saved for the paper.
quant = 0.8
fig, ax = qtile_barchart(df_comb, quant, color='firebrick', latex=True)
fig.savefig(
    f'../../../paper/plots/quant-loss-{quant}.pdf',
    format='pdf',
    bbox_inches='tight',
    pad_inches=0,
)

In [None]:
# Display the performance table built from the current df_mape / df_rank
# (the result is shown but not stored).
create_performance_table(df_mape, df_rank)

In [None]:
# Load the US state shapefile used by the choropleth cells below.
# NOTE(review): the filename is relative; where it is resolved from depends on
# preprocess_shape_file — confirm against that helper.
gdf = preprocess_shape_file(filename='cb_2018_us_state_5m/cb_2018_us_state_5m.shp')

In [None]:
# Display the rows for the two states that are also used in the ECDF figure.
df_wadhwani.loc[['Connecticut', 'Wyoming']]

In [None]:
# One choropleth per variable; each entry carries its colormap and colour limits.
vars_to_plot = {
    'non_param_z_score': dict(cmap='RdYlGn_r', vmin=-1, vcenter=0, vmax=1),
    'model_rank': dict(cmap='Purples', vmin=0, vcenter=13, vmax=26),
}
fig, axs = plot_multiple_choropleths(df_wadhwani, gdf, vars_to_plot)
fig.savefig(
    '../../../paper/plots/choropleth.pdf',
    format='pdf',
    bbox_inches='tight',
    pad_inches=0,
)

In [None]:
# Same choropleth layout, using 'z_score' instead of the non-parametric score.
# This figure is displayed only, not saved.
vars_to_plot = {
    'z_score': dict(cmap='RdYlGn_r', vmin=-1, vcenter=0, vmax=1),
    'model_rank': dict(cmap='Purples', vmin=0, vcenter=13, vmax=26),
}
fig, axs = plot_multiple_choropleths(df_wadhwani, gdf, vars_to_plot)

In [None]:
# Scatter plot of the z-scores held in df_wadhwani (displayed only, not saved).
fig, ax = create_scatter_plot_zscores(df_wadhwani)

In [None]:
# ECDF plots for all states from the MAPE table (displayed only, not saved).
fig, axs = plot_ecdf_all_states(df_mape)

In [None]:
# Side-by-side ECDF panels for two states, highlighting Wadhwani_AI, saved for the paper.
fig, axs = plt.subplots(figsize=(12, 6), ncols=2)
plot_ecdf_single_state(df_mape, 'Connecticut', axs[0], model='Wadhwani_AI')
plot_ecdf_single_state(df_mape, 'Wyoming', axs[1], model='Wadhwani_AI')
# Title spelling fixed ("Emperical" -> "Empirical"); it is rendered into the saved PDF.
fig.suptitle('Empirical Cumulative Distribution Plots')
fig.savefig('../../../paper/plots/ecdf_plots.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Q-Q plots for all states with fit=False, passing df_wadhwani alongside the
# MAPE table (displayed only, not saved).
fig, axs = plot_qq_all_states(df_mape, fit=False, df_wadhwani=df_wadhwani)