In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before each
# cell executes, so edits to the project packages imported in the next cell are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append('../../')

from utils.fitting.loss import Loss_Calculator

import os
from copy import copy, deepcopy
import pickle
import re
from datetime import datetime, date, timedelta
from glob import glob

from utils.generic.config import read_config
from utils.generic.reichlab import *
from viz.reichlab import *
from viz import plot_ptiles
from viz import axis_formatter
from viz.uncertainty import plot_ptiles_reichlab

In [None]:
# Lookup tables keyed by location name, as selected by the `which` argument of get_mapping.
# presumably: full state name -> two-letter abbreviation — TODO confirm against get_mapping
us_states_abbv_dict = get_mapping(which='location_name_to_abbv')
# presumably: full state name -> location code; a later cell casts the 'Texas' entry with int()
us_states_loc_dict = get_mapping(which='location_name_to_code')

In [None]:
# Load the pickled predictions of the '2020_1214_194241_comb' run.
# NOTE(review): machine-specific absolute path; pickle.load executes arbitrary code, so only
# open files produced by a trusted run. A later cell loads a different run into the same
# `predictions_dict` name, so whichever load cell ran last determines the results below.
predictions_pkl_filename = '/Users/sansiddhjain/projects/covid/2020_1214_194241_comb//predictions_dict.pkl'
with open(predictions_pkl_filename, mode='rb') as pkl_handle:
    predictions_dict = pickle.load(pkl_handle)

In [None]:
# Figure text is typeset through LaTeX in 15pt Palatino for every plot created below.
latex_font_settings = {
    'text.usetex': True,
    'font.size': 15,
    'font.family': 'Palatino',
}
plt.rcParams.update(latex_font_settings)

In [None]:
# Load the pickled predictions of the '2020_1111_162416_beta' run. This rebinds both
# `predictions_pkl_filename` and `predictions_dict`, replacing whatever an earlier load
# cell put there.
# NOTE(review): machine-specific absolute path; pickle.load executes arbitrary code, so only
# open files produced by a trusted run.
predictions_pkl_filename = '/Users/sansiddhjain/projects/covid/2020_1111_162416_beta/predictions_dict.pkl'
with open(predictions_pkl_filename, mode='rb') as pkl_handle:
    predictions_dict = pickle.load(pkl_handle)

In [None]:
# NOTE(review): `mpl` is not referenced in any visible cell, and the settings below repeat
# the ones applied by the earlier rcParams cell.
import matplotlib as mpl
# Render all figure text with LaTeX in 15pt Palatino (a serif font).
plt.rcParams.update({
    'text.usetex': True,
    'font.size': 15,
    'font.family': 'Palatino',
 })

In [None]:
# Prefer the run parameters stored alongside the first location's 'm2' fit; fall back to the
# YAML config only when the loaded predictions do not carry them. Only lookup failures are
# caught, so unrelated errors (and KeyboardInterrupt) are no longer silently swallowed.
try:
    first_location = next(iter(predictions_dict))
    config = predictions_dict[first_location]['m2']['run_params']
except (StopIteration, KeyError, IndexError, TypeError):
    config_filename = 'us2.yaml'
    config = read_config(config_filename)['fitting']

loss_comp = config['loss']['loss_compartments'][0]
data_last_date = config['split']['end_date']
# assumes `end_date` is a date/datetime object (it is added to a timedelta) — TODO confirm
date_of_submission = (data_last_date + timedelta(days=2)).strftime('%Y-%m-%d')

# Translate the fitted loss compartment into the forecast target name passed to the
# reichlab helpers. An unsupported compartment now fails here with a clear message instead of
# leaving `comp` undefined (or stale from a previous run of this cell).
loss_comp_to_target = {'deceased': 'cum_death', 'total': 'cum_case'}
if loss_comp not in loss_comp_to_target:
    raise ValueError(
        f'Unsupported loss compartment {loss_comp!r}; expected one of {sorted(loss_comp_to_target)}'
    )
comp = loss_comp_to_target[loss_comp]
print(comp)
print(date_of_submission)

# All forecast-hub inputs are read from the same local checkout, given relative to this notebook.
reichlab_path = '../../../covid19-forecast-hub'

# Models for the submission date and target; `num_submissions_filter=45` is forwarded unchanged.
list_of_models = get_list_of_models(
    date_of_submission, comp,
    reichlab_path=reichlab_path,
    num_submissions_filter=45,
)
df_all_submissions = process_all_submissions(
    list_of_models, date_of_submission, comp,
    reichlab_path=reichlab_path,
)
# Ground-truth frames for the same target.
df_gt, df_gt_loss, df_gt_loss_wk = process_gt(
    comp, df_all_submissions,
    reichlab_path=reichlab_path,
)

# Format our own predictions ('m2' fit, ensemble mean as the point forecast, percentiles kept)
# and merge them into the frame holding every other model's submission.
# NOTE(review): this call previously passed `loc_name_to_key_dict`, a name that is not assigned
# in any visible cell. The location-name -> code mapping built earlier is stored as
# `us_states_loc_dict` and is assumed to be the intended argument — confirm.
df_wiai_submission = format_wiai_submission(
    predictions_dict, us_states_loc_dict,
    which_fit='m2',
    use_as_point_forecast='ensemble_mean',
    skip_percentiles=False,
)

df_all_submissions = combine_wiai_subm_with_all(df_all_submissions, df_wiai_submission, comp)

# Score every submission against the weekly ground truth.
df_comb, df_mape, df_rank = compare_gt_pred(df_all_submissions, df_gt_loss_wk)

# These three territory columns are removed from both tables before summarising.
excluded_territories = ['Guam', 'Virgin Islands', 'Northern Mariana Islands']
df_mape = df_mape.drop(columns=excluded_territories)
df_rank = df_rank.drop(columns=excluded_territories)

# One row per model in df_mape.
num_models = len(df_mape.index)
print(f'Total # of models - {num_models}')

# Medians are taken only over the columns for which the Wadhwani_AI row has a value.
# NOTE(review): other cells look the model up as 'Wadhwani_AI-BayesOpt' — confirm which
# name combine_wiai_subm_with_all actually assigns.
wiai_model = 'Wadhwani_AI'
mape_covered = df_mape.loc[wiai_model].notna()
rank_covered = df_rank.loc[wiai_model].notna()
median_mape = df_mape.loc[:, mape_covered].median(axis=1).rename('median_mape')
median_rank = df_rank.loc[:, rank_covered].median(axis=1).rename('median_rank')
merged = pd.concat([median_mape, median_rank], axis=1)

In [None]:
# Z-score table for the Wadhwani_AI model. It is bound to `df_zscore` because the heatmap and
# the train-error merge below both read that name; binding it to `df` left `df_zscore`
# undefined on a fresh kernel.
df_zscore = calculate_z_score(df_mape, df_rank, model_name='Wadhwani_AI')

# Heatmap of the non-parametric z-score, colour scale centred at 0, saved for the paper.
fig, ax = create_heatmap(df_zscore, var_name='non_param_z_score', center=0)
fig.savefig('../../../paper/plots/heatmap-non-param-z-score.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

# Attach the training error stored in predictions_dict to the z-score table.
df_wadhwani = combine_with_train_error(predictions_dict, df_zscore)

# Count rows on each side of zero; NaN scores fall in neither count.
num_non_positive = int((df_wadhwani['non_param_z_score'] <= 0).sum())
num_positive = int((df_wadhwani['non_param_z_score'] > 0).sum())
print(f'# -ve non param Z score {num_non_positive}')
print(f'# +ve non param Z score {num_positive}')

fig = create_scatter_plot_mape(df_wadhwani, annotate=True, abbv=True, abbv_dict=us_states_abbv_dict,
                               stat_metric_to_use='non_param_z_score', log_scale=True)

fig.savefig('../../../paper/plots/train-test-scatter.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Percentile plot for one (model, state) pair, saved under a name derived from both.
model = 'Karlen-pypm'
state = 'Texas'
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
ptiles_pdf_path = f'../../../paper/plots/ptiles-{model}-{state}.pdf'
fig.savefig(ptiles_pdf_path, format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Percentile plot for one (model, state) pair, saved under a name derived from both.
model = 'Wadhwani_AI-BayesOpt'
state = 'Texas'
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
ptiles_pdf_path = f'../../../paper/plots/ptiles-{model}-{state}.pdf'
fig.savefig(ptiles_pdf_path, format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Percentile plot for one (model, state) pair, saved under a name derived from both.
model = 'Wadhwani_AI'
state = 'Texas'
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
ptiles_pdf_path = f'../../../paper/plots/ptiles-{model}-{state}.pdf'
fig.savefig(ptiles_pdf_path, format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Three side-by-side bar charts of the 'mape_perc' quantile loss, one panel per quantile.
fig, axs = plt.subplots(figsize=(25, 12), nrows=1, ncols=3)

# (quantile, bar colour) for the left, middle and right panel respectively.
panel_specs = [(0.2, 'teal'), (0.5, 'C0'), (0.8, 'firebrick')]
for panel_ax, (panel_quant, panel_color) in zip(axs.flat, panel_specs):
    error_barchart(df_comb, quant=panel_quant, color=panel_color, ftype='quantile',
                   loss_fn='mape_perc', plot_rank=False, ax=panel_ax)

fig.tight_layout()

In [None]:
# Stand-alone bar chart for a single quantile, drawn with latex=False.
# NOTE(review): `quant` is a notebook-level name that the per-quantile loop in a later cell
# reuses as its loop variable.
quant = 0.8
fig, ax = error_barchart(df_comb, quant, color='firebrick', latex=False)
# Saving this figure is currently disabled:
# fig.savefig(f'../../../paper/plots/quant-loss-{quant}.pdf',
#             format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Build a (model x quantile) table holding, for each quantile, every model's median
# 'mape_perc' across locations.
#
# The per-quantile pivot is kept in its own name (`df_quant_mape`). Reusing `df_mape` here
# would overwrite the notebook-level MAPE table that later cells pass to the performance
# table, ECDF and QQ helpers.

# Locations enter the median only where this model has a value.
# NOTE(review): other cells look the model up as 'Wadhwani_AI' — confirm the name in df_comb.
reference_model = 'Wadhwani_AI-BayesOpt'

# NOTE(review): the last unique value is dropped — presumably the NaN quantile carried by
# point-forecast rows; this relies on it appearing last in df_comb — confirm.
quantiles = pd.unique(df_comb['quantile'])[:-1]
df_med_mape_allqs = pd.DataFrame(index=pd.unique(df_comb['model']), columns=quantiles)

for quant in quantiles:
    df_quant = df_comb[df_comb['quantile'] == quant]
    # Average only the loss column: averaging every column fails on non-numeric columns
    # in recent pandas, and only 'mape_perc' is used afterwards.
    df_quant_mape = (
        df_quant
        .groupby(['model', 'location', 'location_name'])['mape_perc']
        .mean()
        .reset_index()
        .pivot(index='model', columns='location_name', values='mape_perc')
    )
    has_reference = df_quant_mape.loc[reference_model, :].notna()
    # Assignment aligns on the model index; models without rows at this quantile stay NaN.
    df_med_mape_allqs.loc[:, quant] = df_quant_mape.loc[:, has_reference].median(axis=1)

In [None]:
# One line per model of median quantile loss against quantile. Two models are highlighted
# with a colour and a legend entry; every other model is drawn in light grey without a label.
# Add further entries here to highlight more models (e.g. 'YYG-ParamSearch' in 'goldenrod').
highlighted_models = {
    # Raw string: the backslash escapes the underscore for LaTeX text rendering.
    'Wadhwani_AI-BayesOpt': {'label': r'Wadhwani\_AI-BayesOpt', 'color': 'orangered'},
    'UMass-MechBayes': {'label': 'UMass-MechBayes', 'color': 'cornflowerblue'},
}
background_style = {'color': 'lightgrey'}

fig, ax = plt.subplots(figsize=(12, 12))
for model in df_med_mape_allqs.index:
    line_style = highlighted_models.get(model, background_style)
    ax.plot(df_med_mape_allqs.columns, df_med_mape_allqs.loc[model, :], **line_style)

ax.set_ylabel('Quantile Loss MAPE')
ax.set_xlabel('Quantile')
ax.set_xticks([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
ax.legend()
ax.set_title('Median Quantile Loss across states for all quantiles')
fig.savefig('../../../paper/plots/qtile-loss-reichlab.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

In [None]:
# Overlay the Texas cumulative-death forecasts of two models on one axis and save the figure.
fig, ax = plt.subplots(figsize=(12, 12))
texas_code = int(us_states_loc_dict['Texas'])
for model_name, band_color in (('Wadhwani_AI-BayesOpt', 'orangered'),
                               ('UMass-MechBayes', 'cornflowerblue')):
    plot_ptiles_reichlab(df_comb, model_name, texas_code,
                         target='cum death', plot_true=True, plot_individual_curves=False,
                         color=band_color, ax=ax, latex=True)

# Hand-built legend. Labels are raw strings so the LaTeX escapes (\_ and \%) are literal
# backslashes rather than invalid Python escape sequences.
# NOTE(review): the line colours here ('red', 'orange', 'green') differ from the colours passed
# to plot_ptiles_reichlab ('orangered', 'cornflowerblue') — confirm they match what is drawn.
legend_elements = [
    Line2D([0], [0], ls='--', marker='o', color='red', label='Cum Death (Observed)'),
    Line2D([0], [0], ls='-', marker='o', color='orange', label=r'Wadhwani\_AI-BayesOpt Point Forecast'),
    Line2D([0], [0], ls='-', marker='o', color='green', label='UMass-MechBayes Point Forecast'),
    Patch(facecolor='orangered', edgecolor='orangered', alpha=0.1, label=r'Wadhwani\_AI-BayesOpt 95\% CI'),
    Patch(facecolor='cornflowerblue', edgecolor='cornflowerblue', alpha=0.1, label=r'UMass-MechBayes 95\% CI'),
]
ax.legend(handles=legend_elements)
ax.grid()

# Trim 18 x-axis units off the right-hand side of the plot.
# presumably days, if the x-axis holds dates — TODO confirm
x_min, x_max = ax.get_xlim()
ax.set_xlim(x_min, x_max - 18)
ax.set_title('Cumulative Death Forecasts for Texas')
fig.savefig('../../../paper/plots/95pc-ci-reichlab.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Display the performance table built from the MAPE and rank tables (shown as cell output).
create_performance_table(df_mape, df_rank)

In [None]:
# Load the US state shapefile used by the choropleth cells below.
# NOTE(review): the filename is relative — presumably resolved by preprocess_shape_file
# against its own data directory; confirm.
gdf = preprocess_shape_file(filename='cb_2018_us_state_5m/cb_2018_us_state_5m.shp')

In [None]:
# Inspect the rows for the two states that are also plotted individually in the ECDF cell.
df_wadhwani.loc[['Connecticut', 'Wyoming']]

In [None]:
# Colour-map settings per plotted variable: one map per key, saved together for the paper.
vars_to_plot = dict(
    non_param_z_score=dict(cmap='RdYlGn_r', vmin=-1, vcenter=0, vmax=1),
    model_rank=dict(cmap='Purples', vmin=0, vcenter=13, vmax=26),
)
choropleth_pdf_path = '../../../paper/plots/choropleth.pdf'
fig, axs = plot_multiple_choropleths(df_wadhwani, gdf, vars_to_plot)
fig.savefig(choropleth_pdf_path, format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Same pair of maps, but coloured by 'z_score' instead of 'non_param_z_score'; not saved.
# Rebinds `vars_to_plot`, `fig` and `axs`.
vars_to_plot = dict(
    z_score=dict(cmap='RdYlGn_r', vmin=-1, vcenter=0, vmax=1),
    model_rank=dict(cmap='Purples', vmin=0, vcenter=13, vmax=26),
)
fig, axs = plot_multiple_choropleths(df_wadhwani, gdf, vars_to_plot)

In [None]:
# Scatter plot built from the z-score columns of df_wadhwani (displayed only, not saved).
fig, ax = create_scatter_plot_zscores(df_wadhwani)

In [None]:
# ECDF plots for every state column of the MAPE table (displayed only, not saved).
fig, axs = plot_ecdf_all_states(df_mape)

In [None]:
# Side-by-side ECDF plots for two states, highlighting the Wadhwani_AI model, saved for the paper.
fig, axs = plt.subplots(figsize=(12, 6), ncols=2)
for state_ax, state_name in zip(axs, ('Connecticut', 'Wyoming')):
    plot_ecdf_single_state(df_mape, state_name, state_ax, model='Wadhwani_AI')
# Title spelling corrected ('Emperical' -> 'Empirical'); it is rendered into the saved PDF.
fig.suptitle('Empirical Cumulative Distribution Plots')
fig.savefig('../../../paper/plots/ecdf_plots.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# QQ plots for every state, called with fit=False and the Wadhwani z-score table
# (displayed only, not saved).
fig, axs = plot_qq_all_states(df_mape, fit=False, df_wadhwani=df_wadhwani)