In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# project modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append('../../')

from utils.fitting.loss import Loss_Calculator

import os
from copy import copy, deepcopy
import pickle
import re
from datetime import datetime, date, timedelta
from glob import glob

from utils.generic.config import read_config
from utils.generic.reichlab import *
from viz.reichlab import *
from viz import plot_ptiles
from viz import axis_formatter
from viz.uncertainty import plot_ptiles_reichlab

In [None]:
# Lookup tables returned by get_mapping, selected through its `which` argument.
# Judging by the option names these map a state name to its abbreviation and
# to its location code respectively — confirm against utils.generic.reichlab.
us_states_abbv_dict = get_mapping(which='location_name_to_abbv')
us_states_loc_dict = get_mapping(which='location_name_to_code')

In [None]:
# Load the pickled predictions dictionary used by the rest of the notebook.
# NOTE(review): this is an absolute path on one user's machine (and contains a
# doubled '/'); the notebook cannot be re-run elsewhere without editing it.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so
# only point this at pickles produced by a trusted run.
predictions_pkl_filename = '/Users/sansiddhjain/projects/covid/2020_1214_194241_comb//predictions_dict.pkl'
with open(predictions_pkl_filename, 'rb') as f:
    predictions_dict = pickle.load(f)

In [None]:
# Global matplotlib text settings for every figure produced below:
# LaTeX text rendering, 15pt font size, Palatino font family.
plt.rcParams['text.usetex'] = True
plt.rcParams['font.size'] = 15
plt.rcParams['font.family'] = 'Palatino'

In [None]:
# Ground truth for the 'cum_death' compartment; only the first of the three
# values returned by process_gt is kept.
# Presumably the two date strings bound the period loaded — confirm against
# process_gt's signature.
df_gt, _, _ = process_gt('cum_death', '2020-10-01', '2020-12-31')

In [None]:
# Build the comparison frames (combined forecasts, MAPE, ranks) for the
# 2020-12-14 submission, with 'input' as the first argument and without
# processing a Wadhwani AI submission (process_wiai_submission=False).
df_comb, df_mape, df_rank = end_to_end_comparison('input', comp='cum_death', date_of_submission='2020-12-14', process_wiai_submission=False)

In [None]:
# One panel per state that has a Wadhwani_AI-BayesOpt point forecast, showing
# the train-period fit, the test-period forecast (annotated with its MAPE),
# the observed deaths and a marker for the end of the training window.
#
# The filtered rows get their own name so that `df_comb` (the full comparison
# table) is not silently narrowed for whatever cell is executed next.
df_wiai_point = df_comb.loc[(df_comb['model'] == 'Wadhwani_AI-BayesOpt') & (df_comb['type'] == 'point')]
states = pd.unique(df_wiai_point['location_name'])

n_rows = int(np.ceil(len(states) / 2))
fig, axs = plt.subplots(figsize=(18, 7 * n_rows), nrows=n_rows, ncols=2)

# Loop-invariant: earliest forecast target date over all plotted states.
# Everything strictly before it is treated as the train period.
forecast_start = min(df_wiai_point['target_end_date'])

for ax, state in zip(axs.flat, states):
    # `df_temp` keeps its name because a later cell displays it.
    df_temp = df_wiai_point.loc[df_wiai_point['location_name'] == state]
    ax.plot(df_temp['target_end_date'], df_temp['forecast_value'], '--o', color='red', label='Forecast Deaths (Test)')
    # Annotate each forecast point with its MAPE, nudged right by 8 hours so
    # the text does not sit on the marker. Raw string: '\%' is a LaTeX escape
    # (usetex is on), not a Python escape sequence.
    for _, row in df_temp.iterrows():
        ax.text(row['target_end_date'] + timedelta(hours=8), row['forecast_value'],
                r'{}\%'.format(np.around(row['mape'], 2)))

    # Ensemble-mean forecast restricted to dates before the test period.
    df_pred_trainper = deepcopy(predictions_dict[state]['m2']['forecasts']['ensemble_mean'])
    df_pred_trainper = df_pred_trainper[df_pred_trainper['date'] < forecast_start]
    train_plot_start = min(df_pred_trainper['date'])

    # Ground truth and training data from the start of the plotted fit onwards.
    df_gt_plot = df_gt[df_gt['location_name'] == state]
    df_gt_plot = df_gt_plot[df_gt_plot['date'] >= train_plot_start]

    df_train = deepcopy(predictions_dict[state]['m1']['df_district'])
    df_train = df_train[df_train['date'] >= train_plot_start]

    ax.plot(df_gt_plot['date'], df_gt_plot['value'], '-', color='red', label='True Deaths')
    ax.plot(df_pred_trainper['date'], df_pred_trainper['deceased'], '--', color='red', label='Forecast Deaths (Train)')

    # Last date of the m1 training frame marks where training ends.
    ax.axvline(df_train.iloc[-1]['date'], ls=':', c='black', label='Train Ends')

    axis_formatter(ax)
    # Extra room on the right so the MAPE annotations are not clipped.
    ax.set_xlim(ax.get_xlim()[0], ax.get_xlim()[1] + 4)
    ax.set_title(state)

# With an odd number of states the last grid cell has nothing to show.
for unused_ax in axs.ravel()[len(states):]:
    unused_ax.set_visible(False)

fig.tight_layout()

In [None]:
# Write the current `fig` as a PDF into the notebook's working directory.
fig.savefig('reichlab-state-wise-forecast-comparison.pdf', format='pdf')

In [None]:
# Debug display of whatever was last bound to `df_temp`; after the state-wise
# plotting loop that is the forecast rows of the final state iterated.
df_temp

In [None]:
# Rebuild the comparison frames, this time from the in-memory predictions_dict
# and for the 2020-12-21 submission date. This replaces the df_comb / df_mape /
# df_rank produced by the earlier 'input' call (which used 2020-12-14 and an
# explicit comp='cum_death'; `comp` is left at its default here).
# NOTE(review): use_as_point_forecast=35 presumably selects which forecast in
# predictions_dict is used as the point forecast — confirm.
df_comb, df_mape, df_rank = end_to_end_comparison('predictions_dict', date_of_submission='2020-12-21', 
                                                  predictions_dict=predictions_dict, use_as_point_forecast=35,
                                                  process_wiai_submission=True)

In [None]:
# Inspect the 'deceased' column of the Arkansas m2 forecast stored under key 25
# for 2020-12-21 .. 2021-01-13 (label slice on the date index, both ends
# inclusive).
# NOTE(review): the comparison cell above passes use_as_point_forecast=35 while
# this looks up key 25 — confirm the two are meant to differ.
df = deepcopy(predictions_dict['Arkansas']['m2']['forecasts'][25])
df = df.set_index('date')
df.loc['2020-12-21':'2021-01-13', 'deceased']

In [None]:
# Remove the row limit on DataFrame display. This is a global pandas option and
# stays in effect for every later cell, so large frames are printed in full.
pd.set_option('display.max_rows', None)

In [None]:
# Per (location, location_name, target) mean of the numeric columns of the
# Wadhwani_AI-BayesOpt point forecasts.
df_temp = df_comb[df_comb['type'] == 'point']
# numeric_only=True: df_comb also carries non-numeric columns (e.g. 'type'),
# and pandas >= 2.0 raises on those instead of silently dropping them.
df_temp.groupby(['model', 'location','location_name', 'target']).mean(numeric_only=True).loc['Wadhwani_AI-BayesOpt']

In [None]:
# Render the performance table from the MAPE and rank frames returned by the
# most recent end_to_end_comparison call.
create_performance_table(df_mape, df_rank)

# Plotting

In [None]:
# Z-scores of Wadhwani_AI-BayesOpt against the other models, shown as a heatmap
# of the non-parametric z-score centred on 0.
df_zscore = calculate_z_score(df_mape, df_rank, model_name='Wadhwani_AI-BayesOpt')
fig, ax = create_heatmap(df_zscore, var_name='non_param_z_score', center=0)

# Join the z-scores with the training error taken from predictions_dict.
df_wadhwani = combine_with_train_error(predictions_dict, df_zscore)

# Count rows on either side of zero (rows with a NaN score fall in neither).
non_param_z = df_wadhwani['non_param_z_score']
n_non_positive = len(df_wadhwani[non_param_z <= 0])
n_positive = len(df_wadhwani[non_param_z > 0])
print(f'# -ve non param Z score {n_non_positive}')
print(f'# +ve non param Z score {n_positive}')

fig = create_scatter_plot_mape(
    df_wadhwani,
    annotate=True,
    abbv=True,
    abbv_dict=us_states_abbv_dict,
    stat_metric_to_use='non_param_z_score',
    log_scale=True,
)

In [None]:
# Percentile plot of the 'deceased' compartment for Karlen-pypm in Texas, saved
# under the paper's plots directory (path is relative to this notebook; the
# directory must already exist).
# NOTE(review): the 95% CI cell further down calls plot_ptiles_reichlab with an
# integer location code and target='cum death' instead of a state name and
# compartment= — confirm which call form matches the current signature.
model, state = ('Karlen-pypm', 'Texas')
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
fig.savefig(f'../../../paper/plots/ptiles-{model}-{state}.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Same percentile plot as the Karlen-pypm cell, for Wadhwani_AI-BayesOpt in
# Texas; only the (model, state) pair differs, and with it the output filename.
model, state = ('Wadhwani_AI-BayesOpt', 'Texas')
fig, ax = plot_ptiles_reichlab(df_comb, model, state, compartment='deceased')
fig.savefig(f'../../../paper/plots/ptiles-{model}-{state}.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Bar chart of the point-forecast MAPE (plot_rank=False), titled 'Median MAPE'.
fig, ax = error_barchart(df_comb, quant=None, color='C0', ftype='point', loss_fn='mape', plot_rank=False)
ax.set_title('Median MAPE')
ax.set_xlabel('MAPE')

In [None]:
# Bar chart of the point-forecast rank (plot_rank=True).
fig, ax = error_barchart(df_comb, quant=None, color='C0', ftype='point', loss_fn='mape', plot_rank=True)
ax.set_title('Median Rank')
# Axis label, not a second title: a repeated set_title would overwrite
# 'Median Rank' and leave the x axis unlabelled.
ax.set_xlabel('Rank')

In [None]:
# Side-by-side point-forecast bar charts: MAPE on the left, rank on the right.
fig, axs = plt.subplots(figsize=(22, 12), nrows=1, ncols=2)
ax0, ax1 = axs.flat

# (axis, bar colour, plot_rank flag, title, x label) for each panel.
panel_specs = (
    (ax0, 'C0', False, 'Median MAPE', 'MAPE'),
    (ax1, 'firebrick', True, 'Median Rank', 'Rank'),
)
for panel_ax, bar_color, use_rank, panel_title, panel_xlabel in panel_specs:
    error_barchart(df_comb, quant=None, color=bar_color, ftype='point',
                   loss_fn='mape', plot_rank=use_rank, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(panel_xlabel)

fig.tight_layout()

In [None]:
# Quantile-loss ('mape_perc') bar charts for the 0.2, 0.5 and 0.8 quantiles,
# one panel each, left to right.
fig, axs = plt.subplots(figsize=(25, 12), nrows=1, ncols=3)
ax0, ax1, ax2 = axs.flat

for panel_ax, panel_quant, bar_color in (
    (ax0, 0.2, 'teal'),
    (ax1, 0.5, 'C0'),
    (ax2, 0.8, 'firebrick'),
):
    error_barchart(df_comb, quant=panel_quant, color=bar_color, ftype='quantile',
                   loss_fn='mape_perc', plot_rank=False, ax=panel_ax)

fig.tight_layout()

In [None]:
# Stand-alone bar chart for a single quantile, with error_barchart's remaining
# arguments left at their defaults and latex=False.
# NOTE(review): `quant` is a notebook-level name that the quantile loop in the
# next cell rebinds.
quant = 0.8
fig, ax = error_barchart(df_comb, quant, color='firebrick', latex=False)
# Saving is currently disabled; uncomment to export the figure for the paper.
# fig.savefig(f'../../../paper/plots/quant-loss-{quant}.pdf',
#             format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Build a (model x quantile) table holding, for every quantile, each model's
# median 'mape_perc' across states.
#
# The last unique quantile value is dropped.
# NOTE(review): presumably that entry is the NaN carried by point-forecast
# rows; this relies on row order in df_comb — confirm.
quantiles = pd.unique(df_comb['quantile'])[:-1]
df_med_mape_allqs = pd.DataFrame(index=pd.unique(df_comb['model']), columns=quantiles)

for quant in quantiles:
    # Loop-local names only: this loop must not rebind `df_mape`, which holds
    # the MAPE frame from end_to_end_comparison and is read by later cells.
    df_quant = df_comb[df_comb['quantile'] == quant]
    # numeric_only=True: pandas >= 2.0 raises on non-numeric columns otherwise.
    df_qmape = (
        df_quant.groupby(['model', 'location', 'location_name'])
        .mean(numeric_only=True)
        .reset_index()
        .pivot(index='model', columns='location_name', values='mape_perc')
    )
    # Compare models only on states where Wadhwani_AI-BayesOpt has a value.
    has_wiai_value = df_qmape.loc['Wadhwani_AI-BayesOpt', :].notna()
    # Median over states per model; assignment aligns on the model index.
    df_med_mape_allqs.loc[:, quant] = df_qmape.loc[:, has_wiai_value].median(axis=1)

In [None]:
# Median quantile loss per model across all quantiles: two highlighted models
# in colour with a legend entry, every other model as an unlabelled grey line.
fig, ax = plt.subplots(figsize=(12, 12))

# Line styling for the highlighted models; add an entry here to highlight
# another one. Raw string: '\_' is a LaTeX escape (usetex is on), not a Python
# escape sequence.
highlighted_models = {
    'Wadhwani_AI-BayesOpt': {'label': r'Wadhwani\_AI-BayesOpt', 'color': 'orangered'},
    'UMass-MechBayes': {'label': 'UMass-MechBayes', 'color': 'cornflowerblue'},
}
background_style = {'color': 'lightgrey'}

for model in df_med_mape_allqs.index:
    line_style = highlighted_models.get(model, background_style)
    ax.plot(df_med_mape_allqs.columns, df_med_mape_allqs.loc[model, :], **line_style)

ax.set_ylabel('Quantile Loss MAPE')
ax.set_xlabel('Quantile')
ax.set_xticks([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
ax.legend()
ax.set_title('Median Quantile Loss across states for all quantiles')
fig.savefig('../../../paper/plots/qtile-loss-reichlab.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

In [None]:
# Overlay the cumulative-death forecasts of two models for Texas on one axis
# and attach a hand-built legend.
fig, ax = plt.subplots(figsize=(12, 12))

texas_code = int(us_states_loc_dict['Texas'])
for ci_model, ci_color in (
    ('Wadhwani_AI-BayesOpt', 'orangered'),
    ('UMass-MechBayes', 'cornflowerblue'),
):
    plot_ptiles_reichlab(df_comb, ci_model, texas_code,
                         target='cum death', plot_true=True, plot_individual_curves=False,
                         color=ci_color, ax=ax, latex=True)

# Raw strings: '\_' and '\%' are LaTeX escapes, not Python escape sequences.
# NOTE(review): the point-forecast entries use 'orange' / 'green' while the
# plots above are given 'orangered' / 'cornflowerblue' — confirm these match
# what plot_ptiles_reichlab actually draws for the point forecast.
legend_elements = [
    Line2D([0], [0], ls='--', marker='o', color='red', label='Cum Death (Observed)'),
    Line2D([0], [0], ls='-', marker='o', color='orange', label=r'Wadhwani\_AI-BayesOpt Point Forecast'),
    Line2D([0], [0], ls='-', marker='o', color='green', label='UMass-MechBayes Point Forecast'),
    Patch(facecolor='orangered', edgecolor='orangered', alpha=0.1, label=r'Wadhwani\_AI-BayesOpt 95\% CI'),
    Patch(facecolor='cornflowerblue', edgecolor='cornflowerblue', alpha=0.1, label=r'UMass-MechBayes 95\% CI'),
]
ax.legend(handles=legend_elements)
ax.grid()
# Trim 18 x-axis units off the right edge of the automatically chosen range.
ax.set_xlim(ax.get_xlim()[0], ax.get_xlim()[1] - 18)
ax.set_title('Cumulative Death Forecasts for Texas')
fig.savefig('../../../paper/plots/95pc-ci-reichlab.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Render the performance table again from the current df_mape / df_rank.
# NOTE(review): the quantile loop earlier in the notebook assigns to `df_mape`
# inside its body, so the frame seen here depends on whether that cell has run.
create_performance_table(df_mape, df_rank)

In [None]:
# Load the US state shapefile (path relative to the working directory) through
# preprocess_shape_file; the result is passed to the choropleth cells as `gdf`.
gdf = preprocess_shape_file(filename='cb_2018_us_state_5m/cb_2018_us_state_5m.shp')

In [None]:
# Show the df_wadhwani rows labelled 'Connecticut' and 'Wyoming' — the same two
# states used in the single-state ECDF figure below.
df_wadhwani.loc[['Connecticut', 'Wyoming']]

In [None]:
# Choropleths of df_wadhwani over the state geometries in gdf, one per entry of
# vars_to_plot. Each entry gives the colormap plus vmin / vcenter / vmax for
# its colour scale.
# NOTE(review): the model_rank bounds (0 / 13 / 26) look tied to the number of
# compared models — confirm if the model set changes.
vars_to_plot = {
    'non_param_z_score' : {'cmap':'RdYlGn_r', 'vmin':-1, 'vcenter':0, 'vmax':1},
    'model_rank' : {'cmap':'Purples', 'vmin':0, 'vcenter':13, 'vmax':26},
}
fig, axs = plot_multiple_choropleths(df_wadhwani, gdf, vars_to_plot)
fig.savefig('../../../paper/plots/choropleth.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Same choropleth layout as the previous cell but using 'z_score' in place of
# 'non_param_z_score'; this variant is displayed only, not saved.
vars_to_plot = {
    'z_score' : {'cmap':'RdYlGn_r', 'vmin':-1, 'vcenter':0, 'vmax':1},
    'model_rank' : {'cmap':'Purples', 'vmin':0, 'vcenter':13, 'vmax':26},
}
fig, axs = plot_multiple_choropleths(df_wadhwani, gdf, vars_to_plot)

In [None]:
# Scatter plot built from df_wadhwani by create_scatter_plot_zscores.
fig, ax = create_scatter_plot_zscores(df_wadhwani)

In [None]:
# ECDF panels for all states, built from the current df_mape.
fig, axs = plot_ecdf_all_states(df_mape)

In [None]:
# Two-panel ECDF figure for Connecticut (left) and Wyoming (right), saved under
# the paper's plots directory.
# NOTE(review): model='Wadhwani_AI' differs from the 'Wadhwani_AI-BayesOpt'
# name used everywhere else in this notebook — confirm which one
# plot_ecdf_single_state expects.
fig, axs = plt.subplots(figsize=(12, 6), ncols=2)
plot_ecdf_single_state(df_mape, 'Connecticut', axs[0], model='Wadhwani_AI')
plot_ecdf_single_state(df_mape, 'Wyoming', axs[1], model='Wadhwani_AI')
# Spelling fixed ('Emperical' -> 'Empirical'); this title ends up in the PDF.
fig.suptitle('Empirical Cumulative Distribution Plots')
fig.savefig('../../../paper/plots/ecdf_plots.pdf', format='pdf', bbox_inches='tight', pad_inches=0)

In [None]:
# Q-Q panels for all states from df_mape, with fit=False and df_wadhwani
# supplied as additional input.
fig, axs = plot_qq_all_states(df_mape, fit=False, df_wadhwani=df_wadhwani)