In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell runs,
# so edits to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
from glob import glob
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
from scipy.stats import zscore
import os
import copy
import pickle
import re

from utils.generic.reichlab import *

In [None]:
# Load the saved predictions dictionary (keyed by location in the cells that follow).
# NOTE(review): absolute, user-specific path — the notebook only runs where this file exists.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load trusted pickles.
predictions_pkl_filename = '/scratch/users/sansiddh/covid-modelling/2020_1022_014310/predictions_dict.pkl'
with open(predictions_pkl_filename, 'rb') as f:
    predictions_dict = pickle.load(f)

In [None]:
# Submission date and compartment that the loader and comparison helpers are called with.
date_of_submission = '2020-09-21'
# presumably the cumulative-deaths target — confirm against utils.generic.reichlab
comp = 'cum_death'

In [None]:
# Relative path handed to every loader below as `reichlab_path`.
forecast_hub_path = '../../../covid19-forecast-hub'

# Models that submitted for this date/compartment, their submissions, and the ground truth.
list_of_models = get_list_of_models(
    date_of_submission, comp, reichlab_path=forecast_hub_path
)
df_all_submissions = process_all_submissions(
    list_of_models, date_of_submission, comp, reichlab_path=forecast_hub_path
)
df_gt, df_gt_loss, df_gt_loss_wk, loc_name_to_key_dict = process_gt(
    comp, df_all_submissions, reichlab_path=forecast_hub_path
)

In [None]:
# Build the Wadhwani AI submission from the pickled predictions (point forecast taken from
# 'ensemble_mean') and combine it with the other submissions.
# NOTE(review): df_all_submissions is rebound to the combined result, so re-running this cell
# feeds the already-combined frame back in — confirm combine_wiai_subm_with_all tolerates that.
df_wiai_submission = format_wiai_submission(predictions_dict, df_all_submissions, loc_name_to_key_dict, use_as_point_forecast='ensemble_mean')
df_all_submissions = combine_wiai_subm_with_all(df_all_submissions, df_wiai_submission, comp)

In [None]:
# Compare every submission against df_gt_loss_wk (presumably the weekly ground truth — confirm).
# df_mape is later indexed by model name on its rows and by location on its columns.
df_comb, df_mape, df_rank = compare_gt_pred(df_all_submissions, df_gt_loss_wk)

In [None]:
# Display the MAPE table returned by compare_gt_pred.
df_mape

In [None]:
# Display the rank table returned by compare_gt_pred.
df_rank

In [None]:
# Median of each df_rank row taken across its columns, sorted ascending.
df_rank.median(axis='columns').sort_values()

In [None]:
# Mean of each df_rank row taken across its columns, sorted ascending.
df_rank.mean(axis='columns').sort_values()

In [None]:
# Median MAPE of each df_mape row (one row per model) across its columns, sorted ascending.
df_mape.median(axis='columns').sort_values()

In [None]:
# For every df_mape column: the mean and standard deviation of MAPE over all rows (models),
# next to the Wadhwani_AI row's own MAPE. `keys` names the three resulting columns.
df = pd.concat(
    [
        df_mape.mean(axis=0),
        df_mape.std(axis=0),
        df_mape.loc['Wadhwani_AI', :],
    ],
    axis=1,
    keys=['mean_mape', 'std_mape', 'wadhwani_mape'],
)
# Standard score of Wadhwani_AI's MAPE relative to the spread across all models.
df['z_score'] = (df['wadhwani_mape'] - df['mean_mape']) / df['std_mape']

In [None]:
# Display the per-column mean/std of MAPE, the Wadhwani_AI MAPE and its z-score.
df

In [None]:
# Single-column heatmap of the z-scores, one row per entry of df.index, written to zscore.png.
z_score_column = df['z_score'].to_numpy().reshape(-1, 1)
fig, ax = plt.subplots(figsize=(6, 30))
sns.heatmap(
    z_score_column,
    cmap='coolwarm',
    center=0,  # diverging palette centred on "equal to the mean"
    annot=True,
    ax=ax,
    yticklabels=df.index,
    xticklabels=['z_score'],
)
fig.savefig('zscore.png')

In [None]:
# Per-location summary of the 'm2' fit stored in predictions_dict: the first entry of its
# df_loss table and two of its best-fit parameters. 'test_loss' is a NaN placeholder that a
# later cell fills in.
#
# The frame is built in one shot from a list of records rather than by writing cell-by-cell
# with .loc into an empty frame: the empty frame's columns are object dtype and stay that way,
# whereas constructing from records lets pandas infer numeric dtypes.
wadhwani_columns = ['best_loss_train', 'test_loss', 'T_recov_fatal', 'P_fatal']
wadhwani_records = [
    {
        'best_loss_train': loc_predictions['m2']['df_loss'].to_numpy()[0][0],
        'test_loss': np.nan,
        'T_recov_fatal': loc_predictions['m2']['best_params']['T_recov_fatal'],
        'P_fatal': loc_predictions['m2']['best_params']['P_fatal'],
    }
    for loc_predictions in predictions_dict.values()
]
# Index by location key, in the same order as predictions_dict; column order as before.
df_wadhwani = pd.DataFrame(
    wadhwani_records,
    index=list(predictions_dict.keys()),
    columns=wadhwani_columns,
)

In [None]:
# Attach Wadhwani_AI's MAPE per location. Column assignment aligns on the index, so locations
# with no non-NaN MAPE in df_mape are left as NaN.
df_wadhwani['test_loss'] = df_mape.loc['Wadhwani_AI', :].dropna()

In [None]:
# Exclude these three territories from the train-vs-test comparison.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the cell re-runnable:
# on a second run the rows are already gone, which made the in-place drop raise KeyError.
df_wadhwani = df_wadhwani.drop(
    index=['Northern Mariana Islands', 'Guam', 'Virgin Islands'],
    errors='ignore',
)

In [None]:
# Scatter each location's training loss against its loss on unseen data, with the two
# thresholds used to flag poorly-fit locations drawn as dotted red lines.
fig, ax = plt.subplots(figsize=(12, 12))
ax.scatter(df_wadhwani['best_loss_train'], df_wadhwani['test_loss'])
# Label every point with its location (the frame's index).
for index, row in df_wadhwani.iterrows():
    ax.annotate(index, (row['best_loss_train'], row['test_loss']))
ax.set_xlabel('MAPE on training data (calculated daily)')
ax.set_ylabel('MAPE on unseen data (calculated weekly)')
ax.axvline(1, ls=':', c='red', label='train error threshold')
ax.axhline(6, ls=':', c='red', label='test error threshold')
# The label= arguments above are only rendered once a legend is drawn.
ax.legend();

In [None]:
# Flag locations whose training loss or test loss exceeds its threshold
# (the same 1 and 6 drawn on the scatter plot).
exceeds_train_threshold = df_wadhwani['best_loss_train'] > 1
exceeds_test_threshold = df_wadhwani['test_loss'] > 6
df_bad = df_wadhwani.loc[exceeds_train_threshold | exceeds_test_threshold]

In [None]:
# Display the locations that exceed either error threshold.
df_bad

In [None]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# One single-row heatmap per flagged location, showing every model's MAPE for that location.
# DataFrame.iteritems() was removed in pandas 2.0; .items() yields the same
# (column name, column Series) pairs.
for col_name, mapes in df_mape.loc[:, df_bad.index].items():
    fig, ax = plt.subplots(figsize=(18, 2))
    sns.heatmap(mapes.to_numpy().reshape(1, -1), cmap='Reds', ax=ax, xticklabels=mapes.index, annot=True)
    ax.set_title(col_name)
    plt.show()
    # Release the figure so open figures do not accumulate across iterations.
    plt.close(fig)

In [None]:
# Re-display the stored 'deceased' percentile-forecast figure for every flagged location.
for state in df_bad.index:
    fig = predictions_dict[state]['m2']['plots']['forecasts_ptiles']['deceased']
    # show_figure is not defined in this notebook; presumably it comes from the star import of
    # utils.generic.reichlab and prepares the unpickled figure for display — TODO confirm.
    show_figure(fig)
    # NOTE(review): may be redundant if show_figure already renders the figure — confirm.
    fig.show()