In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from hyperopt import hp, tpe, fmin, Trials
from tqdm import tqdm
# from tqdm.notebook import tqdm

from collections import OrderedDict, defaultdict
import itertools
from functools import partial
import datetime
from joblib import Parallel, delayed
import copy
import json

import sys; sys.path.append('../../')

from data.dataloader import Covid19IndiaLoader
from data.processing.processing import get_data, get_district_time_series, get_dataframes_cached

from models.seir.seir_testing import SEIR_Testing
from main.seir.optimiser import Optimiser
from utils.loss import Loss_Calculator
from main.seir.fitting import single_fitting_cycle, train_val_split
from main.seir.forecast import create_region_csv, create_all_csvs, write_csv, get_forecast
from viz.forecast import plot_forecast

## Comparison of E/Hosp and I/Hosp ratios

In [None]:
# for district in predictions_dict.keys():
#     district_dict = predictions_dict[district]
    
#     fig, ax = plt.subplots(figsize=(12, 12))
#     ax.plot(district_dict['m1']['df_prediction']['date'], district_dict['m1']['df_prediction']['E'] / district_dict['m1']['df_prediction']['hospitalised'],
#             '-', color='C0', label='E / Hosp (M1)')
#     ax.plot(district_dict['m1']['df_prediction']['date'], district_dict['m1']['df_prediction']['I'] / district_dict['m1']['df_prediction']['hospitalised'],
#             '-.', color='C0', label='I / Hosp (M1)')
#     ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
#     ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
#     ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
#     plt.ylabel('No of People')
#     plt.xlabel('Time')
#     plt.legend()
#     plt.title('I/Hosp and E/Hosp ratio for {}, {}'.format(district[0], district[1]))
#     plt.grid()
#     plt.show()

## Load Covid19india Data

In [None]:
# Load the Covid19India dataframes via get_dataframes_cached() (presumably a locally cached
# copy of the API data -- confirm in data.processing.processing).
# The direct API call is kept below for reference; note that no `loader` object is created in
# the visible part of this notebook, so it cannot simply be uncommented.
# dataframes = loader.get_covid19india_api_data()
dataframes = get_dataframes_cached()

In [None]:
predictions_dict = {}

## Select Districts to fit on

In [None]:
# Regions to fit, as (state, district) tuples; the fitting cell unpacks each entry in that order.
# Full list used previously:
# districts_to_show = [('Maharashtra', 'Pune'), 
#                      ('Maharashtra', 'Mumbai'), 
#                      ('Rajasthan', 'Jaipur'), 
#                      ('Gujarat', 'Ahmedabad'), 
#                      ('Karnataka', 'Bengaluru Urban'),
#                      ('Delhi', None)]

#districts_to_show = [('Maharashtra', 'Pune')]
# NOTE(review): many later cells hard-code ('Maharashtra', 'Mumbai'), so changing this list
# alone does not retarget the whole notebook.
districts_to_show = [('Maharashtra', 'Mumbai')]

## Perform M1 and M2 fits

In [None]:
# for state, district in districts_to_show:
#    predictions_dict[(state, district)] = {}
#    predictions_dict[(state, district)]['m1'] = single_fitting_cycle(
#        dataframes, state, district, train_period=7, val_period=7, 
#        data_from_tracker=True, initialisation='intermediate',
#        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
#    predictions_dict[(state, district)]['m2'] = single_fitting_cycle(
#        dataframes, state, district, train_period=7, val_period=0, 
#        data_from_tracker=True, initialisation='intermediate',
#        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])

In [None]:
sns.set_style("dark")

# Compartments the optimiser is fitted on; a fresh list is handed to every call.
fit_compartments = ('hospitalised', 'total_infected', 'deceased', 'recovered')
# The two fits differ only in the validation window: M1 holds out 7 days, M2 holds out none.
val_period_by_fit = {'m1': 7, 'm2': 0}

# Official-data inputs used in earlier runs can be passed as extra keyword arguments, e.g.
#   filename='../../data/data/official-pune-21-05-20.csv', data_format='new'   (M1)
#   filename='../../data/data/official-pune-25-05-20.csv', data_format='new'   (M2)
#   filename='../../data/data/official-mumbai-27-05-20.csv', data_format='old'
for state, district in districts_to_show:
    region_fits = predictions_dict[(state, district)] = {}
    for fit_name, val_period in val_period_by_fit.items():
        region_fits[fit_name] = single_fitting_cycle(
            dataframes, state, district,
            train_period=7,
            val_period=val_period,
            data_from_tracker=False,
            initialisation='intermediate',
            which_compartments=list(fit_compartments),
            smooth_jump=True,
        )

## Create Master Loss Dataframe

### M1 Loss

In [None]:
predictions_dict.keys()

In [None]:
predictions_dict[('Maharashtra','Mumbai')]['m1'].keys()

In [None]:
# Master table of M1 losses: one row per fitted region, one column per
# (df_loss column, df_loss index) pair of the per-region loss frame.
starting_key = list(predictions_dict.keys())[0]
m1_loss_template = predictions_dict[starting_key]['m1']['df_loss']

loss_columns = pd.MultiIndex.from_product([m1_loss_template.columns, m1_loss_template.index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
# Fill rows from the same keys that define the index (as the M2 cell does), so the table
# cannot reference a region that was listed in districts_to_show but never fitted.
for key in predictions_dict.keys():
    # Transpose before flattening so values line up with the (column, index) column order.
    df_loss_master.loc[key, :] = np.around(
        predictions_dict[key]['m1']['df_loss'].values.T.flatten().astype('float'), decimals=2)

df_loss_master

### M2 Loss

In [None]:
#predictions_dict[('Maharashtra','Pune')]['m2']
predictions_dict[('Maharashtra','Mumbai')]['m2'].keys()

In [None]:
# Master table of M2 losses, laid out like the M1 table: one row per fitted region and
# one column per (df_loss column, df_loss index) pair.
starting_key = list(predictions_dict.keys())[0]
m2_loss_template = predictions_dict[starting_key]['m2']['df_loss']

loss_columns = pd.MultiIndex.from_product(
    [m2_loss_template.columns, m2_loss_template.index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(index=loss_index, columns=loss_columns)
for key in predictions_dict:
    region_losses = predictions_dict[key]['m2']['df_loss'].values
    # Transposed flatten matches the (column, index) ordering of loss_columns.
    df_loss_master.loc[key, :] = np.round(region_losses.T.flatten().astype('float'), 2)

df_loss_master

## Plot Forecasts

In [None]:
# Plot the fitted forecast for every region in predictions_dict.
# NOTE: the loop variable `region` stays defined after this cell, and later plotting cells
# read region[0] / region[1] for their titles until `region` is reassigned further down.
for region in predictions_dict.keys():
    plot_forecast(predictions_dict[region], region, both_forecasts=False, error_bars=True)

## Create and Save Output CSV

In [None]:
df_output = create_all_csvs(predictions_dict, icu_fraction=0.02)

In [None]:
write_csv(df_output, '../../output-mumbai-{}.csv'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

In [None]:
# Loss of every hyperopt trial of the M1 fit for the last fitted (state, district);
# swap 'm1' for 'm2' to inspect the M2 trials instead.
m1_trials = predictions_dict[(state, district)]['m1']['trials']
losses_array = np.array([trial['result']['loss'] for trial in m1_trials])
# Trial indices ordered from lowest to highest loss.
least_losses_indices = losses_array.argsort()
losses_array[least_losses_indices][:10]

In [None]:
# One parameter dict per M1 trial (swap 'm1' for 'm2' to use the M2 trials).
# Each entry of trial['misc']['vals'] is indexed with [0] to get the sampled value.
params_array = np.array([
    {name: values[0] for name, values in trial['misc']['vals'].items()}
    for trial in predictions_dict[(state, district)]['m1']['trials']
])

In [None]:
least_losses_indices[:10]

In [None]:
params_array[least_losses_indices[:10]]

In [None]:
#predictions_array = [get_forecast(predictions_dict[('Maharashtra', 'Pune')],
#                                  best_params=params_dict) for params_dict in params_array[least_losses_indices[:10]]]
# Forecast for each of the 10 lowest-loss parameter sets (Mumbai fits;
# use ('Maharashtra', 'Pune') here to do the same for Pune).
mumbai_fits = predictions_dict[('Maharashtra', 'Mumbai')]
predictions_array = []
for top_params in params_array[least_losses_indices[:10]]:
    predictions_array.append(get_forecast(mumbai_fits, best_params=top_params))

In [None]:
# Observed confirmed cases against the forecasts of the 10 lowest-loss trials (log scale).
# Use the 'm2' entry instead of 'm1' to plot against the M2 district frame.
df_true = predictions_dict[('Maharashtra', 'Mumbai')]['m1']['df_district']

fig, ax = plt.subplots(figsize=(12, 12))
ax.plot(df_true['date'], df_true['total_infected'],
        '-o', color='C0', label='Confirmed Cases (Observed)')

top10_losses = np.sort(losses_array)[:10]
for i, df_prediction in enumerate(predictions_array):
    loss_value = np.around(top10_losses[i], 2)
    sns.lineplot(x="date", y="total_infected", data=df_prediction,
                 ls='-', label='Confirmed Cases ({})'.format(loss_value), ax=ax)
    # Annotate the end of each curve with the loss of the trial that produced it.
    ax.text(x=df_prediction['date'].iloc[-1],
            y=df_prediction['total_infected'].iloc[-1],
            s=loss_value)

# Weekly major ticks, daily minor ticks.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.set_ylabel('No of People', fontsize=16)
ax.set_yscale('log')
ax.set_xlabel('Time', fontsize=16)
ax.legend()
# `region` is whatever an earlier cell left behind (loop variable of the forecast plots).
ax.set_title('Forecast - ({} {})'.format(region[0], region[1]), fontsize=16)
plt.show()

In [None]:
df_true

In [None]:
# Observed active cases against the forecasts of the 10 lowest-loss trials.
# Legend entries carry the trial loss; the text at the end of each curve is its lockdown_R0.
# Use the 'm2' entry instead of 'm1' to plot against the M2 district frame.
df_true = predictions_dict[('Maharashtra', 'Mumbai')]['m1']['df_district']
sns.set_style("darkgrid")

fig, ax = plt.subplots(figsize=(12, 12))
ax.plot(df_true['date'], df_true['hospitalised'],
        '-o', color='orange', label='Active Cases (Observed)')

top10_losses = np.sort(losses_array)[:10]
params_dict = params_array[least_losses_indices[:10]]
for i, df_prediction in enumerate(predictions_array):
    loss_value = np.around(top10_losses[i], 2)
    true_r0 = params_dict[i]['lockdown_R0']
    sns.lineplot(x="date", y="hospitalised", data=df_prediction,
                 ls='-', label='Active Cases ({})'.format(loss_value), ax=ax)
    ax.text(x=df_prediction['date'].iloc[-1],
            y=df_prediction['hospitalised'].iloc[-1],
            s=true_r0)

# Weekly major ticks, daily minor ticks.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.set_ylabel('No of People', fontsize=16)
ax.set_xlabel('Time', fontsize=16)
plt.xticks(rotation=45,horizontalalignment='right')
ax.legend()
# `region` is whatever an earlier cell left behind (loop variable of the forecast plots).
ax.set_title('Forecast - ({} {})'.format(region[0], region[1]), fontsize=16)
plt.show()

## Check varying R0 values

In [None]:
# Three hand-picked parameter sets, keyed 'low' / 'medium' / 'high' in increasing order of
# their lockdown_R0 value. NOTE(review): presumably copied from an earlier fit -- the
# provenance is not recorded here.
params_to_plot = \
{
    'low':{'E_hosp_ratio': 0.115406221316017, 'I_hosp_ratio': 0.4763871081949848, 'P_fatal': 0.050419345187462467, 'P_severe': 0.7809853621826006, 'T_inc': 4.484977212179257, 'T_inf': 3.3342249004558697, 'T_recov_severe': 52.054485355979445, 'lockdown_R0': 1.0974118812671074},
    'medium':{'E_hosp_ratio': 0.5105188613649609, 'I_hosp_ratio': 0.3039459885534656, 'P_fatal': 0.05832975188719784, 'P_severe': 0.8668707539589996, 'T_inc': 4.554494614633725, 'T_inf': 3.3958101193846915, 'T_recov_severe': 48.49542471232895, 'lockdown_R0': 1.1227557408135034},
    'high':   {'E_hosp_ratio': 0.2806960144261442, 'I_hosp_ratio': 0.3800638057872212, 'P_fatal': 0.05326820833691313, 'P_severe': 0.8339542948758695, 'T_inc': 4.304052648857938, 'T_inf': 3.4673218430188513, 'T_recov_severe': 47.044942872805294, 'lockdown_R0': 1.3444274930627533}
}
# Factors applied to lockdown_R0 to obtain the post-lockdown R0 scenarios.
multipliers = [0.9, 1, 1.1, 1.25]
# Columns written to the per-scenario CSV files.
columns_for_csv = ['date', 'total_infected', 'hospitalised', 'recovered', 'deceased']


In [None]:
def set_r0_multiplier(params_dict, mul):
    """Return a copy of `params_dict` with a scaled post-lockdown R0 added.

    The copy gets 'post_lockdown_R0' = params_dict['lockdown_R0'] * mul; the input
    mapping is not modified.
    """
    scaled_params = params_dict.copy()
    scaled_params.update({'post_lockdown_R0': params_dict['lockdown_R0']*mul})
    return scaled_params


#predictions_array_mul = [get_forecast(predictions_dict[('Maharashtra', 'Mumbai')],
#                                  best_params=set_r0_multiplier(best_params_dict, mul)) \
#                                 for mul in multipliers]

In [None]:
lc = Loss_Calculator()

In [None]:

# Split the M2 district frame without rolling means and with an empty validation set;
# the resulting frames are the reference data for the loss tables below.
df_district = predictions_dict[districts_to_show[0]]['m2']['df_district']
df_train_nora, df_val_nora, df_true_fitting = train_val_split(
    df_district, train_rollingmean=False, val_rollingmean=False, val_size=0)

# Loss table of the M2 forecast under each hand-picked parameter set.
mumbai_fits = predictions_dict[('Maharashtra', 'Mumbai')]
for key, best_params_dict in params_to_plot.items():
    print("R0", best_params_dict['lockdown_R0'])
    df_predictions = get_forecast(mumbai_fits, train_fit="m2", best_params=best_params_dict)
    df_loss = lc.create_loss_dataframe_region(
        df_train_nora, df_val_nora, df_predictions, train_period=7,
        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    print(df_loss)

In [None]:
# For every hand-picked parameter set: forecast with each R0 multiplier, write the forecast
# to CSV and plot the active-case curves against the observed data.
df_true = predictions_dict[('Maharashtra', 'Mumbai')]['m2']['df_district']

# Resolve the dated output folder once; makedirs also creates a missing '../../outputs'
# parent and does not fail when the folder already exists.
today = datetime.date.today().strftime("%Y-%m-%d")
path = f'../../outputs/Mumbai-{today}/'
os.makedirs(path, exist_ok=True)

for key in params_to_plot:
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.plot(df_true['date'], df_true['hospitalised'],
        '-o', color='orange', label='Active Cases (Observed)')
    best_params_dict = params_to_plot[key]
    predictions_array_mul = [get_forecast(predictions_dict[('Maharashtra', 'Mumbai')],
                                train_fit = "m2",
                                best_params=set_r0_multiplier(best_params_dict, mul))
                                 for mul in multipliers]
    for i, df_prediction in enumerate(predictions_array_mul):
        filename = "Mumbai-" + key + "-" + str(multipliers[i]) + ".csv"
        print(filename)
        df_prediction[columns_for_csv].to_csv(os.path.join(path, filename))
        label = multipliers[i]
        # Post-lockdown R0 of this scenario, shown at the end of its curve.
        true_r0 = label*best_params_dict['lockdown_R0']
        sns.lineplot(x="date", y="hospitalised", data=df_prediction,
                     ls='-', label='Active Cases ({})'.format(label), ax=ax)
        ax.text(x=df_prediction['date'].iloc[-1], y=df_prediction['hospitalised'].iloc[-1], s=true_r0)

    # Weekly major ticks, daily minor ticks.
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
    ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.ylabel('No of People', fontsize=16)
    # plt.yscale('log')
    plt.xticks(rotation=45,horizontalalignment='right')

    plt.xlabel('Time', fontsize=16)
    plt.legend()
    # `region` is whatever an earlier cell left behind (loop variable of the forecast plots).
    plt.title('Forecast - ({} {})'.format(region[0], region[1]), fontsize=16)
    plt.show()

In [None]:

# For each of the 10 lowest-loss parameter sets: print its lockdown_R0, forecast with
# train_fit="m1", and print the second row (iloc[1]) of the resulting loss table.
# NOTE(review): df_train_nora / df_val_nora come from the M2 df_district split with
# val_size=0, whereas these forecasts use the M1 fit -- confirm this mix is intended.
for elt in params_array[least_losses_indices[:10]]:
    print("R0", elt['lockdown_R0'])
    #best_params_dict = params_to_plot[key]#predictions_dict[districts_to_show[0]]['m2']['best_params']
    df_predictions = get_forecast(
        predictions_dict[('Maharashtra', 'Mumbai')],
        train_fit = "m1",
        best_params = elt)
    df_loss =  lc.create_loss_dataframe_region(
        df_train_nora, df_val_nora, df_predictions, train_period=7,
        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    print(df_loss.iloc[1])


In [None]:
# Output schema of the forecast CSV.
columns = ['forecastRunDate', 'regionType', 'region', 'model_name', 'error_function', 'error_value', 'current_total', 'current_active', 'current_recovered',
           'current_deceased', 'current_hospitalized', 'current_icu', 'current_ventilator', 'predictionDate', 'active_mean', 'active_min',
           'active_max', 'hospitalized_mean', 'hospitalized_min', 'hospitalized_max', 'icu_mean', 'icu_min', 'icu_max', 'deceased_mean',
           'deceased_min', 'deceased_max', 'recovered_mean', 'recovered_min', 'recovered_max', 'total_mean', 'total_min', 'total_max']

region = ('Maharashtra', 'Mumbai')

# Collect one output frame per top-10 parameter set and concatenate once at the end
# instead of re-copying the growing frame on every iteration. The empty template frame
# goes first so the column order of `columns` is preserved.
region_outputs = [pd.DataFrame(columns=columns)]
for params_dict in params_array[least_losses_indices[:10]]:
    df_output = create_region_csv(predictions_dict[region], region=region[1],
    regionType='district', best_params=params_dict,
    icu_fraction=0.02)
    region_outputs.append(df_output)
df_final = pd.concat(region_outputs, ignore_index=True)
    

In [None]:
df_final.to_csv('../../outputs/mumbai-{}.csv'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

In [None]:
df_final

In [None]:
# Print loss and rounded parameters of the 10 lowest-loss trials; 'lockdown_R0' is
# reported under the name 'true_R0' (listed last).
top_indices = least_losses_indices[:10]
for index, params_dict in enumerate(params_array[top_indices]):
    print('Loss - ', np.around(losses_array[top_indices[index]], 2))
    params_dict_temp = {
        name: np.around(value, 2)
        for name, value in params_dict.items()
        if name != 'lockdown_R0'
    }
    params_dict_temp['true_R0'] = np.around(params_dict['lockdown_R0'], 2)
    print('Params - ', params_dict_temp)
    print('\n')

# Find beta and mean params for uncertainty estimation

#TODO
1. Check with Excel file (potentially different value of N) -> they use N = 1000. Check that the data is the same too.
2. Check between 0 and 0.2
3. N = 2000
4. Mean forecast matching
5. Compare your code with excel (their best beta was 0.9)
6. 

-------



In [None]:
compartment_list = ['hospitalised', 'total_infected', 'deceased', 'recovered']

In [None]:
# One parameter dict per M1 hyperopt trial; each entry of trial['misc']['vals'] is
# indexed with [0] to get the sampled value.
params_array_m1 = np.array([
    {name: values[0] for name, values in trial['misc']['vals'].items()}
    for trial in predictions_dict[(state, district)]['m1']['trials']
])

In [None]:
# Loss of every M1 trial and the trial indices ordered from lowest to highest loss.
m1_trial_list = predictions_dict[(state, district)]['m1']['trials']
losses_array_m1 = np.array([trial['result']['loss'] for trial in m1_trial_list])
least_losses_indices_m1 = losses_array_m1.argsort()

In [None]:
# Sweep over beta and over the number of top trials n: for each pair compute the
# exp(-beta * loss)-weighted mean and standard deviation of every parameter over the n
# lowest-loss M1 trials.
# Result layout: means_m1[beta_index][param_name] is a list with one entry per value in
# num_samples (same for std_devs_m1).
num_samples=range(10,2000,10)
betas= np.arange(0, 1,0.025)

params_fulldict_m1 = params_array_m1[least_losses_indices_m1]
params_list = list(params_array_m1[0].keys())
means_m1 = {}
std_devs_m1 = {}

# Losses and parameter values in ascending-loss order; rows = trials, columns = params_list.
sorted_losses_m1 = losses_array_m1[least_losses_indices_m1]
param_matrix_m1 = np.array(
    [[trial_params[name] for name in params_list] for trial_params in params_fulldict_m1],
    dtype=float)
# Row index of the last trial inside each top-n prefix. A prefix longer than the number of
# trials simply covers all trials (same as slicing [0:n] past the end).
prefix_last = np.minimum(np.array(num_samples), len(sorted_losses_m1)) - 1

for i, beta in enumerate(betas):
    print(beta)
    weights = np.exp(-beta * sorted_losses_m1)
    weighted_values = weights[:, None] * param_matrix_m1
    # Cumulative sums give the statistics of every prefix in one pass instead of
    # re-summing each prefix from scratch.
    norm = np.cumsum(weights)[prefix_last][:, None]
    prefix_mean = np.cumsum(weighted_values, axis=0)[prefix_last] / norm
    prefix_second_moment = np.cumsum(weighted_values * param_matrix_m1, axis=0)[prefix_last] / norm
    # Clip tiny negative variances caused by floating-point rounding so sqrt never yields NaN.
    prefix_std = np.sqrt(np.maximum(prefix_second_moment - prefix_mean**2, 0))

    means_m1[i] = {}
    std_devs_m1[i] = {}
    for column, key in enumerate(params_list):
        means_m1[i][key] = list(prefix_mean[:, column])
        std_devs_m1[i][key] = list(prefix_std[:, column])
                

In [None]:
means_m1[0]['E_hosp_ratio'][-5:]

In [None]:
params_list

In [None]:
# Estimated mean (left column) and standard deviation (right column) of every parameter as a
# function of the number of top hyperopt trials, for a handful of beta values.
# rcParams are set BEFORE the figure is created so that they apply to this figure on the
# first run of the cell as well.
plt.rcParams['figure.dpi']=200
plt.rcParams['figure.figsize']=[30, 50]
plt.rcParams.update({'font.size':20})
# squeeze=False keeps `axs` two-dimensional even for a single parameter.
fig, axs = plt.subplots(len(params_list), 2, squeeze=False)
colors =['blue','brown', 'black','green','orange','red']
last_row = len(params_list) - 1
for row, key in enumerate(params_list):
    for idx, betaind in enumerate([0,1,2,5,10,30]):
        # Three decimals: the beta grid has step 0.025, so one decimal makes labels collide.
        beta_label = r'$\beta=${:.3f}'.format(betas[betaind])
        axs[row,0].plot(num_samples, means_m1[betaind][key], color = colors[idx], label=beta_label)
        axs[row,1].plot(num_samples, std_devs_m1[betaind][key], color = colors[idx], label=beta_label)
    axs[row,0].set_title('{}'.format(key))
    axs[row,0].set(ylabel= "Estimated Mean")
    axs[row,1].set_title('{}'.format(key))
    axs[row,1].set(ylabel="Estimated Std. Devn.")
    if row == 0:
        axs[row,0].legend(loc='center right')
    # Put the shared x-axis label on the bottom row, whatever the number of parameters.
    if row == last_row:
        axs[row,0].set(xlabel="Number of top Hyperopt samples")
        axs[row,1].set(xlabel="Number of top Hyperopt samples")

fig.tight_layout(pad = 1.0)

In [None]:
# Define N from plots!
N = 1500

In [None]:
len(means_m1[i][key])

In [None]:
# Plan for choosing beta on the validation set: instead of taking the mean parameter,
# take the mean forecast. For each parameter set, compute the forecast on the val set and
# weight each trajectory by e^(-beta * loss); a function of (beta, top-N params) computes the
# mean forecast on the given time range, and the loss is then evaluated on that mean forecast.

def get_loss_mean_param(i):
    """Loss table of the M1 forecast made with the weighted-mean parameters for betas[i].

    The mean parameters are the entries of `means_m1[i]` computed over the top `N` trials.

    Args:
        i: index into `betas` / `means_m1`.

    Returns:
        Whatever `lc.create_loss_dataframe_region` returns for the M1 train/val frames.

    Raises:
        ValueError: if `N` is not one of the sample sizes in `num_samples`.
    """
    # num_samples[j] is the number of trials behind means_m1[i][key][j]; look the position of
    # N up directly (N // 10 would point one entry too far, i.e. at the top-(N+10) estimate).
    sample_index = num_samples.index(N)
    mean_params = {key:means_m1[i][key][sample_index] for key in params_list} 
    df_train = predictions_dict[districts_to_show[0]]['m1']['df_train']
    df_val = predictions_dict[districts_to_show[0]]['m1']['df_val']
    df_predictions= get_forecast(predictions_dict[districts_to_show[0]],
                                train_fit='m1',
                                best_params=mean_params)
    df_loss =  lc.create_loss_dataframe_region(df_train, df_val, df_predictions, train_period=7,
                                 which_compartments= compartment_list )
    return df_loss





def get_mean_forecast(i, train_fit = 'm1'):
    """Weighted mean forecast over the top-N trials of the chosen fit.

    Every trial's forecast is weighted by exp(-betas[i] * trial loss); the weighted sum of
    the `compartment_list` columns is divided by the total weight.

    Args:
        i: index into `betas`.
        train_fit: 'm1' uses the M1 trial arrays; any other value uses the M2 arrays.

    Returns:
        DataFrame with the averaged `compartment_list` columns plus a 'date' column taken
        from the last trial's forecast.
    """
    beta = betas[i]
    if train_fit != 'm1':
        top_params = params_fulldict_m2[0:N]
        top_losses = losses_array_m2[least_losses_indices_m2[0:N]]
    else:
        top_params = params_fulldict_m1[0:N]
        top_losses = losses_array_m1[least_losses_indices_m1[0:N]]

    weight_total = 0
    weighted_sum = pd.DataFrame()
    for k, trial_params in enumerate(top_params):
        trial_weight = np.exp(-beta*top_losses[k])
        trial_forecast = get_forecast(predictions_dict[districts_to_show[0]],
                                      train_fit=train_fit,
                                      best_params=trial_params)
        weight_total += trial_weight
        contribution = trial_forecast[compartment_list]*trial_weight
        # The accumulator starts as an empty frame; the first contribution replaces it.
        if weighted_sum.empty:
            weighted_sum = contribution
        else:
            weighted_sum = weighted_sum + contribution

    mean_forecast = weighted_sum/weight_total
    mean_forecast['date'] = trial_forecast.date
    return mean_forecast

def get_loss_mean_forecast(i):
    """Loss table of the weighted mean M1 forecast for betas[i].

    Prints a progress banner, builds the mean forecast with `get_mean_forecast(i)` and
    evaluates it against the M1 train/val frames of the first district.
    """
    print("FORECAST MEAN FOR ",i)
    print("*"*20)
    m1_fit = predictions_dict[districts_to_show[0]]['m1']
    df_train, df_val = m1_fit['df_train'], m1_fit['df_val']
    mean_forecast = get_mean_forecast(i)
    return lc.create_loss_dataframe_region(
        df_train, df_val, mean_forecast, train_period=7,
        which_compartments=compartment_list)
    
    

In [None]:
# Pick the beta whose weighted-mean parameters give the lowest summed validation loss.
losses = []
for beta_index in range(len(betas)):
    losses.append(get_loss_mean_param(beta_index))
val_losses = [loss_table['val'].sum() for loss_table in losses]
min_loss_ind = np.argmin(val_losses)
beta_min = betas[min_loss_ind]

In [None]:
np.argmin(val_losses)

In [None]:
print(beta_min )

In [None]:
# Same selection, but scoring the weighted mean forecast instead of the mean parameters.
losses_forecast = []
for beta_index in range(len(betas)):
    losses_forecast.append(get_loss_mean_forecast(beta_index))
val_losses_forecast = [loss_table['val'].sum() for loss_table in losses_forecast]
min_loss_ind_forecast = np.argmin(val_losses_forecast)

In [None]:
val_losses_forecast 

# Generate uncertainty estimates using M2 fits

In [None]:
# One parameter dict per M2 hyperopt trial; each entry of trial['misc']['vals'] is
# indexed with [0] to get the sampled value.
params_array_m2 = np.array([
    {name: values[0] for name, values in trial['misc']['vals'].items()}
    for trial in predictions_dict[(state, district)]['m2']['trials']
])

In [None]:
params_array_m2

In [None]:
# Compute weighted estimates for m2 (only for a single point)

# exp(-beta_min * loss)-weighted mean and standard deviation of every parameter over the
# top-N M2 trials (a single (beta, N) point, unlike the M1 sweep).
m2_trial_list = predictions_dict[(state, district)]['m2']['trials']
losses_array_m2 = np.array([trial['result']['loss'] for trial in m2_trial_list])
least_losses_indices_m2 = np.argsort(losses_array_m2)

nums = N
params_fulldict_m2 = params_array_m2[least_losses_indices_m2]
tempdict_m2 = params_fulldict_m2[0:nums]
templosses_m2 = losses_array_m2[least_losses_indices_m2[0:nums]]

# Running weighted sums of the values and of their squares, plus the total weight.
sums_m2 = {key: 0 for key in params_list}
sqsums_m2 = {key: 0 for key in params_list}
Loss_norm_m2 = 0
for k, trial_params in enumerate(tempdict_m2):
    trial_weight = np.exp(-beta_min*templosses_m2[k])
    Loss_norm_m2 += trial_weight
    for key in params_list:
        sums_m2[key] += trial_weight*trial_params[key]
        sqsums_m2[key] += trial_weight*trial_params[key]**2

mean_m2 = {}
std_devs_m2 = {}
for key in params_list:
    mean_m2[key] = sums_m2[key]/Loss_norm_m2
    # Weighted variance as E[x^2] - E[x]^2.
    std_devs_m2[key] = np.sqrt(sqsums_m2[key]/Loss_norm_m2-(sums_m2[key]/Loss_norm_m2)**2)



In [None]:
# Top-N M2 parameter sets and the total exp(-beta_min * loss) weight of those same trials.
# The weight must come from the M2 losses; the M1 arrays (losses_array /
# least_losses_indices) belong to a different set of trials.
best_params_m2 = params_fulldict_m2[:N]
best_params_total_loss_m2 = sum(np.exp(-beta_min * losses_array_m2[least_losses_indices_m2[:N]]))

In [None]:
def get_preds_m2():
    """Forecast of the M2 fit for the first district using the weighted-mean parameters `mean_m2`."""
    return get_forecast(predictions_dict[districts_to_show[0]],
                        train_fit='m2',
                        best_params=mean_m2)

def get_loss_m2(params):
    """Loss table of the M2 forecast produced with `params`.

    Args:
        params: parameter dict passed to `get_forecast` as `best_params`.

    Returns:
        Whatever `lc.create_loss_dataframe_region` returns for the M2 train/val frames of
        the first district.
    """
    m2_fit = predictions_dict[districts_to_show[0]]['m2']
    df_train = m2_fit['df_train']
    df_val = m2_fit['df_val']
    df_predictions= get_forecast(predictions_dict[districts_to_show[0]],
                                train_fit='m2',
                                best_params=params)
    df_loss =  lc.create_loss_dataframe_region(df_train, df_val, df_predictions, train_period=7,
                                 which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    return df_loss

In [None]:
#m2_losses = [get_loss_m2(params)['train'] for params in best_params_m2]
m2_losses = losses_array_m2[least_losses_indices_m2]

In [None]:
# Side-by-side M2 forecasts of the top-N parameter sets: the columns of every forecast are
# repeated once per parameter set, in ascending-loss order.
# Forecasts are collected first and concatenated once, rather than re-copying an
# ever-wider frame on every iteration.
m2_forecast_frames = []
for params in best_params_m2:
    m2_forecast_frames.append(get_forecast(predictions_dict[('Maharashtra', 'Mumbai')],
                                           train_fit='m2',
                                           best_params=params))
# pd.concat rejects an empty list, so keep the empty-frame result for that case.
all_forecasts = pd.concat(m2_forecast_frames, axis = 1) if m2_forecast_frames else pd.DataFrame()

In [None]:
mean_m2

In [None]:
all_forecasts

In [None]:
# Weighted percentile / confidence-interval helper for the M2 forecast ensemble.
# TODO: also add a dedicated median prediction.


def gen_CI(day = 1, compartment = 'hospitalised', CI = 0.95, beta = beta_min, percentile = -1):
    """Weighted percentile or central interval of the M2 forecasts for one day.

    The forecasts of `compartment` on row `day` of `all_forecasts` are sorted, and each one
    is weighted by exp(-beta * loss) of the trial that produced it (losses are taken from
    `m2_losses[:N]`, which is in the same order as the forecast columns).

    Args:
        day: row position in `all_forecasts`.
        compartment: column name to analyse.
        CI: central coverage of the interval (used when `percentile` < 0).
        beta: weighting strength; the default is the value of `beta_min` at the time this
            function was defined.
        percentile: fraction in [0, 1]; when >= 0 a single weighted percentile is returned
            instead of an interval.

    Returns:
        (date, forecast) when `percentile` >= 0, otherwise (date, lower, upper).
        Each bound is the forecast at which the cumulative weight, counted from the
        respective end, first reaches the requested share of the total weight.
    """
    daily_forecasts = np.asarray(all_forecasts[compartment].iloc[day, :], dtype=float)
    ascending_order = np.argsort(daily_forecasts)
    sorted_daily_forecasts = daily_forecasts[ascending_order]
    weights = np.exp(-beta * m2_losses[:N][ascending_order])

    # Use the actual number of forecasts, not the global N, so fewer than N trials is fine.
    last_position = len(sorted_daily_forecasts) - 1
    forecast_date = all_forecasts['date'].iloc[day, 0]
    cumulative_from_low = np.cumsum(weights)
    total_weight = cumulative_from_low[-1]

    def first_crossing(cumulative, threshold):
        # Position of the first element whose cumulative weight reaches `threshold`,
        # clamped to the valid range to guard against floating-point overshoot.
        return min(int(np.searchsorted(cumulative, threshold, side='left')), last_position)

    if (percentile >= 0):
        position = first_crossing(cumulative_from_low, percentile * total_weight)
        return forecast_date, sorted_daily_forecasts[position]

    tail_weight = (1 - CI) / 2 * total_weight
    lower_position = first_crossing(cumulative_from_low, tail_weight)
    # Same scan from the high end: accumulate the weights in descending forecast order.
    upper_position = last_position - first_crossing(np.cumsum(weights[::-1]), tail_weight)
    lower_forecast = sorted_daily_forecasts[lower_position]
    upper_forecast = sorted_daily_forecasts[upper_position]
    return forecast_date, lower_forecast, upper_forecast

In [None]:
df_predictions_mf = get_mean_forecast(min_loss_ind_forecast, 'm2')

In [None]:
df_predictions_mf[['date','hospitalised']]

In [None]:
val_losses

In [None]:
# Observed data, mean-parameter forecast with its 95% weighted interval, and the mean
# forecast, for every compartment of the M2 fit.
df_true = predictions_dict[('Maharashtra', 'Mumbai')]['m2']['df_district']
df_predictions = get_preds_m2()
sns.set()
colors = ['orange', 'blue', 'red', 'green']
ci_cols = {}
fig, ax = plt.subplots(figsize=(12, 12))
plt.rcParams['figure.dpi']=200
plt.rcParams['figure.figsize']=[30, 50]
plt.rcParams.update({'font.size':20})
for idx, compartment in enumerate(['hospitalised', 'total_infected', 'deceased', 'recovered']):
    ax.plot(df_true['date'], df_true[compartment],
            '-o', color= colors[idx], label= compartment)
    ci_cols[idx] = pd.DataFrame([gen_CI(day = i, compartment = compartment, CI = 0.95) for i in range(len(all_forecasts))])
    ci_cols[idx].columns = ['date', 'lower', 'upper']

    # Draw the interval as an explicit band and the mean-parameter forecast as its own line.
    # Stacking upper/lower/mean rows into one seaborn lineplot would instead plot the
    # average of the three values with a randomly bootstrapped band.
    ax.fill_between(ci_cols[idx]['date'],
                    ci_cols[idx]['lower'].astype(float),
                    ci_cols[idx]['upper'].astype(float),
                    color = colors[idx], alpha = 0.2)
    ax.plot(df_predictions['date'], df_predictions[compartment], '-',
            color = colors[idx], label= compartment+" prediction (mean param)" )
    ax.plot(df_predictions_mf['date'],  df_predictions_mf[compartment], '--',
                  color = colors[idx], label= compartment+" prediction (mean forecast)" )

# Weekly major ticks, daily minor ticks.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.ylabel('No of People', fontsize=16)
# plt.yscale('log')
plt.xlabel('Time', fontsize=16)
plt.xticks(rotation=45,horizontalalignment='right')
plt.legend()
plt.title('Forecast - ({} {})'.format(region[0], region[1]), fontsize=16)
# Lay out before showing; adjusting the layout after plt.show() does not affect the output.
fig.tight_layout()
plt.show()

In [None]:
# Observed active cases together with several weighted percentiles of the M2 forecast
# ensemble (one curve per percentile).
df_true = predictions_dict[('Maharashtra', 'Mumbai')]['m2']['df_district']
df_predictions = get_preds_m2()
sns.set()
colors = ['orange', 'blue', 'red', 'green']
ci_cols = {}
fig, ax = plt.subplots(figsize=(12, 12))
plt.rcParams['figure.dpi']=200
plt.rcParams['figure.figsize']=[30, 50]
plt.rcParams.update({'font.size':20})
ax.plot(df_true['date'], df_true['hospitalised'],
            '-o', color= 'orange', label= 'hospitalised')
percentile_list = [0.025, 0.05, 0.1, 0.2, 0.5, 0.7, 0.8, 0.9, 0.95]
for idx, percentile in enumerate(percentile_list):
    ci_cols[idx] = pd.DataFrame([gen_CI(i, 'hospitalised', percentile= percentile) for i in range(len(all_forecasts))])
    ci_cols[idx].columns = ['date', 'forecast']
    sns.lineplot(data = ci_cols[idx], y =  'forecast',  x ='date' ,
                  label= percentile, ax = ax )

# Weekly major ticks, daily minor ticks.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.ylabel('No of People', fontsize=16)
# plt.yscale('log')
plt.xlabel('Time', fontsize=16)
plt.xticks(rotation=45,horizontalalignment='right')
plt.legend()
plt.title('Forecast - ({} {})'.format(region[0], region[1]), fontsize=16)
# Lay out before showing; adjusting the layout after plt.show() does not affect the output.
fig.tight_layout()
plt.show()