In [None]:
# Reload edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import numpy as np
import scipy.stats
from scipy.stats import entropy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from hyperopt import tpe, rand

import datetime

import copy
import json
import time
import os

import sys
sys.path.append('../../')

from models.seir import SEIRHD

from main.seir.fitting import single_fitting_cycle
from main.seir.forecast import _order_trials_by_loss

In [None]:
# Fit results keyed by run name; starts empty and is filled by the fitting cell.
predictions_dict = dict()

## Select Districts to fit on

In [None]:
# Region passed to every fitting call.
state = 'Maharashtra'
district = 'Mumbai'

In [None]:
# Run-level settings.
model = SEIRHD
ktrials = 10
forecast_days = 54

# Timestamp of this run formatted as YYYYMMDD-HHMMSS; `folder` holds the same string.
now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
folder = str(now)

## Perform fits M1 through M8 (500 to 4000 evaluations)

In [None]:
# Fit the same district eight times, changing only the `num_evals` budget:
# run 'm<k>' uses k * 500 evaluations (m1 = 500, ..., m8 = 4000).
# Every other argument is identical across runs, so the call is written once
# instead of being copy-pasted per run.
for run_index in range(1, 9):
    predictions_dict[f'm{run_index}'] = single_fitting_cycle(
        state, district, train_period=21, val_period=0, num_evals=500 * run_index,
        data_from_tracker=False, initialisation='intermediate', model=model,
        smooth_jump=True, algo=tpe,
        which_compartments=['active', 'total', 'deceased', 'recovered'])

In [None]:
# For every fitted run, regroup the trial parameters column-wise
# (parameter name -> list of values, one per trial) and turn each trial's
# loss into a weight exp(-loss).
params_dicts, weights_dict = {}, {}
for run_name, run_output in predictions_dict.items():
    trial_params, trial_losses = _order_trials_by_loss(run_output)
    param_names = trial_params[0].keys()
    params_dicts[run_name] = {
        name: [trial[name] for trial in trial_params] for name in param_names
    }
    weights_dict[run_name] = np.exp(-np.array(trial_losses))

In [None]:
# Distribution family fitted to the sampled values of each model parameter.
# Entry order matches the original literal.
param_distributions = dict(
    E_hosp_ratio=scipy.stats.expon,
    I_hosp_ratio=scipy.stats.gamma,
    P_fatal=scipy.stats.beta,
    T_inc=scipy.stats.norm,
    T_inf=scipy.stats.norm,
    T_recov_fatal=scipy.stats.norm,
    T_recov_severe=scipy.stats.norm,
    lockdown_R0=scipy.stats.norm,
)

In [None]:
# One figure per run: an unweighted, density-normalised histogram of the
# sampled values of every parameter.
for run in params_dicts.keys():
    run_params = params_dicts[run]
    # Size the grid from the parameters actually plotted, rounding up so an
    # odd number of parameters still gets one axis each.
    n_rows = max(1, (len(run_params) + 1) // 2)
    fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(18, 6 * n_rows))
    axes = axs.ravel()
    for ax, (param, values) in zip(axes, run_params.items()):
        ax.hist(values, density=True, histtype='bar')
        ax.set_title(f'Histogram of parameter {param} for run {run}')
        # density=True normalises the bars, so the axis shows densities, not counts.
        ax.set_ylabel('Density')
    # Hide any grid cell left without a parameter.
    for spare_ax in axes[len(run_params):]:
        spare_ax.set_visible(False)
    plt.show()

In [None]:
def create_histograms_and_plot(plot_lines=False, weighted=True, savefig=False, filename='', bins=20):
    """Overlay the per-run histograms of every parameter and return their values.

    Reads the notebook-level ``params_dicts`` (run -> parameter -> list of
    sampled values) and ``weights_dict`` (run -> array of sample weights).

    For each parameter, all runs are binned on the same edges (spanning the
    samples of every run), so bin ``i`` is the same interval in every run and
    the returned probability vectors can be compared bin by bin.

    Args:
        plot_lines: draw each histogram as a line through the bin centres
            instead of as bars.
        weighted: weight the samples by ``weights_dict[run]``; honoured by
            both the bar and the line rendering.
        savefig: write the figure to ``filename``.
        filename: output path; required when ``savefig`` is true.
        bins: number of equal-width bins used for every parameter.

    Returns:
        ``(fig, histograms)`` where ``histograms[run][param]`` holds
        ``'density'`` (bar heights), ``'endpoints'`` (bin edges) and
        ``'probability'`` (density times bin width).

    Raises:
        ValueError: if ``savefig`` is true but ``filename`` is empty.
    """
    if savefig and not filename:
        raise ValueError('filename must be given when savefig=True')

    run_names = list(params_dicts.keys())

    # Parameters in first-seen order across runs; each one owns a fixed axis.
    param_names = []
    for run in run_names:
        for param in params_dicts[run]:
            if param not in param_names:
                param_names.append(param)
    axis_index = {param: position for position, param in enumerate(param_names)}

    # Common bin edges per parameter, computed over the pooled samples of all runs.
    shared_edges = {}
    for param in param_names:
        pooled = np.concatenate([
            np.asarray(params_dicts[run][param], dtype=float)
            for run in run_names if param in params_dicts[run]
        ])
        shared_edges[param] = np.histogram_bin_edges(pooled, bins=bins)

    # Round the row count up so an odd number of parameters still fits.
    n_rows = max(1, (len(param_names) + 1) // 2)
    fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(18, 6 * n_rows))
    axes = axs.ravel()

    histograms = {}
    for run in run_names:
        histograms[run] = {}
        weights = weights_dict[run] if weighted else None
        # Run names end in the run number ('m3' -> 3); each step is 500 evaluations.
        run_number = int(run[len(run.rstrip('0123456789')):])
        label = f'{run_number * 500} samples'

        for param, values in params_dicts[run].items():
            ax = axes[axis_index[param]]
            edges = shared_edges[param]
            if plot_lines:
                bar_heights, endpoints = np.histogram(values, density=True, bins=edges, weights=weights)
                centers = (endpoints[1:] + endpoints[:-1]) / 2
                ax.plot(centers, bar_heights, label=label)
            else:
                bar_heights, endpoints, _ = ax.hist(values, density=True, histtype='bar', bins=edges,
                                                    weights=weights, label=label, alpha=1)

            histograms[run][param] = {
                'density': bar_heights,
                'endpoints': endpoints,
                # Probability mass of a bin = density * that bin's own width.
                'probability': bar_heights * np.diff(endpoints),
            }

            ax.set_title(f'Histogram of parameter {param}')
            ax.set_ylabel('Density')

    for ax in axes[:len(param_names)]:
        ax.legend()
    # Hide any grid cell left without a parameter.
    for spare_ax in axes[len(param_names):]:
        spare_ax.set_visible(False)

    if savefig:
        fig.savefig(filename)
    return fig, histograms

In [None]:
# Unweighted overlay of all runs. With savefig=False nothing is written to disk.
fig, histograms = create_histograms_and_plot(
    weighted=False,
    savefig=False,
    filename='constrainted-unweighted-histogram.png',
)

In [None]:
# Weighted overlay of all runs. This rebinds `fig` and `histograms`, so later
# cells see the weighted histograms. With savefig=False nothing is written to disk.
fig, histograms = create_histograms_and_plot(
    weighted=True,
    savefig=False,
    filename='constrainted-weighted-histogram.png',
)

In [None]:
# Pairwise KL divergence between runs, one heat-map per parameter, computed
# from the per-bin probabilities stored in `histograms`
# (row = first argument of entropy(), column = second).
# NOTE(review): the values are only meaningful if every run's histogram of a
# parameter was built on the same bin edges -- confirm in
# create_histograms_and_plot.
runs = list(histograms.keys())
param_names = list(histograms[runs[0]].keys())
# Tick labels come from the run names ('m<k>' -> k * 500) rather than from
# list position, so they stay correct if a run is missing or reordered.
tick_labels = [int(run[len(run.rstrip('0123456789')):]) * 500 for run in runs]

# Round the row count up so an odd number of parameters still fits.
n_rows = max(1, (len(param_names) + 1) // 2)
fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(18, 6 * n_rows))
axes = axs.ravel()
for ax, param in zip(axes, param_names):
    probabilities = [histograms[run][param]['probability'] for run in runs]
    kl_matrix = np.array([[entropy(p, q) for q in probabilities] for p in probabilities])
    sns.heatmap(kl_matrix, annot=True, xticklabels=tick_labels, yticklabels=tick_labels, vmax=10, ax=ax)
    ax.set_title(f'KL Divergence matrix of parameter {param}')
# Hide any grid cell left without a parameter.
for spare_ax in axes[len(param_names):]:
    spare_ax.set_visible(False)
plt.show()
# fig.savefig('constrained-kl-matrix.png')

In [None]:
# Standard deviation of the sampled values, per run and per parameter.
# NOTE: despite its name, this holds np.std results only, not means or variances.
params_mean_var = {
    run_name: {name: np.std(values) for name, values in run_params.items()}
    for run_name, run_params in params_dicts.items()
}

In [None]:
# Show the nested dict as a table: one column per run, one row per parameter.
pd.DataFrame(params_mean_var)

In [None]:
# One figure per run: histogram of each parameter's samples with the fitted
# density of its assigned distribution family drawn on top.
for run in params_dicts.keys():
    run_params = params_dicts[run]
    # Size the grid from the parameters actually plotted, rounding up so an
    # odd number of parameters still gets one axis each.
    n_rows = max(1, (len(run_params) + 1) // 2)
    fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(18, 6 * n_rows))
    axes = axs.ravel()
    for ax, param in zip(axes, run_params.keys()):
        param_trials = run_params[param]
        ax.hist(param_trials, density=True)
        ax.set_title(f'Histogram of parameter {param} for run {run}')
        ax.set_ylabel('Density')

        # A parameter with no assigned family gets the histogram only.
        dist = param_distributions.get(param)
        if dist is None:
            continue

        # fit() returns (*shape_params, loc, scale); unpack accordingly for pdf().
        dist_fit = dist.fit(param_trials)
        sampling_points = np.linspace(np.min(param_trials), np.max(param_trials), len(param_trials))
        pdf_fitted = dist.pdf(sampling_points, *dist_fit[:-2], loc=dist_fit[-2], scale=dist_fit[-1])
        ax.plot(sampling_points, pdf_fitted)
    # Hide any grid cell left without a parameter.
    for spare_ax in axes[len(run_params):]:
        spare_ax.set_visible(False)
    plt.show()

In [None]:
# Inspect `param_trials` as left behind by the final iteration of the
# distribution-fitting loop (last parameter of the last run).
param_trials