In [1]:
# Enable the autoreload extension in mode 2 so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import copy
import datetime
import os
import pickle as pkl
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import wandb
import yaml
from scipy.stats import entropy

sys.path.append('../../')

from data.processing import get_data

import models

from main.seir.fitting import single_fitting_cycle
from main.seir.forecast import get_forecast, forecast_all_trials, create_all_trials_csv, create_decile_csv_new
from main.seir.sensitivity import calculate_sensitivity_and_plot
from utils.generic.create_report import save_dict_and_create_report
from utils.generic.config import read_config
from utils.generic.enums import Columns
from utils.fitting.loss import Loss_Calculator
from utils.generic.logging import log_wandb
from viz import plot_forecast, plot_top_k_trials, plot_ptiles
from viz.fit import plot_histogram, plot_all_histograms, plot_mean_variance, plot_scatter, plot_kl_divergence, plot_heatmap_distribution_sigmas, plot_all_params, plot_all_losses, plot_all_buckets
from data.dataloader import SimulatedDataLoader


In [3]:
# Container for all fitting results; later cells fill it as
# predictions_dict[location_tag][model_name][f'm{k}'].
predictions_dict = dict()

In [4]:
# Timestamped report directory (timestamp taken when this cell runs).
output_folder = f"../../misc/reports/{datetime.datetime.now():%Y_%m%d_%H%M%S}"

In [5]:
# Quick check of which location tags are stored so far (empty right after
# initialisation, as the recorded output shows).
predictions_dict.keys()

dict_keys([])

In [12]:
# Number of repeated runs per configuration. The fitting loop further down
# reuses this value for its own repetitions.
num_rep_trials = 2
simulated_data_configs = ['seirhd_fixed.yaml']

for config_filename in simulated_data_configs:
    # The YAML file is identical for every repetition, so parse it once per
    # file instead of re-opening it inside the inner loop.
    with open(os.path.join("../../configs/simulated_data/", config_filename)) as configfile:
        base_config = yaml.load(configfile, Loader=yaml.SafeLoader)
    output_stem = config_filename.split('.')[0]

    for m in range(num_rep_trials):
        # Each repetition gets its own deep copy so that setting the output
        # name (or anything the loader does to nested values) cannot leak
        # into the next repetition.
        config = copy.deepcopy(base_config)
        config['output_file_name'] = f'{output_stem}_{m}.csv'
        print(config)
        loader = SimulatedDataLoader()
        # Return values are not needed here; the call is made for the
        # simulated-data output named by 'output_file_name'
        # (presumably written by the loader -- confirm in SimulatedDataLoader).
        loader.load_data(**config)

{'model': 'SEIRHD', 'starting_date': datetime.date(2020, 8, 18), 'total_days': 50, 'initial_values': {'recovered': 95000, 'deceased': 7500, 'active': 20000}, 'include_tests': True, 'tests_done': '[10000 for i in range(50)]', 'fix_params': True, 'params': {'N': 10000000.0, 'lockdown_R0': 0.8, 'T_inc': 5.1, 'T_inf': 3.6, 'T_recov': 21.5, 'T_recov_fatal': 25.5, 'P_fatal': 0.08, 'E_hosp_ratio': 0.34, 'I_hosp_ratio': 0.94}, 'output_file_name': 'seirhd_fixed_0.csv'}
{'model': 'SEIRHD', 'starting_date': datetime.date(2020, 8, 18), 'total_days': 50, 'initial_values': {'recovered': 95000, 'deceased': 7500, 'active': 20000}, 'include_tests': True, 'tests_done': '[10000 for i in range(50)]', 'fix_params': True, 'params': {'N': 10000000.0, 'lockdown_R0': 0.8, 'T_inc': 5.1, 'T_inf': 3.6, 'T_recov': 21.5, 'T_recov_fatal': 25.5, 'P_fatal': 0.08, 'E_hosp_ratio': 0.34, 'I_hosp_ratio': 0.94}, 'output_file_name': 'seirhd_fixed_1.csv'}


In [6]:
# Experiment configs, one per model. The order here must match the key order
# of `model_params`, because later cells pair configs[j] with model_names[j]
# by position.
config_filenames = ['experiments/seirhd.yaml', 'experiments/undetected.yaml', 'experiments/seir_pu.yaml']

# Parameter names per model, passed to plot_all_params further down.
# Each name appears once per model ('T_inf' was previously listed twice for
# 'SEIRHD Free').
model_params = {
    'SEIRHD Free': ['lockdown_R0', 'T_inc', 'T_inf', 'T_recov', 'T_recov_fatal', 'P_fatal', 'E_hosp_ratio', 'I_hosp_ratio'],
    'SEIR_Undetected': ['T_inc', 'T_inf_D', 'T_inf_U', 'T_recov', 'T_recov_fatal', 'beta', 'd', 'P_fatal', 'I_D_hosp_ratio', 'I_U_hosp_ratio', 'E_hosp_ratio'],
    'SEIR_PU': ['T_inc', 'T_inf_U', 'T_recov', 'T_recov_fatal', 'beta', 'd', 'P_fatal', 'I_hosp_ratio', 'E_hosp_ratio', 'Pu_pop_ratio'],
}
model_names = list(model_params.keys())
# Load one config per entry of config_filenames, in the same order.
configs = list(map(read_config, config_filenames))
# tag -> (state, district, split start_date, split end_date); the fitting loop
# writes these four values into each config before fitting.
location_tuples = dict([
    # ('MUM(Latest)', ('Maharashtra', 'Mumbai', None, None)),  # peak inside training
    ('MUM(15Aug)', ('Maharashtra', 'Mumbai', datetime.date(2020, 8, 15), None)),
    ('MUM(1July)', ('Maharashtra', 'Mumbai', datetime.date(2020, 7, 1), None)),
    # ('DEL(Latest)', ('Delhi', None, None, None)),
    ('DEL(15Aug)', ('Delhi', None, datetime.date(2020, 8, 15), None)),
    ('DEL(1Aug)', ('Delhi', None, datetime.date(2020, 8, 1), None)),
])

In [None]:

# Fit every model on every location, num_rep_trials times each, and store the
# results as predictions_dict[tag][model_name]['m<k>'].
for tag, (state, district, start_date, end_date) in location_tuples.items():
    predictions_dict[tag] = {}
    for idx, config in enumerate(configs):
        model_name = model_names[idx]
        model_runs = {}
        predictions_dict[tag][model_name] = model_runs

        # Work on a private copy so the shared config stays untouched.
        config_params = copy.deepcopy(config['fitting'])
        dataloading_params = config_params['data']['dataloading_params']
        dataloading_params['state'] = state
        dataloading_params['district'] = district
        split_params = config_params['split']
        split_params['start_date'] = start_date
        split_params['end_date'] = end_date

        # smooth_jump is switched off for every district other than Mumbai.
        if district != 'Mumbai':
            config_params['data']['smooth_jump'] = False

        for rep in range(num_rep_trials):
            print(tag, model_name, rep)
            model_runs[f'm{rep}'] = single_fitting_cycle(**config_params)

In [None]:
# Persist the nested results so the fitting loop does not have to be re-run.
dump_path = '../../misc/predictions/predictions_dict_time.pickle'
with open(dump_path, 'wb') as pickle_file:
    pkl.dump(predictions_dict, pickle_file)

### Use the pickle file to read the predictions_dict

In [None]:
# Replace predictions_dict with previously saved results.
# NOTE(review): this reads predictions_dict.pickle, whereas the save cell above
# writes predictions_dict_time.pickle -- confirm which file is intended.
# pickle can execute arbitrary code on load; only open files produced by this project.
load_path = '../../misc/predictions/predictions_dict.pickle'
with open(load_path, 'rb') as pickle_file:
    predictions_dict = pkl.load(pickle_file)

In [None]:
# Start a Weights & Biases run and prepend an experiment prefix to its name.
wandb.init(project="covid-modelling")
run_name_prefix = "degeneracy-exps-location"
wandb.run.name = run_name_prefix + wandb.run.name

In [None]:
# Plot the parameters named in model_params for the stored results, using the
# 'ensemble_combined' method (presumably one panel per parameter -- see viz.fit).
plot_all_params(predictions_dict, model_params, method='ensemble_combined')

In [None]:
# Map each model name to the loss compartments declared in its config
# (configs and model_names are paired by position), then plot the
# train/val losses for those compartments.
which_compartments = {}
for idx, cfg in enumerate(configs):
    which_compartments[model_names[idx]] = cfg['fitting']['loss']['loss_compartments']
plot_all_losses(predictions_dict, which_losses=['train', 'val'], which_compartments=which_compartments)

In [None]:
# Plot the S, I, E and I_U buckets, comparing across models.
# NOTE(review): `model_types` is not defined in any cell visible in this
# notebook; on a fresh kernel this line would raise NameError unless it is
# defined elsewhere -- confirm the intended value.
plot_all_buckets(predictions_dict, which_buckets=['S', 'I', 'E', 'I_U'], compare='model', model_types=model_types)