In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before each cell runs,
# so edits to the project packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import datetime
import copy
import time
import pandas as pd
import sys
import os
sys.path.append('../../')

import models

from main.seir.fitting import single_fitting_cycle
from main.seir.forecast import get_forecast, forecast_all_trials, create_all_trials_csv, create_decile_csv_new
from utils.generic.create_report import save_dict_and_create_report
from utils.generic.config import read_config, make_date_key_str
from utils.generic.enums import Columns
from utils.fitting.loss import Loss_Calculator
from utils.generic.logging import log_wandb, log_mlflow
from viz import plot_forecast, plot_top_k_trials, plot_ptiles

import yaml
import wandb

In [None]:
# Container that later cells fill with the 'm1' / 'm2' fit results, plots and forecasts.
predictions_dict = dict()

In [None]:
# Experiment configuration file; the same name is passed on when the report is saved.
config_filename = 'exp_simulate_2.yaml'

# Configuration used by the fitting, forecasting and uncertainty cells.
config = read_config(config_filename)

# Second read with preprocess=False, passed through make_date_key_str; this copy is the one
# handed to wandb.init further down.
wandb_config = make_date_key_str(read_config(config_filename, preprocess=False))

In [None]:
# Wall-clock time of this run; it names the report folder and is reused for 'fitting_date'.
timestamp = datetime.datetime.now()
# One report directory per run, e.g. ../../misc/reports/2021_0131_235959
output_folder = f'../../misc/reports/{timestamp:%Y_%m%d_%H%M%S}'

## Perform M1 and M2 fits

In [None]:
import matplotlib.pyplot as plt

# Global matplotlib text settings for the figures that follow (no LaTeX rendering, 12pt Palatino).
plt.rcParams['text.usetex'] = False
plt.rcParams['font.size'] = 12
plt.rcParams['font.family'] = 'Palatino'

In [None]:

# scenario_dict ={}   
# for i in range(7):
#     file_name = '../../misc/predictions/exp_{}.pickle'.format(i)
#     with open(file_name, 'rb') as handle:
#         PD = pkl.load(handle)
#     scenario_dict['exp'+str(i)] = PD['m1']
# with open('../../misc/predictions/scenario_dict.pickle', 'wb') as handle:
#     pkl.dump(scenario_dict, handle)

In [None]:
# pickle is imported here because this cell runs before the later cells that import it;
# without this line a fresh kernel raises NameError on `pkl`.
import pickle as pkl

# Load the saved predictions of the 'exp_simulate_2' experiment.
# NOTE: pickle.load can execute arbitrary code; only open pickles produced by this project.
with open('../../misc/predictions/exp_simulate_2.pickle', 'rb') as handle:
    PD = pkl.load(handle)

In [None]:
# Register the 'm1' results of the pickle loaded above under the key 'exp4'.
# NOTE(review): no active cell in this notebook creates `scenario_dict`; the only code that
# builds it is commented out in the cell above, so this line raises NameError on a fresh kernel.
scenario_dict['exp4'] = PD['m1']

In [None]:
# 2x4 grid: one subplot per parameter named in `arr`, comparing scenarios 'exp0' and 'exp4'.
# NOTE(review): `plot_2_histogram` is imported and `true_val` is defined only in later cells,
# and `scenario_dict['exp0']` is never populated by an active cell, so this cell cannot run
# top-to-bottom on a fresh kernel.
figs, axs = plt.subplots(2,4,figsize = [18,10])
arr = ['beta', 'T_inc', 'T_inf', 'T_recov', 'T_recov_fatal', 'P_fatal', 'E_hosp_ratio', 'I_hosp_ratio']
plot_2_histogram(scenario_dict,'exp0','exp4',arr,true_val,figs,axs)


In [None]:
import pickle as pkl

# Saved predictions of the 'exp_simulate_2' experiment; PD['m1'] is what the plots below read.
with open('../../misc/predictions/exp_simulate_2.pickle', 'rb') as handle:
    PD = pkl.load(handle)

In [None]:
import pickle as pkl

# Saved predictions of the 'exp_simulate_1' experiment, kept separately from PD as PD1.
with open('../../misc/predictions/exp_simulate_1.pickle', 'rb') as handle:
    PD1 = pkl.load(handle)

In [None]:
def update_hosp_ratios(param_set, data_config, model_config):
    """Add the compartment ratios at the first training day to ``param_set``.

    The simulated dataset named by ``data_config['output_file_name']`` is read from
    ``../../data/data/simulated_data/``. The row at the start of the training window is
    located by counting back ``train_period + val_period + test_period`` rows from the end
    of the data, shifted by the ``end_date`` offset, and the ratios are computed from it.

    Args:
        param_set: dict of parameters; updated in place and also returned.
        data_config: dict with ``'output_file_name'`` (CSV file name) and ``'model'``.
        model_config: split settings with ``'train_period'``, ``'val_period'``,
            ``'test_period'`` and ``'end_date'``. ``end_date`` may be falsy (use the last
            row), a non-positive int (row offset from the end) or a ``datetime.date`` /
            ``datetime.datetime`` matching a value in the CSV's ``date`` column.

    Returns:
        The same ``param_set`` object. ``'E_hosp_ratio'`` and ``'I_hosp_ratio'`` are set for
        the models 'SEIRHD', 'SEIRHD_Beta' and 'SEIR_PU'; other models leave it unchanged.

    Raises:
        ValueError: if ``end_date`` is a positive int, or a date absent from the data.
        TypeError: if ``end_date`` is neither falsy, an int, nor a date.
    """
    df = pd.read_csv(os.path.join('../../data/data/simulated_data/', data_config['output_file_name']), index_col=0)

    end_date = model_config['end_date']
    if not end_date:
        # None / 0: the split is anchored on the last row of the data.
        end_offset = 0
    elif isinstance(end_date, datetime.date):
        # datetime.datetime subclasses datetime.date but never compares equal to a plain
        # date, so reduce it to its calendar date before matching.
        if isinstance(end_date, datetime.datetime):
            end_date = end_date.date()
        df['date'] = pd.to_datetime(df['date'])
        matching_index = df.loc[df['date'].dt.date == end_date].index
        if len(matching_index) == 0:
            raise ValueError('end_date {} was not found in the simulated data'.format(end_date))
        # Offset relative to the last row (0 for the last row, negative for earlier rows).
        # assumes the CSV index is the 0-based row position — TODO confirm
        end_offset = matching_index[0] - len(df) + 1
    elif isinstance(end_date, int):
        if end_date > 0:
            raise ValueError('Please enter a negative value for end_date if entering an integer')
        # A negative integer is already an offset from the last row.
        end_offset = end_date
    else:
        raise TypeError('end_date must be None, an int or a datetime.date, got {}'.format(
            type(end_date).__name__))

    total_period = model_config['train_period'] + model_config['val_period'] + model_config['test_period']
    train_start_row = df.iloc[len(df) - total_period + end_offset]

    model_name = data_config['model']
    if model_name in ('SEIRHD', 'SEIRHD_Beta', 'SEIR_PU'):
        param_set['E_hosp_ratio'] = train_start_row['E'] / train_start_row['active']
        param_set['I_hosp_ratio'] = train_start_row['I'] / train_start_row['active']
    if model_name == 'SEIR_PU':
        # NOTE(review): the denominator column name is an empty string here, which looks
        # truncated; the intended column is not visible in this notebook — confirm and fix.
        param_set['Pu_pop_ratio'] = train_start_row['Pu'] / train_start_row['']
    return param_set

In [None]:
# Configuration of the simulated dataset; its 'params' entry holds the values used as ground truth.
filename = 'seirhd_beta.yaml'
with open(f'../../configs/simulated_data/{filename}') as configfile:
    data_config = yaml.safe_load(configfile)

# update_hosp_ratios writes into the dict it is given and returns it, so true_val is the same
# object as data_config['params'].
true_val = update_hosp_ratios(data_config['params'], data_config, config['fitting']['split'])
print(true_val)
# NOTE(review): this override happens after the print, so the output above shows the
# pre-override contents; the reason for the value 0.08 is not stated — confirm.
true_val['P_fatal'] = 0.08

In [None]:
from viz.fit import plot_histogram,plot_all_histogram,plot_2_histogram,plot_log_density

# Parameters to draw, one per subplot of the 2x4 grid created below.
arr = ['beta', 'T_inc', 'T_inf', 'T_recov', 'T_recov_fatal', 'P_fatal', 'E_hosp_ratio', 'I_hosp_ratio']
figs, axs = plt.subplots(2, 4, figsize=(18, 9))
plot_log_density(PD['m1'], arr, true_val, figs, axs)

In [None]:
# Histograms for the parameters in `arr`, for both loaded experiments (PD1, then PD).
# Both calls receive the same figure and axes objects.
# NOTE(review): presumably the second call overlays onto the first — confirm against plot_histogram.
figs, axs = plt.subplots(2,4,figsize = [20,10])
plot_histogram(PD1['m1'],arr,true_val,figs,axs)
plot_histogram(PD['m1'],arr,true_val,figs,axs)



In [None]:
from unidip import UniDip
import pandas as pd

# From here on `arr` is every key of true_val, replacing the eight-name list used for the plots above.
arr = list(true_val)

In [None]:
import pickle as pkl
from main.seir.forecast import _order_trials_by_loss

# NOTE(review): this rebinds PD, which held the exp_simulate_2 results in earlier cells, to the
# exp_simulate_1 results; re-running an earlier plotting cell afterwards uses different data.
with open('../../misc/predictions/exp_simulate_1.pickle', 'rb') as handle:
    PD = pkl.load(handle)

# Parameter sets of the 'm1' trials together with their losses, as returned by _order_trials_by_loss.
params_array, losses_array = _order_trials_by_loss(PD['m1'])

# One column per parameter in `arr`, one row per trial.
params_dict = {
    param: [param_dict[param] for param_dict in params_array]
    for param in arr
}
df = pd.DataFrame(params_dict)


In [None]:
# Larger font (18pt) for the correlation figures below; the other text settings are unchanged.
plt.rcParams['text.usetex'] = False
plt.rcParams['font.size'] = 18
plt.rcParams['font.family'] = 'Palatino'

In [None]:
import seaborn as sns

# Pairwise correlation matrix of the sampled parameters (pandas default method).
CM = df.corr()
fig = plt.figure(figsize = (16,14))
# Reuse CM rather than computing df.corr() a second time; colour scale fixed to [-1, 1].
heatmap = sns.heatmap(CM, vmin=-1, vmax=1, annot=True, cmap='coolwarm')

In [None]:
# Scatter-plot matrix of every pair of columns in `df` (one row per trial).
sns.pairplot(df)

In [None]:
# Mean absolute correlation of each parameter with the *other* parameters: the column mean
# times the number of parameters gives the column sum, from which the self-correlation (1)
# is removed before averaging over the remaining len(CM) - 1 entries. The size is taken from
# CM instead of being hard-coded to 8 parameters. Assumes CM has no NaN entries.
((CM.abs().mean() * len(CM) - 1) / (len(CM) - 1)).sort_values(ascending=False)

In [None]:
# for i in arr:
#     R_0 = df[i].to_numpy()
#     R_0 = np.msort(R_0)
#     interavals = UniDip(R_0,alpha=0.0000001, ntrials=1000).run()
#     print(i,interavals)

In [None]:
%%time
# Run the 'm1' fit on a deep copy of the fitting config, so the call works on its own copy
# rather than on `config`.
predictions_dict['m1'] = single_fitting_cycle(**copy.deepcopy(config['fitting']))

In [None]:
# 'm2' fit: the same fitting config as 'm1', except that the validation period is set to 0.
m2_params = copy.deepcopy(config['fitting'])
m2_params['split']['val_period'] = 0
predictions_dict['m2'] = single_fitting_cycle(**m2_params)

# Store the run date (from `timestamp`) alongside the two fits.
predictions_dict['fitting_date'] = timestamp.strftime("%Y-%m-%d")

In [None]:
# Display the 'best_params' entry of the 'm1' fit.
predictions_dict['m1']['best_params']

In [None]:
# Display the 'best_params' entry of the 'm2' fit.
predictions_dict['m2']['best_params']

## Loss Dataframes

### M1 Loss DataFrame

In [None]:
# Display the 'df_loss' entry of the 'm1' fit.
predictions_dict['m1']['df_loss']

### M2 Loss DataFrame

In [None]:
# Display the 'df_loss' entry of the 'm2' fit.
predictions_dict['m2']['df_loss']

## Sensitivity Plot

In [None]:
# Store the sensitivity plot of each fit; only the first of the three returned values is kept.
# NOTE(review): `calculate_sensitivity_and_plot` is not imported by any cell visible in this
# notebook, so this raises NameError on a fresh kernel — add the import to the imports cell.
predictions_dict['m1']['plots']['sensitivity'], _, _ = calculate_sensitivity_and_plot(predictions_dict, config, which_fit='m1')
predictions_dict['m2']['plots']['sensitivity'], _, _ = calculate_sensitivity_and_plot(predictions_dict, config, which_fit='m2')

## Plot Forecasts

In [None]:
# Forecast from the 'm2' fit, stored under predictions_dict['m2']['forecasts']['best'].
predictions_dict['m2']['forecasts'] = {}
predictions_dict['m2']['forecasts']['best'] = get_forecast(predictions_dict, train_fit='m2', 
                                                           model=config['fitting']['model'], 
                                                           forecast_days=config['forecast']['forecast_days'])

# NOTE(review): the second argument is the literal 'test' here, while the plot_forecast calls
# in the uncertainty section pass a (state, district) tuple — confirm what plot_forecast expects.
predictions_dict['m2']['plots']['forecast_best'] = plot_forecast(predictions_dict, 
                                                                 'test', 
                                                                 error_bars=False)

# Run forecast_all_trials for each fit and store the result under 'trials_processed'.
predictions_dict['m1']['trials_processed'] = forecast_all_trials(predictions_dict, train_fit='m1', 
                                                                 model=config['fitting']['model'], 
                                                                 forecast_days=config['forecast']['forecast_days'])

predictions_dict['m2']['trials_processed'] = forecast_all_trials(predictions_dict, train_fit='m2', 
                                                                 model=config['fitting']['model'], 
                                                                 forecast_days=config['forecast']['forecast_days'])

# Top-k trial plots for 'm2'; the result is indexed by the configured column objects below.
kforecasts = plot_top_k_trials(predictions_dict, train_fit='m2',
                               k=config['forecast']['num_trials_to_plot'],
                               which_compartments=config['forecast']['plot_topk_trials_for_columns'])
                               
# Re-key the plots by column name (column.name) for storage in predictions_dict.
predictions_dict['m2']['plots']['forecasts_topk'] = {}
for column in config['forecast']['plot_topk_trials_for_columns']:
    predictions_dict['m2']['plots']['forecasts_topk'][column.name] = kforecasts[column]

## Uncertainty + Uncertainty Forecasts

In [None]:
# Re-read the configuration from disk.
# NOTE(review): presumably to discard any in-place changes made by earlier cells — confirm.
config = read_config(config_filename)

In [None]:
# Keyword arguments for the uncertainty estimator: the fit results, the fitting and forecast
# sections of the config, plus every entry of config['uncertainty']['uncertainty_params'].
uncertainty_args = {'predictions_dict': predictions_dict, 'fitting_config': config['fitting'],
                    'forecast_config': config['forecast'], **config['uncertainty']['uncertainty_params']}
                    
# config['uncertainty']['method'] is called directly, so it must be a callable at this point.
# NOTE(review): presumably a class resolved by read_config — confirm.
uncertainty = config['uncertainty']['method'](**uncertainty_args)

In [None]:
# Display the 'df_loss' entry of the ensemble-mean forecast.
uncertainty.ensemble_mean_forecast['df_loss']

In [None]:
# Copy the 'df_prediction' of every forecast returned by get_forecasts() into the 'm2'
# forecasts, under the same key that get_forecasts() uses.
uncertainty_forecasts = uncertainty.get_forecasts()
for key in uncertainty_forecasts.keys():
    predictions_dict['m2']['forecasts'][key] = uncertainty_forecasts[key]['df_prediction']
    
# Unlike the entries above, the ensemble mean is stored as the whole forecast object.
predictions_dict['m2']['forecasts']['ensemble_mean'] = uncertainty.ensemble_mean_forecast

In [None]:
# Keep the estimator's beta, beta_loss and the full forecasts dict with the 'm2' results.
predictions_dict['m2']['beta'] = uncertainty.beta
predictions_dict['m2']['beta_loss'] = uncertainty.beta_loss
predictions_dict['m2']['deciles'] = uncertainty_forecasts

In [None]:
# Three forecast plots for the configured (state, district): best fit with each of two
# percentile forecasts, and the ensemble mean with the first of them.
# NOTE(review): 48.7804878 and 80.48780488 must equal keys of predictions_dict['m2']['forecasts']
# exactly; they look like percentile keys produced by the uncertainty step — confirm, since
# hard-coded floats stop matching if those keys change.
predictions_dict['m2']['plots']['forecast_best_50'] = plot_forecast(predictions_dict, 
                                                                    (config['fitting']['data']['dataloading_params']['state'], 
                                                                     config['fitting']['data']['dataloading_params']['district']),
                                                                    fits_to_plot=['best', 48.7804878], error_bars=False)
predictions_dict['m2']['plots']['forecast_best_80'] = plot_forecast(predictions_dict, 
                                                                    (config['fitting']['data']['dataloading_params']['state'], 
                                                                     config['fitting']['data']['dataloading_params']['district']),
                                                                    fits_to_plot=['best', 80.48780488], error_bars=False)
predictions_dict['m2']['plots']['forecast_ensemble_mean_50'] = plot_forecast(predictions_dict, 
                                                                             (config['fitting']['data']['dataloading_params']['state'], 
                                                                              config['fitting']['data']['dataloading_params']['district']),
                                                                             fits_to_plot=['ensemble_mean', 48.7804878], error_bars=False)

In [None]:
# Display the columns configured for the percentile plots in the next cell.
config['forecast']['plot_ptiles_for_columns']

In [None]:
# Percentile plots for the configured columns; plot_ptiles' result is indexed by the column
# objects and re-keyed by column name (column.name) for storage.
ptiles_plots = plot_ptiles(predictions_dict, which_compartments=config['forecast']['plot_ptiles_for_columns'])
predictions_dict['m2']['plots']['forecasts_ptiles'] = {}
for column in config['forecast']['plot_ptiles_for_columns']:
    predictions_dict['m2']['plots']['forecasts_ptiles'][column.name] = ptiles_plots[column]

## Create Report

In [None]:
# Save predictions_dict and build the report, using the run's timestamped output folder as ROOT_DIR.
save_dict_and_create_report(predictions_dict, config, ROOT_DIR=output_folder, config_filename=config_filename)

## Create Output CSV

In [None]:
# Write the decile forecasts to deciles.csv inside the report folder.
# NOTE(review): to_csv does not create directories; this assumes output_folder already exists,
# presumably created by the report step above — confirm.
df_output = create_decile_csv_new(predictions_dict)
df_output.to_csv(f'{output_folder}/deciles.csv')

## Log on W&B

In [None]:
# Start a Weights & Biases run in the "covid-modelling" project, attaching the config that
# was read with preprocess=False and passed through make_date_key_str.
wandb.init(project="covid-modelling", config=wandb_config)

In [None]:
# Log the contents of predictions_dict through the project's W&B logging helper.
log_wandb(predictions_dict)

## Create All Trials Output

In [None]:
# Write the output of create_all_trials_csv to all_trials.csv inside the report folder.
df_all = create_all_trials_csv(predictions_dict)
df_all.to_csv(f'{output_folder}/all_trials.csv')

## Log on MLFlow

In [None]:
# Two small dicts with identical keys and different list values.
# NOTE(review): nothing else in the notebook reads `a` or `b`; this looks like leftover scratch
# work sitting under the MLflow heading — consider removing.
a = {1:[2,3],2:[3,4],3:[4,5]}
b = {1:[4,5],2:[5,6],3:[7,8]}

In [None]:
# dict.update replaces the value of every key that `b` shares with `a`; lists are not merged.
a.update(b)

In [None]:
# Display `a` after the update.
a


In [None]:
# Log this run to MLflow: experiment and run names come from config['logging'], and the
# timestamped report folder is passed as the artifact directory.
log_mlflow(config['logging']['experiment_name'], run_name=config['logging']['run_name'], artifact_dir=output_folder)