In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from hyperopt import hp, tpe, fmin, Trials
from tqdm import tqdm
# from tqdm.notebook import tqdm

from collections import OrderedDict, defaultdict
import itertools
from functools import partial
import datetime
from joblib import Parallel, delayed
import copy
import json
import pymc3 as pm
from pymc3.ode import DifferentialEquation
from theano.ifelse import ifelse
from theano import tensor as T, function, printing
import theano
theano.config.compute_test_value='ignore'
theano.config.gcc.cxxflags = "-Wno-c++11-narrowing"


from data.dataloader import get_jhu_data, get_covid19india_api_data, get_rootnet_api_data
from data.processing import get_data, get_district_time_series

from models.seir.seir_testing import SEIR_Testing, SEIR_Test_pymc3
from main.seir.optimiser import Optimiser
from main.seir.losses import Loss_Calculator
from main.seir.fitting import single_fitting_cycle, train_val_split
from main.seir.forecast import create_region_csv, create_all_csvs, write_csv, plot_forecast

## Comparison of E/Hosp and I/Hosp ratios

In [None]:
# Plot, per fitted region, the ratio of the E and I compartments to the
# hospitalised count in the M1 prediction.
# NOTE: this cell reads `predictions_dict`, which is only populated by the
# "Perform M1 and M2 fits" cell further down -- run that cell first.
for district, district_dict in predictions_dict.items():
    df_m1 = district_dict['m1']['df_prediction']
    # Keys are (state, district) tuples; district may be None (e.g. Delhi),
    # so drop missing parts instead of printing "None" in the title.
    region_label = ', '.join(str(part) for part in district if part is not None)

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.plot(df_m1['date'], df_m1['E'] / df_m1['hospitalised'],
            '-', color='C0', label='E / Hosp (M1)')
    ax.plot(df_m1['date'], df_m1['I'] / df_m1['hospitalised'],
            '-.', color='C0', label='I / Hosp (M1)')
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
    ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    # The plotted quantity is a ratio, not a head count.
    ax.set_ylabel('Ratio to hospitalised')
    ax.set_xlabel('Time')
    ax.legend()
    ax.set_title('I/Hosp and E/Hosp ratio for {}'.format(region_label))
    ax.grid()
    plt.show()
    # Release the figure so a long region list does not accumulate open figures.
    plt.close(fig)

## Load Covid19india Data

In [None]:
# Fetch the covid19india API data once; `dataframes` is passed to every
# `single_fitting_cycle` / `get_data` call below.
dataframes = get_covid19india_api_data()

In [None]:
# Fit results keyed by (state, district) tuple; filled in by the fitting loop below.
# NOTE: re-running this cell discards every fit computed so far.
predictions_dict = {}

## Select Districts to fit on

In [None]:
# (state, district) pairs to fit. A district of None is used for Delhi --
# presumably meaning a state-level fit; verify against `single_fitting_cycle`.
districts_to_show = [('Maharashtra', 'Pune'), 
                     ('Maharashtra', 'Mumbai'), 
                     ('Rajasthan', 'Jaipur'), 
                     ('Gujarat', 'Ahmedabad'), 
                     ('Karnataka', 'Bengaluru Urban'),
                     ('Delhi', None)]

## Perform M1 and M2 fits

In [None]:
# Run both fits for every selected region and store them under 'm1' / 'm2'.
# The two fits share all settings except the validation window and
# `train_on_val`, so only those differences are spelled out per model.
for state, district in districts_to_show:
    region = (state, district)
    region_fits = predictions_dict[region] = {}
    per_model_overrides = (
        ('m1', {'val_period': 7}),
        ('m2', {'val_period': 0, 'train_on_val': True}),
    )
    for model_key, overrides in per_model_overrides:
        region_fits[model_key] = single_fitting_cycle(
            dataframes, state, district,
            train_period=7,
            data_from_tracker=True,
            initialisation='intermediate',
            # A fresh list per call, exactly as when the calls were written out twice.
            which_compartments=['hospitalised', 'total_infected'],
            **overrides)

In [None]:
class Optimiser_pymc3(Optimiser):
    """Optimiser whose ``solve`` integrates the pymc3 variant of the SEIR testing model."""

    def __init__(self):
        super().__init__()

    def solve(self, variable_params, default_params, df_true, start_date=None, end_date=None,
              state_init_values=None, initialisation='starting', loss_indices=[-20, -10]):
        """Integrate the SEIR ODE for one parameter set and return the solver output.

        Args:
            variable_params: dict of parameters being fitted/sampled; merged with
                ``default_params`` (``default_params`` wins on key clashes).
            default_params: dict of fixed parameters. Must provide ``starting_date``
                unless ``start_date`` is given, and ``N``, ``P_severe``, ``P_fatal``,
                ``E_hosp_ratio`` and ``I_hosp_ratio`` (in either dict) when
                ``initialisation == 'intermediate'``.
            df_true: observed data with ``date``, ``hospitalised``, ``recovered``
                and ``deceased`` columns.
            start_date: optional ``datetime`` or ``'%Y-%m-%d'`` string overriding
                ``starting_date``.
            end_date: optional ``datetime`` or ``'%Y-%m-%d'`` string; defaults to
                the last ``date`` in ``df_true``.
            state_init_values: accepted for interface compatibility; it is rebuilt
                from ``df_true`` when ``initialisation == 'intermediate'`` and is
                otherwise not forwarded to the solver.
            initialisation: ``'intermediate'`` seeds the compartments from the row
                ``df_true.iloc[loss_indices[0]]``; any other value leaves the
                solver's own initial state in place.
            loss_indices: only ``loss_indices[0]`` is read here (never mutated).

        Returns:
            Whatever ``solver.solve_ode`` returns. The conversion to a prediction
            dataframe via ``solver.return_predictions`` is intentionally not
            applied, matching the previous (commented-out) behaviour.
        """
        params_dict = {**variable_params, **default_params}

        if initialisation == 'intermediate':
            row = df_true.iloc[loss_indices[0], :]
            hospitalised = row['hospitalised']
            severity_total = params_dict['P_severe'] + params_dict['P_fatal']

            key_order = ['S', 'E', 'I', 'D_E', 'D_I',
                         'R_mild', 'R_severe_home', 'R_severe_hosp', 'R_fatal', 'C', 'D']
            state_init_values = OrderedDict.fromkeys(key_order, 0)

            # Split the observed hospitalised count between the severe and fatal
            # tracks in proportion to their probabilities.
            state_init_values['R_severe_hosp'] = params_dict['P_severe'] / severity_total * hospitalised
            state_init_values['R_fatal'] = params_dict['P_fatal'] / severity_total * hospitalised
            state_init_values['C'] = row['recovered']
            state_init_values['D'] = row['deceased']

            # Unobserved compartments are expressed as multiples of hospitalised.
            state_init_values['E'] = params_dict['E_hosp_ratio'] * hospitalised
            state_init_values['I'] = params_dict['I_hosp_ratio'] * hospitalised

            # S is still 0 here, so this sum is the total of all other compartments.
            non_susceptible = sum(state_init_values.values())
            state_init_values['S'] = params_dict['N'] - non_susceptible

            # The solver is given population fractions, not absolute counts.
            for key in state_init_values:
                state_init_values[key] = state_init_values[key] / params_dict['N']
            params_dict['state_init_values'] = state_init_values

        if end_date is None:
            end_date = df_true.iloc[-1, :]['date']
        elif isinstance(end_date, str):
            end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

        if start_date is not None:
            if isinstance(start_date, str):
                start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
            params_dict['starting_date'] = start_date

        # `SEIR_Test_pymc3` is the class this notebook imports; the previous name
        # `SEIR_Testing_pymc3` was never defined and raised NameError.
        solver = SEIR_Test_pymc3(**params_dict)
        total_days = (end_date - params_dict['starting_date']).days
        return solver.solve_ode(total_no_of_days=total_days, time_step=1)

In [None]:
def get_predictions_mcmc(params, default_params, df_train, initialisation, train_period):
    """Solve the model for one sampled parameter set.

    Reads the notebook-level globals ``train_on_val``, ``optimiser`` and (only
    when ``train_on_val`` is falsy) ``df_val``.

    Args:
        params: variable parameters forwarded to ``optimiser.solve``.
        default_params: fixed parameters forwarded to ``optimiser.solve``.
        df_train: training frame; also passed as the ground-truth frame.
        initialisation: forwarded unchanged to ``optimiser.solve``.
        train_period: the solve is given ``loss_indices=[-train_period, None]``.

    Returns:
        Whatever ``optimiser.solve`` returns.
    """
    # The forecast ends on the last training date when training on the
    # validation window, otherwise on the last validation date.
    horizon_frame = df_train if train_on_val else df_val
    return optimiser.solve(
        params,
        default_params,
        df_train,
        end_date=horizon_frame.iloc[-1, :]['date'],
        initialisation=initialisation,
        loss_indices=[-train_period, None],
    )

In [None]:
# Run configuration for the single-region MCMC fit below.
# Index (from the end of df_train) of the row the intermediate fit starts from;
# also forwarded as loss_indices=[-train_period, None].
train_period=7
# Passed as `val_size` to train_val_split.
val_period=0
# When True the smoothed split uses val_size=0 and forecasts end on the last training date.
train_on_val=True
# True: get_data reads the 'districts_daily' dataframe; False: get_data is called
# with disable_tracker=True and `filename`.
data_from_tracker=True
# 'starting' or 'intermediate'; selects how default params / initial state are built.
initialisation='intermediate'
# Forwarded to calculate_loss and create_plots.
which_compartments=['hospitalised', 'total_infected']
# NOTE(review): not referenced anywhere in the visible notebook.
use_mcmc = True
# Region to fit; both are passed to get_data.
state = "Maharashtra"
district= "Mumbai"
# Only used by get_data when data_from_tracker is False.
filename=None
# When True the split is done on the raw data truncated at 2020-03-25.
pre_lockdown=False
# Passed as `N` to init_default_params -- presumably the population size; TODO confirm.
N=1e7

In [None]:
# Load the region's time series and build the train / validation frames
# (rolling-mean smoothed and, outside the pre-lockdown path, un-smoothed "nora" copies).
print('fitting to data with "train_on_val" set to {} ..'.format(train_on_val))

if data_from_tracker:
    df_district = get_data(dataframes, state=state, district=district, use_dataframe='districts_daily')
else:
    df_district = get_data(dataframes, state, district, disable_tracker=True, filename=filename)

# Raw case data, truncated at the 2020-03-25 cut-off.
df_district_raw_data = get_data(dataframes, state=state, district=district, use_dataframe='raw_data')
df_district_raw_data = df_district_raw_data[df_district_raw_data['date'] <= '2020-03-25']

if district is None:
    district = ''

# Get train val split
if pre_lockdown:
    df_train, df_val, df_true_fitting = train_val_split(
        df_district_raw_data, train_rollingmean=False, val_rollingmean=False, val_size=0)
else:
    # The smoothed split keeps no validation rows when training on the
    # validation window; the un-smoothed split always uses `val_period`.
    smoothed_val_size = 0 if train_on_val else val_period
    df_train, df_val, df_true_fitting = train_val_split(
        df_district, train_rollingmean=True, val_rollingmean=True, val_size=smoothed_val_size)
    df_train_nora, df_val_nora, df_true_fitting = train_val_split(
        df_district, train_rollingmean=False, val_rollingmean=False, val_size=val_period)

print('train\n', df_train.tail())
print('val\n', df_val)

In [None]:
# Initialise the optimiser and derive the fixed (non-sampled) parameters.
optimiser = Optimiser_pymc3()

if initialisation == 'starting':
    # Start from the first raw-data row, with at least one infected person.
    first_raw_row = df_district_raw_data.iloc[0, :]
    init_infected = max(first_raw_row['total_infected'], 1)
    start_date = first_raw_row['date']
    default_params = optimiser.init_default_params(df_train, N=N, init_infected=init_infected,
                                                   start_date=start_date)
elif initialisation == 'intermediate':
    # Start `train_period` rows before the end of the training frame; the initial
    # state is rebuilt from observed data in `Optimiser_pymc3.solve`.
    start_date = df_train.iloc[-train_period, :]['date']
    default_params = optimiser.init_default_params(df_train, N=N, init_infected=0,
                                                   start_date=start_date)
else:
    # Fail here instead of silently reusing a stale `default_params`
    # left in the kernel by an earlier run.
    raise ValueError(
        "initialisation must be 'starting' or 'intermediate', got {!r}".format(initialisation))

In [None]:
#TODO implement which compartments
# Model object supplying the ODE right-hand side and its default parameters / initial state.
SEIR_Test_obj = SEIR_Test_pymc3()
seir_attributes = vars(SEIR_Test_obj)
num_patients = seir_attributes['vanilla_params']['N']
init_vals = list(seir_attributes['state_init_values'].values())

# Dimensions handed to the pymc3 DifferentialEquation.
num_states, num_params = 11, 7
num_steps = 40
num_train_steps = 7

# Sampler settings.
burn_in, mcmc_steps = 10, 20

# Observations the likelihood is fitted to: the trailing rows of the training frame.
observed = df_train['total_infected'][-num_train_steps:]
num_train = len(df_train)

In [None]:
# Wrap the model's derivative function as a pymc3 ODE operator evaluated on a
# unit-step time grid of `num_steps` points starting at t0 = 0.
ode_time_grid = np.arange(num_steps)
sir_model = DifferentialEquation(
    func=SEIR_Test_obj.get_derivative,
    times=ode_time_grid,
    n_states=num_states,
    n_theta=num_params,
    t0=0,
)

In [None]:
# Bayesian model: uniform priors on the SEIR parameters, ODE solution as the
# mean of a Normal likelihood on the observed total infected counts.
with pm.Model() as model:
    R0 = pm.Uniform("R0", lower=1, upper=3)  # alternative range noted: (1.6, 3)
    T_inc = pm.Uniform("T_inc", lower=1, upper=5)  # alternative range noted: (3, 4)
    T_inf = pm.Uniform("T_inf", lower=1, upper=4)  # alternative range noted: (3, 4)
    # The variable name must not carry stray whitespace: trace points are keyed
    # by this string and later used as model parameter names.
    T_recov_severe = pm.Uniform("T_recov_severe", lower=9, upper=20)
    P_severe = pm.Uniform("P_severe", lower=0.3, upper=0.99)
    P_fatal = pm.Uniform("P_fatal", lower=1e-6, upper=0.3)
    intervention_amount = pm.Uniform("intervention_amount", lower=0.3, upper=1)
    # These two are not part of `theta`, so the likelihood does not constrain
    # them; they only enter later through the trace points.
    E_hosp_ratio = pm.Uniform("E_hosp_ratio", lower=1e-6, upper=2)
    I_hosp_ratio = pm.Uniform("I_hosp_ratio", lower=1e-6, upper=1)

    ode_solution = sir_model(y0=init_vals, theta=[R0, T_inc, T_inf, T_recov_severe, P_severe,
                                                  P_fatal, intervention_amount])
    # The ode_solution has a shape of (n_times, n_states)

    # NOTE(review): the ODE grid has `num_steps` rows, so this slice is only
    # non-empty and aligned with `observed` if num_train <= num_steps -- confirm.
    predictions = ode_solution[num_train - num_train_steps - 1:num_train - 1]
    # Columns 6, 7, 8 are summed as "hospitalised"; assuming the state order matches
    # `key_order` in Optimiser_pymc3.solve these are R_severe_home, R_severe_hosp
    # and R_fatal, with 9 = C (recovered) and 10 = D (deceased) -- TODO confirm.
    hospitalised = predictions[:, 6] + predictions[:, 7] + predictions[:, 8]
    recovered = predictions[:, 9]
    deceased = predictions[:, 10]
    # Scale by the population held in `num_patients`.
    total_infected = (hospitalised + recovered + deceased) * num_patients
    # NOTE(review): no sigma is passed, so pymc3's default scale applies. A
    # HalfNormal prior on sigma (scaled by observed.std()) was sketched here
    # previously but left disabled -- confirm which is intended.
    Y = pm.Normal('Y', mu=total_infected, observed=observed)

    prior = pm.sample_prior_predictive()
    trace = pm.sample(mcmc_steps, tune=burn_in, target_accept=0.9, cores=4)
    posterior_predictive = pm.sample_posterior_predictive(trace)

In [None]:
# Inspect the first sampled point of the trace.
trace[0]

In [None]:
# Solve the model once per sampled point and place the results side by side
# (one block of columns per sample).
prediction_frames = []
for params in trace:
    df_prediction = get_predictions_mcmc(params, default_params, df_train, initialisation, train_period)
    prediction_frames.append(df_prediction)

# Concatenate once instead of growing a frame inside the loop; an empty trace
# yields an empty frame, as before.
total_df_predictions = pd.concat(prediction_frames, axis=1) if prediction_frames else pd.DataFrame()

In [None]:
# Summarise the per-sample predictions: row-wise mean with a +/- 1.96 * std band.
df_prediction = total_df_predictions.mean(axis=1)
prediction_band = 1.96 * total_df_predictions.std(axis=1)
df_upper = df_prediction + prediction_band
df_lower = df_prediction - prediction_band

# NOTE(review): `calculate_loss`, `create_plots` and `best_params` are neither
# imported nor defined in the visible part of this notebook; they must already
# exist in the kernel for this cell to run -- confirm where they come from.
df_loss = calculate_loss(df_train_nora, df_val_nora, df_prediction, train_period,
                         train_on_val, which_compartments=which_compartments)

ax = create_plots(df_prediction, df_train, df_val, df_train_nora, df_val_nora, train_period, state, district,
                  which_compartments=which_compartments)

# Bundle the run's artefacts; written out explicitly instead of looking the
# names up with eval().
results_dict = {
    'best_params': best_params,
    'default_params': default_params,
    'optimiser': optimiser,
    'df_prediction': df_prediction,
    'df_district': df_district,
    'df_train': df_train,
    'df_val': df_val,
    'df_loss': df_loss,
    'ax': ax,
}


## Create Master Loss Dataframe

In [None]:
# Build one wide table of M1 losses: one row per fitted region, one column per
# (loss column, loss row) pair of the per-region `df_loss` frames.
# Use the first fitted region as the column template so the cell does not
# depend on any particular region having been fitted.
template_loss = next(iter(predictions_dict.values()))['m1']['df_loss']
loss_columns = pd.MultiIndex.from_product([template_loss.columns, template_loss.index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key, region_fits in predictions_dict.items():
    # Transpose before flattening so values are ordered column-major, matching
    # the (column, row) ordering of `loss_columns`.
    flat_losses = region_fits['m1']['df_loss'].values.T.flatten().astype('float')
    df_loss_master.loc[key, :] = np.around(flat_losses, decimals=2)

df_loss_master

In [None]:
# Display everything stored for Mumbai (the 'm1' and 'm2' fit results).
# NOTE(review): this dumps the whole nested result; consider showing only the keys.
predictions_dict[('Maharashtra','Mumbai')]

## Plot Forecasts

In [None]:
# Draw the forecast plot for every fitted region.
for region, region_predictions in predictions_dict.items():
    plot_forecast(region_predictions, region, both_forecasts=False)

## Create and Save Output CSV

In [None]:
# Build the combined output table for all fitted regions.
# initialisation / train_period mirror the values used for the fits above.
df_output = create_all_csvs(predictions_dict, initialisation='intermediate', train_period=7, icu_fraction=0.02)

In [None]:
# Write the output two directories up, with a timestamp so earlier runs are not overwritten.
write_csv(df_output, '../../output-{}.csv'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

## Custom Pune Plotting

In [None]:
# Pune forecast, passing an .eps path under ../../plots/ as `filename`.
plot_forecast(predictions_dict[('Maharashtra', 'Pune')], ('Maharashtra', 'Pune'), both_forecasts=False, filename='../../plots/m2-only.eps')

In [None]:
# Mumbai forecast. Every other `plot_forecast` call in this notebook passes the
# region tuple as the second argument; it was missing here.
plot_forecast(predictions_dict[('Maharashtra', 'Mumbai')], ('Maharashtra', 'Mumbai'))