In [1]:
# Re-import edited project modules automatically before each cell runs, so
# changes to the local packages take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from hyperopt import hp, tpe, fmin, Trials
from tqdm import tqdm
# from tqdm.notebook import tqdm

from collections import OrderedDict
import itertools
from functools import partial
import datetime
from joblib import Parallel, delayed
import copy

from data.dataloader import get_jhu_data, get_covid19india_api_data
from data.processing import get_district_time_series

from models.seir.seir_testing import SEIR_Testing
from main.seir.optimiser import Optimiser
from utils.plotting import create_plots

## Load Covid19india Data

In [3]:
# Fetch the covid19india API data through the project loader, then extract the
# time series for Bengaluru (Karnataka) from it.
dataframes = get_covid19india_api_data()
df_district = get_district_time_series(
    dataframes,
    state='Karnataka',
    district='Bengaluru',
)

## Take Rolling Mean

In [4]:
# Smooth `total_infected` with a 5-row trailing mean on a fresh copy of the
# district frame (the source frame is left untouched), then drop the rows whose
# window was incomplete and therefore evaluate to NaN.
df_true_fitting = (
    df_district
    .assign(total_infected=lambda frame: frame['total_infected'].rolling(5).mean())
    .dropna(subset=['total_infected'])
)

## Create Train-Val Split

In [5]:
# Hold out the last 5 rows of the smoothed series for validation and fit on
# everything that precedes them.
df_train, df_val = df_true_fitting.iloc[:-5], df_true_fitting.iloc[-5:]

In [6]:
# Display the training portion of the smoothed series.
df_train

Unnamed: 0,date,total_infected
4,2020-03-13,3.8
5,2020-03-14,4.6
6,2020-03-15,4.8
7,2020-03-16,5.0
8,2020-03-17,5.6
9,2020-03-18,6.8
10,2020-03-19,8.0
11,2020-03-20,9.2
12,2020-03-21,11.2
13,2020-03-22,13.4


## Init Optimiser

In [7]:
# Project optimiser object; its methods (init_default_params, gridsearch,
# bayes_opt, solve) drive the rest of this notebook.
optimiser = Optimiser()

In [8]:
# Build the default parameter set from the training frame; the search and solve
# calls in later cells receive it alongside the parameters they vary.
default_params = optimiser.init_default_params(df_train)

## Run Optimisation

### Gridsearch

In [20]:
# Exhaustive grid over three parameters:
#   R0                  : 1.8 .. 3.0 in steps of 0.1    (13 values)
#   P_severe            : 0.3 .. 0.9 in steps of 0.025  (25 values)
#   intervention_amount : 0.4 .. 1.0 in steps of 0.02   (31 values)
# i.e. 13 * 25 * 31 = 10075 combinations in total.
variable_param_ranges = dict(
    R0=np.linspace(1.8, 3, 13),
    P_severe=np.linspace(0.3, 0.9, 25),
    intervention_amount=np.linspace(0.4, 1, 31),
)

# Evaluate every combination against the training frame using the 'rmse' method.
loss_array, list_of_param_dicts = optimiser.gridsearch(
    df_train,
    default_params,
    variable_param_ranges,
    method='rmse',
    loss_indices=None,
)

HBox(children=(FloatProgress(value=0.0, max=10075.0), HTML(value='')))




### Bayes Opt

In [9]:
# Search space for the Bayesian optimisation: one uniform prior per parameter,
# with the hyperopt label kept identical to the dictionary key.
variable_param_ranges = {
    label: hp.uniform(label, lower, upper)
    for label, lower, upper in (
        ('R0', 1.6, 3),
        ('T_inc', 4, 5),
        ('T_inf', 3, 4),
        ('T_recov_severe', 9, 20),
        ('P_severe', 0.3, 0.99),
        ('intervention_amount', 0.3, 1),
    )
}

# 3500 evaluations scored with the 'rmse' method on the training frame.
# NOTE(review): no random seed is passed here, so results presumably vary
# between runs — confirm whether `bayes_opt` exposes one.
best, trials = optimiser.bayes_opt(
    df_train,
    default_params,
    variable_param_ranges,
    method='rmse',
    num_evals=3500,
    loss_indices=None,
)

100%|██████████| 3500/3500 [03:47<00:00, 15.39trial/s, best loss: 1.8896000199232956]


In [10]:
# Run the solver with the best parameters returned by the Bayesian search.
# NOTE(review): the saved output of this cell shows a (Pdb) prompt inside
# main/seir/optimiser.py solve() — presumably a leftover breakpoint in that
# module; it will stall an unattended "Run All" until it is removed.
optimiser.solve(best, default_params, df_train)

> /home/users/sansiddh/projects/covid/covid-modelling/main/seir/optimiser.py(32)solve()
-> total_days = (end_date - params_dict['starting_date']).days


(Pdb)  c


Unnamed: 0,date,S,E,I,D_E,D_I,R_mild,R_severe_home,R_severe_hosp,R_fatal,C,D,hospitalisations,recoveries,fatalities,infectious_unknown,total_infected
0,2020-03-13,9999996,0,3,0,0,0,0,0,0,0,0,0,0,0,3,0
1,2020-03-14,9999993,2,3,0,0,0,0,0,0,0,0,0,0,0,3,0
2,2020-03-15,9999991,3,3,0,0,0,0,1,0,0,0,1,0,0,3,1
3,2020-03-16,9999989,4,3,0,0,0,0,1,0,0,0,1,0,0,3,1
4,2020-03-17,9999987,6,3,0,0,0,0,2,0,0,0,2,0,0,3,2
5,2020-03-18,9999984,7,4,0,0,0,0,3,0,0,0,3,0,0,4,3
6,2020-03-19,9999981,8,4,0,0,0,0,3,0,0,0,3,0,0,4,3
7,2020-03-20,9999976,10,5,0,0,1,0,4,0,0,0,4,0,0,5,4
8,2020-03-21,9999970,13,7,0,0,1,0,5,0,1,0,5,1,0,7,6
9,2020-03-22,9999965,15,8,0,0,1,0,7,0,1,0,7,1,0,8,8
