In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from hyperopt import hp, tpe, fmin, Trials
from tqdm import tqdm

import datetime
import copy
import json

import sys
sys.path.append('../../')

from data.dataloader import Covid19IndiaLoader
from data.processing.granular import get_data

from models.seir import SEIR_Testing, SEIRHD, SEIR_Movement, SEIR_Movement_Testing

from main.seir.fitting import single_fitting_cycle, get_variable_param_ranges
from main.seir.forecast import get_forecast, create_region_csv, create_all_csvs, write_csv
from utils.create_report import create_report
from utils.loss import Loss_Calculator
from viz import plot_forecast

In [170]:
# Relative path to the case-summary CSV that the cells below load and clean.
filename = '../../data/data/mumbai/case_summary_expanded.csv'

In [197]:
# Run the project helper imported from data.processing.granular on the CSV and
# display whatever it returns.
# NOTE(review): get_data is defined outside this notebook; the recorded output
# suggests it yields an already-cleaned table — confirm against its source.
get_data(filename)

Unnamed: 0,state,district,date,total,active,recovered,deceased,stable_asymptomatic,stable_symptomatic,critical,ccc2,dchc,dch,hq,total_beds,non_o2_beds,o2_beds,icu,ventilator
0,Maharashtra,Mumbai,2020-05-23,28634,20209,7476,949,13955,5491,763,3795,1086,5208,10120,10089,6876,2606,607,230
1,Maharashtra,Mumbai,2020-05-24,30359,21297,8074,988,13955,5491,763,3795,1086,5208,11208,10089,6837,2642,610,242
2,Maharashtra,Mumbai,2020-05-25,31789,22359,8404,1026,15809,5732,818,3855,1136,5647,11721,10638,7413,2606,619,237
3,Maharashtra,Mumbai,2020-05-26,32791,22912,8814,1065,16281,5776,855,3977,1240,5793,11902,11010,7637,2735,638,262
4,Maharashtra,Mumbai,2020-05-27,33835,23684,9054,1065,16976,5845,863,4132,1152,5825,12575,11109,7686,2779,644,267
5,Maharashtra,Mumbai,2020-05-28,35273,24321,9817,1135,17414,6060,847,4080,953,4997,14291,10030,6586,2799,645,287
6,Maharashtra,Mumbai,2020-05-29,36710,19529,16008,1173,12607,6070,852,4183,962,5004,9380,10149,6535,3010,604,299
7,Maharashtra,Mumbai,2020-05-30,38220,20629,16364,1227,13315,6309,1005,4222,991,5089,10327,10302,6538,3094,670,297
8,Maharashtra,Mumbai,2020-05-31,39464,21391,16794,1279,14098,6315,978,4289,980,5143,11079,10312,6347,3279,686,299
9,Maharashtra,Mumbai,2020-06-01,40877,22571,16987,1319,15045,6505,1021,4359,2981,5663,9568,13003,8555,3375,1073,362


In [195]:
# Read the raw CSV with pandas defaults; the real header row and the dtypes
# are fixed up by the cells that follow.
df = pd.read_csv(filename)

In [172]:
# Use the row labelled 0 as the header: each label is lower-cased, stripped of
# surrounding whitespace, and has its spaces replaced by underscores.
header_row = df.loc[0]
df.columns = header_row.map(lambda label: label.lower().strip().replace(' ', '_'))

In [173]:
# Discard the rows labelled 0, 1 and 2 (row 0 was used as the header above).
# NOTE(review): rows 1-2 are presumably extra header/blank lines in the raw
# file — confirm against the CSV.
df.drop(index=[0, 1, 2], inplace=True)

In [174]:
# Drop every row that has at least one missing value
# (dropna defaults: axis=0, how='any').
df.dropna(inplace=True)

In [175]:
# Remove every comma inside string cells (regex mode performs substring
# replacement) so the counts can be parsed as numbers.
# NOTE(review): the commas are presumably thousands separators — confirm.
df.replace(to_replace=',', value='', regex=True, inplace=True)

In [176]:
# Parse every column from 'total_cases' through the last column as numeric,
# one column at a time (the label slice is inclusive and open-ended).
df.loc[:, 'total_cases':] = (
    df.loc[:, 'total_cases':]
    .apply(pd.to_numeric, axis=0)
)

In [177]:
# Parse the 'date' column into datetimes using pandas' default format inference.
# NOTE(review): if the raw dates are day-first, the default inference may be
# ambiguous — confirm against the CSV.
df['date'] = df['date'].pipe(pd.to_datetime)

In [178]:
# Let pandas re-infer concrete dtypes for any columns still stored as `object`;
# the int64-based row filter in the next cell depends on the resulting dtypes.
df = df.infer_objects()

In [179]:
# Keep only the rows in which every int64 column is strictly positive.
int_cols = df.select_dtypes(include='int64')
df = df[(int_cols > 0).all(axis=1)]

In [180]:
# Renumber the remaining rows 0..n-1, discarding the old row labels.
df = df.reset_index(drop=True)

In [181]:
# Remove every column whose name contains '_capacity'.
capacity_cols = [name for name in df.columns if '_capacity' in name]
df = df.drop(columns=capacity_cols)

In [182]:
# Remove the '_occupied' marker from every column name.
df.columns = [
    label.replace('_occupied', '')
    for label in df.columns
]

In [183]:
# Map the raw column names onto the shorter names used by the rest of the notebook.
short_names = {
    'city': 'district',
    'total_cases': 'total',
    'active_cases': 'active',
    'icu_beds': 'icu',
    'ventilator_beds': 'ventilator',
}
df = df.rename(columns=short_names)

In [184]:
# Derive 'hq' as the active cases that are not counted in 'total_beds'.
# NOTE(review): 'hq' presumably stands for home quarantine — confirm.
df['hq'] = df['active'].sub(df['total_beds'])

In [185]:
# Derive 'non_o2_beds' as whatever part of 'total_beds' is neither an O2 bed nor an ICU bed.
o2_plus_icu = df['o2_beds'].add(df['icu'])
df['non_o2_beds'] = df['total_beds'].sub(o2_plus_icu)

In [186]:
# Move 'hq' so that it sits immediately before 'o2_beds'.
# Pop first so the target position is computed on the columns without 'hq';
# Index.get_loc returns the integer position directly (column names are
# assumed to be unique, as they are after the renaming above).
col = df.pop('hq')
df.insert(df.columns.get_loc('o2_beds'), 'hq', col)

In [187]:
# Move 'total_beds' so that it sits immediately before 'o2_beds'.
# Pop first so the target position is computed on the columns without
# 'total_beds'; Index.get_loc returns the integer position directly
# (column names are assumed to be unique).
col = df.pop('total_beds')
df.insert(df.columns.get_loc('o2_beds'), 'total_beds', col)

In [188]:
# Move 'non_o2_beds' so that it sits immediately before 'o2_beds'.
# Pop first so the target position is computed on the columns without
# 'non_o2_beds'; Index.get_loc returns the integer position directly
# (column names are assumed to be unique).
col = df.pop('non_o2_beds')
df.insert(df.columns.get_loc('o2_beds'), 'non_o2_beds', col)

In [189]:
# Row-wise consistency check: hq + non_o2_beds + o2_beds + icu should equal 'active'.
# With 'hq' and 'non_o2_beds' derived as above, this holds by construction.
bucket_cols = ['hq', 'non_o2_beds', 'o2_beds', 'icu']
df[bucket_cols].sum(axis=1).eq(df['active'])

0     True
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
10    True
11    True
12    True
13    True
14    True
15    True
16    True
17    True
18    True
19    True
20    True
dtype: bool

In [190]:
# Row-wise consistency check: ccc2 + dchc + dch should equal 'total_beds'.
# Rows that display False are inconsistent in the source data.
facility_cols = ['ccc2', 'dchc', 'dch']
df[facility_cols].sum(axis=1).eq(df['total_beds'])

0      True
1      True
2      True
3      True
4      True
5      True
6      True
7      True
8     False
9      True
10     True
11     True
12     True
13     True
14     True
15     True
16     True
17     True
18     True
19     True
20     True
dtype: bool

In [191]:
# Row-wise consistency check: the three severity buckets should add up to 'active'.
# Rows that display False are inconsistent in the source data.
severity_cols = ['stable_asymptomatic', 'stable_symptomatic', 'critical']
df[severity_cols].sum(axis=1).eq(df['active'])

0      True
1     False
2      True
3      True
4      True
5      True
6      True
7      True
8      True
9      True
10     True
11     True
12     True
13     True
14     True
15     True
16     True
17     True
18     True
19     True
20     True
dtype: bool

In [192]:
# Display the cleaned frame for a final visual check.
df

Unnamed: 0,state,district,date,total,active,recoveries,deaths,stable_asymptomatic,stable_symptomatic,critical,ccc2,dchc,dch,hq,total_beds,non_o2_beds,o2_beds,icu,ventilator
0,Maharashtra,Mumbai,2020-05-23,28634,20209,7476,949,13955,5491,763,3795,1086,5208,10120,10089,6876,2606,607,230
1,Maharashtra,Mumbai,2020-05-24,30359,21297,8074,988,13955,5491,763,3795,1086,5208,11208,10089,6837,2642,610,242
2,Maharashtra,Mumbai,2020-05-25,31789,22359,8404,1026,15809,5732,818,3855,1136,5647,11721,10638,7413,2606,619,237
3,Maharashtra,Mumbai,2020-05-26,32791,22912,8814,1065,16281,5776,855,3977,1240,5793,11902,11010,7637,2735,638,262
4,Maharashtra,Mumbai,2020-05-27,33835,23684,9054,1065,16976,5845,863,4132,1152,5825,12575,11109,7686,2779,644,267
5,Maharashtra,Mumbai,2020-05-28,35273,24321,9817,1135,17414,6060,847,4080,953,4997,14291,10030,6586,2799,645,287
6,Maharashtra,Mumbai,2020-05-29,36710,19529,16008,1173,12607,6070,852,4183,962,5004,9380,10149,6535,3010,604,299
7,Maharashtra,Mumbai,2020-05-30,38220,20629,16364,1227,13315,6309,1005,4222,991,5089,10327,10302,6538,3094,670,297
8,Maharashtra,Mumbai,2020-05-31,39464,21391,16794,1279,14098,6315,978,4289,980,5143,11079,10312,6347,3279,686,299
9,Maharashtra,Mumbai,2020-06-01,40877,22571,16987,1319,15045,6505,1021,4359,2981,5663,9568,13003,8555,3375,1073,362
