In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
import datetime

import copy
import json

import sys
sys.path.append('../../')

from data.dataloader import get_covid19india_api_data
from models.ihme.dataloader import get_dataframes_cached
from data.processing import get_data, get_concat_data

from main.seir.losses import Loss_Calculator
from main.seir.fitting import get_variable_param_ranges, data_setup, run_cycle, get_regional_data
from main.seir.forecast import get_forecast, create_region_csv, create_all_csvs, write_csv, plot_forecast
from utils.create_report import create_report
from recovs import smooth_using_active

## Load Covid19india Data

In [4]:
dataframes = get_dataframes_cached()

data/dataframes_ts_02062020.pkl


In [5]:
predictions_dict = {}

## Select Districts to fit on

In [6]:
# districts_to_show = [('Maharashtra', 'Pune'), 
#                      ('Maharashtra', 'Mumbai'), 
#                      ('Rajasthan', 'Jaipur'), 
#                      ('Gujarat', 'Ahmedabad'), 
#                      ('Karnataka', 'Bengaluru Urban'),
#                      ('Delhi', None)]

districts_to_show = [('Maharashtra', 'Mumbai')]

In [7]:
# df_district, df_district_raw_data = get_regional_data(
#         dataframes, state='Maharashtra', district='Mumbai', 
#         data_from_tracker=True, data_format=None, filename=None)
df_district = get_concat_data(dataframes, state='Maharashtra', district='Mumbai', concat=True)

60 deaths and 1148 recoveries in Maharashtra with unknown district


In [8]:
df_district[-10:]

Unnamed: 0,date,total_infected,hospitalised,deceased,recovered,district,state
73,2020-05-23,28817,21772,949,6096,Mumbai,Maharashtra
74,2020-05-24,30542,22471,988,7083,Mumbai,Maharashtra
75,2020-05-25,31972,23863,1026,7083,Mumbai,Maharashtra
76,2020-05-26,32974,23896,1065,8013,Mumbai,Maharashtra
77,2020-05-27,34018,24908,1097,8013,Mumbai,Maharashtra
78,2020-05-28,35485,25700,1135,8650,Mumbai,Maharashtra
79,2020-05-29,35485,25700,1135,8650,Mumbai,Maharashtra
80,2020-05-30,38442,20851,1227,16364,Mumbai,Maharashtra
81,2020-05-31,38442,20851,1227,16364,Mumbai,Maharashtra
82,2020-06-01,41099,22795,1319,16985,Mumbai,Maharashtra


In [25]:
# df_district[44:][-10:]
new_df_district = smooth_using_active(df_district, last_n_days=15)
new_df_district['new_ti'] = new_df_district['n_recovered'] + new_df_district['n_hospitalised'] + new_df_district['n_deceased'] 
new_df_district['orig_ti'] = new_df_district['recovered'] + new_df_district['hospitalised'] + new_df_district['deceased']
new_df_district['new_diff'] = new_df_district['total_infected'] - new_df_district['new_ti'] 
new_df_district['orig_diff'] = new_df_district['total_infected'] - new_df_district['orig_ti']
new_df_district['deceased_diff'] = new_df_district['deceased'] - new_df_district['n_deceased']
print (new_df_district[['date', 'total_infected', 'new_ti', 'new_diff', 'deceased_diff']][-25:])
# # new_df_district[-10:]
# # print (new_df_district['n_recovered'] - new_df_district['recovered'])
# # print (new_df_district['n_hospitalised'] - new_df_district['hospitalised'])
# new_df_district['recovered'] = new_df_district['n_recovered']
# new_df_district['hospitalised'] = new_df_district['n_hospitalised']
# del new_df_district['n_recovered']
# del new_df_district['n_hospitalised']
# new_df_district

date  total_infected  new_ti  new_diff  deceased_diff
58 2020-05-08           12142   12142         0              0
59 2020-05-09           12864   12864         0              0
60 2020-05-10           13739   13739         0              0
61 2020-05-11           14521   14521         0              0
62 2020-05-12           14947   14947         0              0
63 2020-05-13           15747   15747         0              0
64 2020-05-14           16738   16738         0              0
65 2020-05-15           17671   17671         0              0
66 2020-05-16           18555   18915      -360            -11
67 2020-05-17           20150   20634      -484            -15
68 2020-05-18           21335   21819      -484            -15
69 2020-05-19           21335   22137      -802            -25
70 2020-05-20           24118   25134     -1016            -31
71 2020-05-21           25500   26706     -1206            -37
72 2020-05-22           27251   28666     -1415            -44
7

## Perform M1 and M2 fits

In [54]:
smoothfunc = smooth_using_active
ndays=7


In [55]:
for state, district in districts_to_show:
    predictions_dict[(state, district)] = {}
    predictions_dict[(state, district)]['m1'] = rsingle_fitting_cycle(smoothfunc,
        dataframes, state, district, train_period=7, val_period=7,
        n_days_back_smooth=ndays,
        data_from_tracker=True, initialisation='intermediate', num_evals=700,
        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    predictions_dict[(state, district)]['m2'] = rsingle_fitting_cycle(smoothfunc,
        dataframes, state, district, train_period=7, val_period=0, num_evals=700,
        n_days_back_smooth=ndays,
        train_on_val=True, data_from_tracker=True, initialisation='intermediate',
        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    
    predictions_dict[(state, district)]['state'] = state
    predictions_dict[(state, district)]['dist'] = district
    predictions_dict[(state, district)]['fitting_date'] = datetime.datetime.now().strftime("%Y-%m-%d")
    predictions_dict[(state, district)]['datasource'] = 'covid19api' if predictions_dict[(state, district)]['m1']['data_from_tracker'] else 'municipality'
    predictions_dict[(state, district)]['variable_param_ranges'] = predictions_dict[(state, district)]['m1']['variable_param_ranges']
    predictions_dict[(state, district)]['data_last_date'] = predictions_dict[(state, district)]['m2']['data_last_date']

NameError: name 'rsingle_fitting_cycle' is not defined

## Create Master Loss Dataframe

### M1 Loss

In [56]:
starting_key = list(predictions_dict.keys())[0]

loss_columns = pd.MultiIndex.from_product([predictions_dict[starting_key]['m1']['df_loss'].columns, predictions_dict[starting_key]['m1']['df_loss'].index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key in predictions_dict.keys():
    df_loss_master.loc[key, :] = np.around(predictions_dict[key]['m1']['df_loss'].values.T.flatten().astype('float'), decimals=2)
    
df_loss_master

KeyError: 'm1'

### M2 Loss

In [57]:
starting_key = list(predictions_dict.keys())[0]

loss_columns = pd.MultiIndex.from_product([predictions_dict[starting_key]['m2']['df_loss'].columns, predictions_dict[starting_key]['m2']['df_loss'].index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key in predictions_dict.keys():
    df_loss_master.loc[key, :] = np.around(predictions_dict[key]['m2']['df_loss'].values.T.flatten().astype('float'), decimals=2)
    
df_loss_master

KeyError: 'm2'

In [58]:
predictions_dict[('Maharashtra', 'Mumbai')]['m1']['ax']

KeyError: 'm1'

## Plot Forecasts

In [59]:
for region in predictions_dict.keys():
    predictions_dict[region]['forecast'] = plot_forecast(predictions_dict[region], region, both_forecasts=False, error_bars=True)

getting forecasts ..


KeyError: 'm2'

In [60]:
for region in predictions_dict.keys():
    create_report(predictions_dict[region])

KeyError: 'fitting_date'

## Create and Save Output CSV

In [61]:
df_output = create_all_csvs(predictions_dict, icu_fraction=0.02)

compiling csv data ..
getting forecasts ..


KeyError: 'm2'

In [62]:
write_csv(df_output, '../../output-{}.csv'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

NameError: name 'df_output' is not defined