In [101]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [102]:
import numpy as np
import pandas as pd
import datetime

import copy
import json

import sys
sys.path.append('../../')

from data.dataloader import get_covid19india_api_data
from models.ihme.dataloader import get_dataframes_cached
from data.processing import get_data, get_concat_data

from main.seir.losses import Loss_Calculator
from main.seir.fitting import get_variable_param_ranges, data_setup, run_cycle, get_regional_data
from main.seir.forecast import get_forecast, create_region_csv, create_all_csvs, write_csv, plot_forecast
from utils.create_report import create_report
from recovs import smooth_using_total

In [103]:
def rsingle_fitting_cycle(smoothingfunc, dataframes, state, 
    district, train_period=7, val_period=7, train_on_val=False, 
    num_evals=1500, data_from_tracker=True, filename=None, data_format='new', pre_lockdown=False, N=1e7, which_compartments=['hospitalised', 'total_infected'], initialisation='starting', n_days_back_smooth=60):
    print('fitting to data with "train_on_val" set to {} ..'.format(train_on_val))

    # Get date
    _, df_district_raw_data = get_regional_data(dataframes, state, district, 
        data_from_tracker, data_format, filename)

    df_district = get_concat_data(dataframes, state=state, district=district, concat=True)

    new_df_district = smoothingfunc(df_district, last_n_days=n_days_back_smooth)

    new_df_district['recovered'] = new_df_district['n_recovered']
    new_df_district['hospitalised'] = new_df_district['n_hospitalised']
    del new_df_district['n_recovered']
    del new_df_district['n_hospitalised']
    df_district = new_df_district
    
    observed_dataframes = data_setup(
        df_district, df_district_raw_data, val_period
    )

    print('train\n', observed_dataframes['df_train'].tail())
    print('val\n', observed_dataframes['df_val'])
    
    return run_cycle(
        state, district, observed_dataframes, data_from_tracker=data_from_tracker,
        train_period=train_period, num_evals=num_evals, N=N, 
        which_compartments=which_compartments, initialisation=initialisation
    )

## Load Covid19india Data

In [104]:
dataframes = get_dataframes_cached()

../../cache/dataframes_ts_03062020.pkl


In [105]:
predictions_dict = {}

## Select Districts to fit on

In [106]:
# districts_to_show = [('Maharashtra', 'Pune'), 
#                      ('Maharashtra', 'Mumbai'), 
#                      ('Rajasthan', 'Jaipur'), 
#                      ('Gujarat', 'Ahmedabad'), 
#                      ('Karnataka', 'Bengaluru Urban'),
#                      ('Delhi', None)]

districts_to_show = [('Maharashtra', 'Mumbai')]

In [107]:
# df_district, df_district_raw_data = get_regional_data(
#         dataframes, state='Maharashtra', district='Mumbai', 
#         data_from_tracker=True, data_format=None, filename=None)
df_district = get_concat_data(dataframes, state='Maharashtra', district='Mumbai', concat=True)

60 deaths and 1148 recoveries in Maharashtra with unknown district


In [108]:
df_district[-10:]

Unnamed: 0,date,total_infected,hospitalised,deceased,recovered,district,state
74,2020-05-24,30542,22471,988,7083,Mumbai,Maharashtra
75,2020-05-25,31972,23863,1026,7083,Mumbai,Maharashtra
76,2020-05-26,32974,23896,1065,8013,Mumbai,Maharashtra
77,2020-05-27,34018,24908,1097,8013,Mumbai,Maharashtra
78,2020-05-28,35485,25700,1135,8650,Mumbai,Maharashtra
79,2020-05-29,35485,25700,1135,8650,Mumbai,Maharashtra
80,2020-05-30,38442,20851,1227,16364,Mumbai,Maharashtra
81,2020-05-31,38442,20851,1227,16364,Mumbai,Maharashtra
82,2020-06-01,41099,22795,1319,16985,Mumbai,Maharashtra
83,2020-06-02,42216,23635,1368,17213,Mumbai,Maharashtra


In [109]:
# df_district[44:][-10:]
new_df_district = smooth_using_total(df_district, last_n_days=15, cols=['total_infected', 'hospitalised', 'recovered'])
compare_cols = ['total_infected','hospitalised','deceased','recovered']
compare = (df_district[compare_cols][-20:], new_df_district[compare_cols][-20:])
# compare = (new_df_district[compare_cols][-20:])
compare = (df_district[compare_cols] - new_df_district[compare_cols])[-20:]
print (compare)

total_infected 2957
hospitalised -4849
recovered 7714
    total_infected  hospitalised  deceased  recovered
64             0.0           0.0       0.0        0.0
65             0.0           0.0       0.0        0.0
66          -351.0         593.0       0.0     -944.0
67          -470.0         795.0       0.0    -1265.0
68          -470.0         795.0       0.0    -1265.0
69          -779.0        1318.0       0.0    -2097.0
70          -987.0        1669.0       0.0    -2656.0
71         -1170.0        1980.0       0.0    -3150.0
72         -1372.0        2322.0       0.0    -3694.0
73         -1567.0        2652.0       0.0    -4219.0
74         -1803.0        3052.0       0.0    -4855.0
75         -2015.0        3409.0       0.0    -5424.0
76         -2130.0        3604.0       0.0    -5734.0
77         -2346.0        3970.0       0.0    -6316.0
78         -2614.0        4423.0       0.0    -7037.0
79         -2865.0        4849.0       0.0    -7714.0
80             0.0          

In [110]:
ti_check = copy.copy(df_district['total_infected'])
ti_check2 = copy.copy(new_df_district['total_infected'])
for col in ['hospitalised', 'recovered', 'deceased']:
    ti_check -= new_df_district[col]
    ti_check2 -= new_df_district[col]
print(pd.concat([ti_check[-25:], ti_check2[-25:]], axis=1))

total_infected  total_infected
59             0.0               0
60             0.0               0
61             0.0               0
62             0.0               0
63             0.0               0
64             0.0               0
65             0.0               0
66          -351.0               0
67          -470.0               0
68          -470.0               0
69          -779.0               0
70          -987.0               0
71         -1170.0               0
72         -1372.0               0
73         -1567.0               0
74         -1803.0               0
75         -2015.0               0
76         -2130.0               0
77         -2346.0               0
78         -2614.0               0
79         -2865.0               0
80             0.0               0
81             0.0               0
82             0.0               0
83             0.0               0


## Perform M1 and M2 fits

In [111]:
smoothfunc = smooth_using_active
ndays=7


In [112]:
for state, district in districts_to_show:
    predictions_dict[(state, district)] = {}
    predictions_dict[(state, district)]['m1'] = rsingle_fitting_cycle(smoothfunc,
        dataframes, state, district, train_period=7, val_period=7,
        n_days_back_smooth=ndays,
        data_from_tracker=True, initialisation='intermediate', num_evals=700,
        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    predictions_dict[(state, district)]['m2'] = rsingle_fitting_cycle(smoothfunc,
        dataframes, state, district, train_period=7, val_period=0, num_evals=700,
        n_days_back_smooth=ndays,
        train_on_val=True, data_from_tracker=True, initialisation='intermediate',
        which_compartments=['hospitalised', 'total_infected', 'deceased', 'recovered'])
    
    predictions_dict[(state, district)]['state'] = state
    predictions_dict[(state, district)]['dist'] = district
    predictions_dict[(state, district)]['fitting_date'] = datetime.datetime.now().strftime("%Y-%m-%d")
    predictions_dict[(state, district)]['datasource'] = 'covid19api' if predictions_dict[(state, district)]['m1']['data_from_tracker'] else 'municipality'
    predictions_dict[(state, district)]['variable_param_ranges'] = predictions_dict[(state, district)]['m1']['variable_param_ranges']
    predictions_dict[(state, district)]['data_last_date'] = predictions_dict[(state, district)]['m2']['data_last_date']

fitting to data with "train_on_val" set to False ..
60 deaths and 1148 recoveries in Maharashtra with unknown district


KeyError: 'n_recovered'

## Create Master Loss Dataframe

### M1 Loss

In [113]:
starting_key = list(predictions_dict.keys())[0]

loss_columns = pd.MultiIndex.from_product([predictions_dict[starting_key]['m1']['df_loss'].columns, predictions_dict[starting_key]['m1']['df_loss'].index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key in predictions_dict.keys():
    df_loss_master.loc[key, :] = np.around(predictions_dict[key]['m1']['df_loss'].values.T.flatten().astype('float'), decimals=2)
    
df_loss_master

KeyError: 'm1'

### M2 Loss

In [114]:
starting_key = list(predictions_dict.keys())[0]

loss_columns = pd.MultiIndex.from_product([predictions_dict[starting_key]['m2']['df_loss'].columns, predictions_dict[starting_key]['m2']['df_loss'].index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key in predictions_dict.keys():
    df_loss_master.loc[key, :] = np.around(predictions_dict[key]['m2']['df_loss'].values.T.flatten().astype('float'), decimals=2)
    
df_loss_master

KeyError: 'm2'

In [115]:
predictions_dict[('Maharashtra', 'Mumbai')]['m1']['ax']

KeyError: 'm1'

## Plot Forecasts

In [116]:
for region in predictions_dict.keys():
    predictions_dict[region]['forecast'] = plot_forecast(predictions_dict[region], region, both_forecasts=False, error_bars=True)

getting forecasts ..


KeyError: 'm2'

In [117]:
for region in predictions_dict.keys():
    create_report(predictions_dict[region])

KeyError: 'fitting_date'

## Create and Save Output CSV

In [118]:
df_output = create_all_csvs(predictions_dict, icu_fraction=0.02)

compiling csv data ..
getting forecasts ..


KeyError: 'm2'

In [119]:
write_csv(df_output, '../../output-{}.csv'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

NameError: name 'df_output' is not defined