In [1]:
%load_ext autoreload
%autoreload 2

Please keep this notebook functionally at par with scripts/oncall/generate_report.py AND linearly runnable (no code with duplicated functionality; run all from this cell down should produce a report). Modify the parameters in `single_fitting_cycle` and `plot_trials` per customizations

In [2]:
import numpy as np
import pandas as pd

import datetime
import copy
import json
import time

import sys
sys.path.append('../../')

from data.dataloader import get_covid19india_api_data
from data.processing import get_data
from models.ihme.dataloader import get_dataframes_cached

from models.seir.seir_testing import SEIR_Testing
from models.seir.seirhd import SEIRHD
from models.seir.seir_movement import SEIR_Movement
from models.seir.seir_movement_testing import SEIR_Movement_Testing

from main.seir.fitting import get_variable_param_ranges
from main.seir.fitting import get_regional_data, data_setup, run_cycle
# from main.seir.fitting import single_fitting_cycle, get_variable_param_ranges
from main.seir.forecast import get_forecast, create_region_csv, create_all_csvs, write_csv
from main.seir.forecast import order_trials, get_all_trials
from utils.create_report import create_report, trials_to_df
from utils.enums import Columns
from viz import plot_forecast, plot_trials


In [None]:
t = time.time()
output_folder = f'../../reports/{t}'

## Load Covid19india Data

In [None]:
# dataframes = get_covid19india_api_data()
dataframes = get_dataframes_cached()

In [None]:
predictions_dict = {}

## Select Districts to fit on

In [None]:
# districts_to_show = [('Maharashtra', 'Pune'), 
#                      ('Maharashtra', 'Mumbai'), 
#                      ('Rajasthan', 'Jaipur'), 
#                      ('Gujarat', 'Ahmedabad'), 
#                      ('Karnataka', 'Bengaluru Urban'),
#                      ('Delhi', None)]

districts_to_show = [('Maharashtra', 'Mumbai')]

## Perform M1 and M2 fits

In [3]:
which_compartments = ['hospitalised', 'total_infected', 'deceased', 'recovered']

In [4]:
df_district, df_raw = get_regional_data(dataframes, state, district, data_from_tracker, data_format, filename, smooth_jump=True,
    smoothing_length=33, smoothing_method='weighted', 
    t_recov=14, return_extra=False)


NameError: name 'dataframes' is not defined

In [None]:
observed_dataframes = data_setup(df_district, df_raw, val_period=7, which_compartments=which_compartments)
predictions_dict = run_cycle(state, district, observed_dataframes, model=SEIR_Testing, data_from_tracker=False, train_period=7, which_compartments=which_compartments, num_evals=700, N=1e7, initialisation='intermediate')

In [None]:
for state, district in districts_to_show:
    predictions_dict[(state, district)] = {}
    observed_dataframes = data_setup(df_district, df_raw, val_period=7, which_compartments=which_compartments)
    predictions_dict[(state, district)]['m1'] = run_cycle(
        state, district, observed_dataframes, model=SEIR_Testing, 
        data_from_tracker=False, train_period=7, 
        which_compartments=which_compartments, num_evals=700, 
        N=1e7, initialisation='intermediate')
    observed_dataframes = data_setup(predictions_dict[(state, district)]['m1']['df_prediction'], df_raw, val_period=0, which_compartments=which_compartments)
    predictions_dict[(state, district)]['m2'] = run_cycle(
        state, district, observed_dataframes, model=SEIR_Testing, 
        data_from_tracker=False, train_period=7, 
        which_compartments=which_compartments, num_evals=700, 
        N=1e7, initialisation='intermediate')
    
    predictions_dict[(state, district)]['state'] = state
    predictions_dict[(state, district)]['dist'] = district
    predictions_dict[(state, district)]['fitting_date'] = datetime.datetime.now().strftime("%Y-%m-%d")
    predictions_dict[(state, district)]['datasource'] = 'covid19api' if predictions_dict[(state, district)]['m1']['data_from_tracker'] else 'municipality'
    predictions_dict[(state, district)]['variable_param_ranges'] = predictions_dict[(state, district)]['m1']['variable_param_ranges']
    predictions_dict[(state, district)]['data_last_date'] = predictions_dict[(state, district)]['m2']['data_last_date']

In [None]:
# predictions_dict[('Maharashtra', 'Mumbai')]['m1']['smoothing_plot'].figure

In [None]:
predictions_dict[('Maharashtra', 'Mumbai')]['m1']['df_loss']

## Create Master Loss Dataframe

### M1 Loss

In [None]:
starting_key = list(predictions_dict.keys())[0]

loss_columns = pd.MultiIndex.from_product([predictions_dict[starting_key]['m1']['df_loss'].columns, predictions_dict[starting_key]['m1']['df_loss'].index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key in predictions_dict.keys():
    df_loss_master.loc[key, :] = np.around(predictions_dict[key]['m1']['df_loss'].values.T.flatten().astype('float'), decimals=2)
    
df_loss_master

### M2 Loss

In [None]:
starting_key = list(predictions_dict.keys())[0]

loss_columns = pd.MultiIndex.from_product([predictions_dict[starting_key]['m2']['df_loss'].columns, predictions_dict[starting_key]['m2']['df_loss'].index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key in predictions_dict.keys():
    df_loss_master.loc[key, :] = np.around(predictions_dict[key]['m2']['df_loss'].values.T.flatten().astype('float'), decimals=2)
    
df_loss_master

## Plot Forecasts

In [None]:
for region in predictions_dict.keys():
    predictions_dict[region]['m2']['forecast'] = plot_forecast(predictions_dict[region], region, both_forecasts=False, error_bars=True)
    
    predictions, losses, params = get_all_trials(predictions_dict[region], train_fit='m1')
    predictions_dict[region]['m1']['params'] = params
    predictions_dict[region]['m1']['losses'] = losses
    predictions_dict[region]['m1']['predictions'] = predictions
    predictions_dict[region]['m1']['all_trials'] = trials_to_df(predictions, losses, params)
    predictions, losses, params = get_all_trials(predictions_dict[region], train_fit='m2')
    predictions_dict[region]['m2']['params'] = params
    predictions_dict[region]['m2']['losses'] = losses
    predictions_dict[region]['m2']['predictions'] = predictions
    predictions_dict[region]['m2']['all_trials'] = trials_to_df(predictions, losses, params)
    kforecasts = plot_trials(
        predictions_dict[region],
        train_fit='m2',
        predictions=predictions, 
        losses=losses, params=params, 
        k=args.ktrials,
        which_compartments=[Columns.confirmed, Columns.active])
    predictions_dict[region]['m2']['forecast_confirmed_topk'] = kforecasts[Columns.confirmed]
    predictions_dict[region]['m2']['forecast_active_topk'] = kforecasts[Columns.active]


In [None]:
predictions_dict[region]['m2']['forecast'].figure

In [None]:
predictions_dict[region]['m2']['forecast_confirmed_topk'].figure

In [None]:
predictions_dict[region]['m2']['forecast_active_topk'].figure

## Create Report


In [None]:
for region in predictions_dict.keys():
    create_report(predictions_dict[region], ROOT_DIR=output_folder)
    predictions_dict[region]['m1']['all_trials'].to_csv(os.path.join(output_folder, 'm1-trials.csv'))
    predictions_dict[region]['m2']['all_trials'].to_csv(os.path.join(output_folder, 'm2-trials.csv'))

## Create and Save Output CSV

In [None]:
df_output = create_all_csvs(predictions_dict, icu_fraction=0.02)

In [None]:
write_csv(df_output, filename=os.path.join(output_folder, f'output-{t}.csv'))