In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

import datetime
import copy
import json

import sys
sys.path.append('../../')

from data.dataloader import Covid19IndiaLoader
from data.processing import get_data
from data.processing import get_dataframes_cached

from models.seir.seir_testing import SEIR_Testing
from models.seir.seirhd import SEIRHD
from models.seir.seir_movement import SEIR_Movement
from models.seir.seir_movement_testing import SEIR_Movement_Testing

from main.seir.fitting import single_fitting_cycle, get_variable_param_ranges
from main.seir.forecast import get_forecast, create_region_csv, create_all_csvs, write_csv, plot_forecast
from utils.create_report import create_report

## Load Covid19india Data

In [None]:
# Dataframes handed to the fitting and data-loading calls in the cells below.
# Any caching is handled inside data.processing.get_dataframes_cached.
dataframes = get_dataframes_cached()

In [None]:
# Results container keyed by (state, district); filled by the fitting loop below.
predictions_dict = {}

## Select Districts to fit on

In [None]:
# Regions are (state, district) tuples; a district of None appears in the
# alternative selection below (presumably meaning the whole state — TODO confirm).
# Alternative multi-region selection, kept for reference:
# districts_to_show = [('Maharashtra', 'Pune'), 
#                      ('Maharashtra', 'Mumbai'), 
#                      ('Rajasthan', 'Jaipur'), 
#                      ('Gujarat', 'Ahmedabad'), 
#                      ('Karnataka', 'Bengaluru Urban'),
#                      ('Delhi', None)]

# Active selection: every cell below iterates over this list.
districts_to_show = [('Maharashtra', 'Mumbai')]

## Perform M1 and M2 fits

In [None]:
# Run two fits per region. 'm1' uses a 7-day validation window, 'm2' uses
# val_period=0; every other argument is identical between the two calls.
# The loop variables `state` and `district` are read again by later cells.
compartments = ('hospitalised', 'total_infected', 'deceased', 'recovered')

for state, district in districts_to_show:
    region_fits = {}
    predictions_dict[(state, district)] = region_fits

    for fit_name, val_period in (('m1', 7), ('m2', 0)):
        region_fits[fit_name] = single_fitting_cycle(
            dataframes, state, district, train_period=7, val_period=val_period, num_evals=5,
            data_from_tracker=False, initialisation='intermediate', model=SEIR_Testing,
            smooth_jump=True, smoothing_method='weighted', smoothing_length=20,
            # a fresh list per call, as in two independent literal lists
            which_compartments=list(compartments))

    m1_fit = region_fits['m1']
    m2_fit = region_fits['m2']

    # Region-level metadata stored next to the two fits.
    region_fits['state'] = state
    region_fits['dist'] = district
    region_fits['fitting_date'] = datetime.datetime.now().strftime("%Y-%m-%d")
    region_fits['datasource'] = 'covid19api' if m1_fit['data_from_tracker'] else 'municipality'
    region_fits['variable_param_ranges'] = m1_fit['variable_param_ranges']
    region_fits['data_last_date'] = m2_fit['data_last_date']

## Create Master Loss Dataframe

### M1 Loss

In [None]:
# Master table of the M1 losses: one row per region, one column per
# (df_loss column, df_loss row) pair. The column layout is taken from the first
# region's df_loss; from_product orders pairs column by column, which matches
# the transposed-then-flattened values written into each row.
starting_key = list(predictions_dict)[0]
reference_loss = predictions_dict[starting_key]['m1']['df_loss']

loss_columns = pd.MultiIndex.from_product([reference_loss.columns, reference_loss.index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key, region_fits in predictions_dict.items():
    flat_losses = region_fits['m1']['df_loss'].values.T.flatten().astype('float')
    df_loss_master.loc[key, :] = np.around(flat_losses, decimals=2)

df_loss_master

### M2 Loss

In [None]:
# Master table of the M2 losses, built like the M1 table: one row per region,
# one column per (df_loss column, df_loss row) pair of the first region's df_loss.
# Note that this rebinds `df_loss_master`, replacing the M1 table.
starting_key = list(predictions_dict)[0]
reference_loss = predictions_dict[starting_key]['m2']['df_loss']

loss_columns = pd.MultiIndex.from_product([reference_loss.columns, reference_loss.index])
loss_index = predictions_dict.keys()

df_loss_master = pd.DataFrame(columns=loss_columns, index=loss_index)
for key, region_fits in predictions_dict.items():
    flat_losses = region_fits['m2']['df_loss'].values.T.flatten().astype('float')
    df_loss_master.loc[key, :] = np.around(flat_losses, decimals=2)

df_loss_master

## Plot Forecasts

In [None]:
# Plot the forecast of every fitted region and keep what plot_forecast returns
# under the region's 'forecast' key.
for region, region_fits in predictions_dict.items():
    region_fits['forecast'] = plot_forecast(region_fits, region, both_forecasts=False, error_bars=True)

## Create Report (v1)

In [None]:
# Generate one report per fitted region from its predictions entry.
for region, region_fits in predictions_dict.items():
    create_report(region_fits)

## Create and Save Output CSV

In [None]:
# Build the combined output table for all fitted regions.
# icu_fraction=0.02 is forwarded as-is; presumably the share of cases assumed
# to need ICU care — TODO confirm against create_all_csvs.
df_output = create_all_csvs(predictions_dict, icu_fraction=0.02)

In [None]:
# Persist the combined output; the destination is decided inside write_csv.
write_csv(df_output)

In [None]:
## Produce Top 10 Trials / Plots

In [None]:
# Which fit the top-10 trial analysis below reads from predictions_dict
# (the fitting loop stores fits under 'm1' and 'm2').
m = 'm2'

In [None]:
# One flat {parameter name: value} dict per trial of the selected fit.
# Each entry of trial['misc']['vals'] is indexable; only its first element is kept.
# The trials come from the (state, district) left over by the fitting loop.
params_array = np.array([
    {name: values[0] for name, values in trial['misc']['vals'].items()}
    for trial in predictions_dict[(state, district)][m]['trials']
])

In [None]:
# Loss of every trial of the selected fit, the trial order from best (lowest
# loss) to worst, and the ten lowest losses themselves.
trials = predictions_dict[(state, district)][m]['trials']
losses_array = np.array([trial['result']['loss'] for trial in trials])
least_losses_indices = np.argsort(losses_array)
top10losses = losses_array[least_losses_indices[:10]]

In [None]:
# Parameter dicts of the ten lowest-loss trials, best first
# (least_losses_indices is an ascending argsort of the losses).
top10params = params_array[least_losses_indices[:10]]

In [None]:
# Forecast once per top-10 parameter set. The region key is (state, district),
# the same one the trials, losses and parameters above were read from, so the
# forecasts cannot silently come from a different region when
# districts_to_show is changed.
top10predictions = [get_forecast(predictions_dict[(state, district)],
                                 best_params=params_dict) for params_dict in top10params]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

def plot_trials(m_dict, top10losses, top10params, top10predictions, region=None):
    """Plot observed confirmed cases against the forecasts of the best trials.

    Args:
        m_dict: results of one fit; only ``m_dict['df_district']`` is read and
            it must provide ``date`` and ``total_infected`` columns.
        top10losses: losses of the plotted trials. They are sorted ascending
            here and the i-th smallest labels the i-th prediction.
        top10params: not used by the plot; kept so existing calls keep working.
        top10predictions: forecast dataframes (``date`` and ``total_infected``
            columns), expected in the same best-first order as the losses.
        region: optional ``(state, district)`` pair shown in the title. When
            omitted, the notebook-level ``region`` variable is used if it
            exists (the behaviour before this parameter was added).

    Returns:
        The matplotlib Axes the curves were drawn on.
    """
    if region is None:
        # Backward compatibility: older calls relied on the global loop
        # variable `region` being set by the calling cell.
        region = globals().get('region')

    df_true = m_dict['df_district']
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.plot(df_true['date'], df_true['total_infected'],
            '-o', color='C0', label='Confirmed Cases (Observed)')

    # Sorting is loop-invariant, so do it once.
    sorted_losses = np.sort(top10losses)[:10]
    for i, df_prediction in enumerate(top10predictions):
        loss_value = np.around(sorted_losses[i], 2)
        sns.lineplot(x="date", y="total_infected", data=df_prediction,
                     ls='-', label='Confirmed Cases ({})'.format(loss_value), ax=ax)
        # Tag the end of each curve with its loss so overlapping lines stay identifiable.
        ax.text(x=df_prediction['date'].iloc[-1], y=df_prediction['total_infected'].iloc[-1], s=loss_value)

    ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
    ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.set_ylabel('No of People', fontsize=16)
    ax.set_yscale('log')
    ax.set_xlabel('Time', fontsize=16)
    ax.legend()
    if region is None:
        ax.set_title('Forecast', fontsize=16)
    else:
        ax.set_title('Forecast - ({} {})'.format(region[0], region[1]), fontsize=16)
    return ax

In [None]:
# Store the top-trial parameters and the comparison plot on the `m` fit of each region.
# NOTE(review): top10params / top10losses / top10predictions were computed in
# the cells above from a single region's trials, yet they are attached to every
# region here — confirm this is intended when fitting several regions.
for region, region_fits in predictions_dict.items():
    model_fit = region_fits[m]
    model_fit['top10params'] = top10params
    model_fit['forecast_top10'] = plot_trials(model_fit, top10losses, top10params, top10predictions)

In [None]:
import time

# Write each region's report into its own directory named after the current
# epoch time, so earlier reports are not overwritten.
for region, region_fits in predictions_dict.items():
    t = time.time()
    create_report(region_fits, ROOT_DIR=f'../../reports/{t}')

In [None]:
from main.seir.fitting import get_regional_data 

# Regional data for Mumbai with data_from_tracker=False; only the first element
# of the returned pair is kept. No smooth_jump argument is passed here, so this
# is presumably the unsmoothed baseline — TODO confirm against get_regional_data.
orig_df_district, _ = get_regional_data(dataframes, 'Maharashtra', 'Mumbai', data_from_tracker=False, data_format='new', filename=None)

In [None]:
# Inputs for the manual, step-by-step replay of the jump smoothing in the next cell.
# NOTE(review): the bare `dataframes` expression below has no effect; only the
# last expression of a cell is displayed.
dataframes
state='Maharashtra'
district='Mumbai'
data_from_tracker=False
smooth_jump=True
# NOTE(review): the fitting loop earlier passes smoothing_length=20; confirm 33 is intended here.
smoothing_length=33
smoothing_method='weighted'
filename=None
data_format='new'

# Load the district time series from the source selected by data_from_tracker.
if data_from_tracker:
    main_df_district = get_data(dataframes, state=state, district=district, use_dataframe='districts_daily')
else:
    # NOTE(review): unlike the other two calls, this one does not pass
    # `dataframes` — confirm against get_data's signature.
    main_df_district = get_data(state=state, district=district, disable_tracker=True, filename=filename, 
                            data_format=data_format)

# Raw-data view of the same district; not read again in the cells shown below.
df_district_raw_data = get_data(dataframes, state=state, district=district, use_dataframe='raw_data')


In [None]:
# Replay of the smoothing of a one-day jump in 'recovered', with debug prints.
# The jump between d1 and d2 is pushed back onto earlier days by moving people
# from 'hospitalised' to 'recovered', so 'total_infected' stays consistent
# (checked by the assert at the end).
# NOTE(review): np.random is not seeded, so the 0/1 offsets differ between runs.
df_district=copy.copy(main_df_district)
if smooth_jump:
    # d1 and d2 are the two consecutive dates bracketing the jump; the dates
    # differ by one day depending on the data source.
    if data_from_tracker:
        d1, d2 = '2020-05-29', '2020-05-30'
    else:
        d1, d2 = '2020-05-28', '2020-05-29'
    df_district = df_district.set_index('date')
    # Size of the jump in 'recovered' from d1 to d2.
    big_jump = df_district.loc[d2, 'recovered'] - df_district.loc[d1, 'recovered']
    print(big_jump)
    if smoothing_method == 'linear':
        # Visit the smoothing_length-1 days ending at d1, oldest first
        # (day_number counts down to 0, so the last date is d1 itself).
        for i, day_number in enumerate(range(smoothing_length-2, -1, -1)):
            date = datetime.datetime.strptime(d1, '%Y-%m-%d') - datetime.timedelta(days=day_number)
            # Random 0/1 extra drawn with probability (big_jump % smoothing_length) / smoothing_length.
            offset = np.random.binomial(1, (big_jump%smoothing_length)/smoothing_length)
            # Move a linearly growing part of the jump ((i+1)/smoothing_length of it,
            # floored) from 'hospitalised' to 'recovered' on this date.
            df_district.loc[date, 'recovered'] += ((i+1)*big_jump)//smoothing_length + offset
            df_district.loc[date, 'hospitalised'] -= ((i+1)*big_jump)//smoothing_length + offset

    elif smoothing_method == 'weighted':
        # Change in total_infected between 15 and 14 rows earlier, i.e. the new
        # cases 14 days before each date (assuming one row per day — TODO confirm).
        newcases = df_district['total_infected'].shift(14) - df_district['total_infected'].shift(15)
        idx = newcases.first_valid_index()
        # Restrict both the weights and the rows being edited to [idx, d1].
        newcases = newcases.loc[idx:d1]
        # NOTE(review): `truncated` is a slice of df_district that is modified in
        # place below; pandas may warn (SettingWithCopyWarning). The results are
        # written back explicitly after the loop.
        truncated = df_district.loc[idx:d1, :]
        # Reciprocal of each day's share of the total lagged new cases in the window.
        invpercent = newcases.sum()/newcases
        runningsum = 0
        # NOTE(review): the shares are normalised over the whole [idx, d1] window,
        # but only the last smoothing_length-1 days are visited, so runningsum
        # need not add up to big_jump — confirm.
        for i, day_number in enumerate(range(smoothing_length-2, -1, -1)):
            date = datetime.datetime.strptime(d1, '%Y-%m-%d') - datetime.timedelta(days=day_number)
            # NOTE(review): this debug value divides by newcases, while the
            # probability actually used on the next line divides by invpercent.
            print (date, (big_jump%invpercent.loc[date])/newcases.loc[date])
            offset = np.random.binomial(1, (big_jump%invpercent.loc[date])/invpercent.loc[date])
            # This day's part of the jump (floored) plus the random 0/1 offset.
            runningsum += (big_jump // invpercent.loc[date]) + offset
            # Applied from `date` to the end of the window, so the shift accumulates.
            truncated.loc[date:, 'recovered'] += (big_jump // invpercent.loc[date]) + offset
            truncated.loc[date:, 'hospitalised'] -= (big_jump // invpercent.loc[date]) + offset
            print(date, runningsum)
        print(truncated.index)
        # Write the adjusted columns back as int64.
        df_district.loc[truncated.index, 'recovered'] = truncated['recovered'].astype('int64')
        df_district.loc[truncated.index, 'hospitalised'] = truncated['hospitalised'].astype('int64')

    # Sanity check: the three compartments must still add up to total_infected on every row.
    assert((df_district['total_infected'] == df_district['hospitalised'] + df_district['deceased'] + df_district['recovered']).all())

In [None]:
# `new` is the smoothed frame from the previous cell, `orig` the untouched input
# indexed by date; both names are reused by the comparison cells below.
new = df_district
orig = main_df_district.set_index('date')
# print(pd.concat([new['hospitalised'],orig['hospitalised']], axis=1))
# Per-date change in 'recovered' introduced by the smoothing (new minus original).
print(new['recovered'] - orig['recovered'])
# print(pd.concat([new['recovered'],orig['recovered']], axis=1))

In [None]:
# Original minus smoothed values of the four compartments, last 20 rows only.
compare_cols = ['total_infected','hospitalised','deceased','recovered']
compare = orig[compare_cols].sub(new[compare_cols]).iloc[-20:]
print (compare)

In [None]:
# Residual of total_infected after subtracting the three smoothed compartments,
# once starting from the original totals and once from the smoothed totals;
# the last 25 rows are printed side by side.
# NOTE(review): both residuals subtract the columns of `new`; if the first one
# was meant to check the original frame it should subtract `orig[col]` — confirm.
ti_check = copy.copy(orig['total_infected'])
ti_check2 = copy.copy(new['total_infected'])
for col in ('hospitalised', 'recovered', 'deceased'):
    component = new[col]
    ti_check -= component
    ti_check2 -= component
residuals = pd.concat([ti_check[-25:], ti_check2[-25:]], axis=1)
print(residuals)