# Ensemble model

In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import json
import pandas as pd
from datetime import datetime, timedelta
from copy import deepcopy

from modules.training_module import TrainingModule
from model_wrappers.model_factory import ModelFactory
from configs.base_config import TrainingModuleConfig

from nb_utils import train_eval, forecast
from nb_utils import plot_m1, plot_m2, plot_m3
from nb_utils import set_dates

In [13]:
with open('../notebooks/train_config.json') as f_train_1, \
    open('../config/sample_ensemble_train_config.json') as f_train_2, \
    open('../config/sample_ensemble_test_config.json') as f_test, \
    open('../config/sample_ensemble_forecasting_config.json') as f_forecast:
    default_train_config = json.load(f_train_1)
    ensemble_train_config = json.load(f_train_2)
    ensemble_test_config = json.load(f_test)
    ensemble_forecast_config = json.load(f_forecast)

In [14]:
# region = ['bengaluru urban'] 
# region_type = 'district'
# current_day = datetime.now().date() - timedelta(24)
# forecast_length = 30

# train_eval_plot(region, region_type, 
#                 current_day, forecast_length,
#                 ensemble_train_config, ensemble_test_config,
#                 max_evals = 10, 'tracker_district_daily',
#                 mlflow_log = False, mlflow_run_name = None)

## Train-eval-plot step by step

In [15]:
current_day = datetime.now().date() - timedelta(23)

dates = set_dates(current_day)
    
train1_start_date = dates['train1_start_date']
train1_end_date = dates['train1_end_date']
train1_run_day = dates['train1_run_day']

train2_start_date = dates['train2_start_date']
train2_end_date = dates['train2_end_date']
train2_run_day = dates['train2_run_day']

test_start_date = dates['test_start_date']
test_end_date = dates['test_end_date']
test_run_day = dates['test_run_day']

dates

{'train1_start_date': '5/28/20',
 'train1_end_date': '6/3/20',
 'train1_run_day': '5/27/20',
 'train2_start_date': '6/4/20',
 'train2_end_date': '6/11/20',
 'train2_run_day': '6/3/20',
 'test_start_date': '6/4/20',
 'test_end_date': '6/11/20',
 'test_run_day': '6/3/20'}

In [16]:
default_train_config['region_name'] = ['bengaluru urban']
default_train_config['region_type'] = 'district'
default_train_config['train_start_date'] = train1_start_date
default_train_config['train_end_date'] = train1_end_date
default_train_config['search_parameters']['max_evals'] = 10
default_train_config['ensemble'] = "True"
default_train_config['data_source'] = "tracker_district_daily"
default_train_config['output_filepath'] = "testing"

In [17]:
default_train_config

{'data_source': 'tracker_district_daily',
 'region_name': ['bengaluru urban'],
 'region_type': 'district',
 'train_start_date': '5/28/20',
 'train_end_date': '6/3/20',
 'model_class': 'SEIHRD',
 'model_parameters': {'incubation_period': 5, 'F_icu': 0.05},
 'ensemble': 'True',
 'search_space': {'r0': [0.5, 3],
  'EbyCRatio': [0.001, 70],
  'IbyCRatio': [0.001, 2],
  'infectious_period': [1, 10],
  'F_hospitalization': [0, 0.125],
  'F_fatalities': [0.02, 0.12]},
 'search_parameters': {'max_evals': 10},
 'training_loss_function': {'metric_name': 'mape',
  'variable_weights': [{'variable': 'confirmed', 'weight': 0.25},
   {'variable': 'recovered', 'weight': 0.25},
   {'variable': 'deceased', 'weight': 0.25},
   {'variable': 'hospitalized', 'weight': 0.25}]},
 'loss_functions': [{'metric_name': 'mape',
   'variable_weights': [{'variable': 'confirmed', 'weight': 1}]},
  {'metric_name': 'mape',
   'variable_weights': [{'variable': 'hospitalized', 'weight': 1}]},
  {'metric_name': 'mape',
   

In [18]:
ensemble_train_config

{'model_class': 'heterogeneous_ensemble',
 'ensemble': 'False',
 'train_start_date': '',
 'train_end_date': '',
 'model_parameters': {'constituent_models': {'0': {'model_class': 'SEIHRD',
    'model_parameters': {'incubation_period': 5,
     'F_icu': 0.05,
     'EbyCRatio': 57.24504539983303,
     'F_fatalities': 0.02939840410615913,
     'F_hospitalization': 0.09255640493613826,
     'IbyCRatio': 0.6853391013721378,
     'infectious_period': 1.356269916680188,
     'r0': 2.058302737418627,
     'LatentEbyCRatio': {'6/10/20': 57.24504539983303,
      '6/20/20': 0.3503055147832431},
     'LatentIbyCRatio': {'6/10/20': 0.6853391013721378,
      '6/20/20': 0.09134310512001675}}},
   '1': {'model_class': 'SEIHRD',
    'model_parameters': {'incubation_period': 5,
     'F_icu': 0.05,
     'EbyCRatio': 57.24504539983303,
     'F_fatalities': 0.02939840410615913,
     'F_hospitalization': 0.09255640493613826,
     'IbyCRatio': 0.6853391013721378,
     'infectious_period': 1.356269916680188,
  

In [19]:
train_config = TrainingModuleConfig.parse_obj(default_train_config)

In [20]:
training_output = TrainingModule.from_config(train_config)

t = 7.90                                              
t = 7.90                                                                       
t = 7.90                                                                       
t = 7.90                                                                        
t = 7.90                                                                        
t = 7.90                                                                        
t = 7.90                                                                        
t = 7.90                                                                        
t = 7.90                                                                        
t = 7.90                                                                        
100%|██████████| 10/10 [00:00<00:00, 11.61trial/s, best loss: 214.07106285056938]
Best fit: {'EbyCRatio': 11.308676725152303, 'F_fatalities': 0.048544830193874844, 'F_hospitalization': 0.09897127138930681, 'IbyCRatio':

In [21]:
training_output

{'model_parameters': {'incubation_period': 5,
  'F_icu': 0.05,
  'EbyCRatio': 11.308676725152303,
  'F_fatalities': 0.048544830193874844,
  'F_hospitalization': 0.09897127138930681,
  'IbyCRatio': 0.49848523063952005,
  'infectious_period': 5.761476226812343,
  'r0': 2.4065582061540747,
  'LatentEbyCRatio': {'5/27/20': 11.308676725152303,
   '6/3/20': 1.8890204260380237},
  'LatentIbyCRatio': {'5/27/20': 0.49848523063952005,
   '6/3/20': 0.1921306473985409},
  'MAPE': 214.07106285056938},
 'train_metric_results': [{'metric_name': <MetricName.mape: 'mape'>,
   'variable_weights': [{'variable': <ForecastVariable.confirmed: 'confirmed'>,
     'weight': 1.0}],
   'value': 201.55710741643094},
  {'metric_name': <MetricName.mape: 'mape'>,
   'variable_weights': [{'variable': <ForecastVariable.hospitalized: 'hospitalized'>,
     'weight': 1.0}],
   'value': 81.38985101676779},
  {'metric_name': <MetricName.mape: 'mape'>,
   'variable_weights': [{'variable': <ForecastVariable.recovered: 'recov

In [22]:
ensemble_train_config['model_parameters']['constituent_models'] = training_output['model_parameters']['constituent_models']

KeyError: 'constituent_models'

In [None]:
ensemble_train_config['model_parameters']['constituent_model_losses'] = training_output['model_parameters']['constituent_model_losses']

In [None]:
ensemble_train_config['model_parameters']['modes']['predict_mode'] = 'without_uncertainty'

In [None]:
ensemble_train_config

In [None]:
params, metrics, train1_model_params, train2_model_params = train_eval(['bengaluru urban'], 'district', 
                                                                       train1_start_date, train1_end_date, 
                                                                       train2_start_date, train2_end_date, train2_run_day,
                                                                       test_start_date, test_end_date,
                                                                       ensemble_train_config, ensemble_test_config,
                                                                       max_evals=10, data_source='tracker_district_daily',
                                                                       mlflow_log=False, name_prefix="testing")

In [None]:
params

In [None]:
metrics

In [None]:
train1_model_params

In [None]:
train2_model_params

In [None]:
forecast_start_date = '6/2/20'
forecast_length = 30
forecast_run_day = (datetime.strptime(forecast_start_date, "%m/%d/%y") - timedelta(days=1)).strftime("%-m/%-d/%y")
forecast_end_date = (datetime.strptime(forecast_start_date, "%m/%d/%y") + timedelta(days=forecast_length)).strftime("%-m/%-d/%y")

In [None]:
forecast(train2_model_params, forecast_run_day, forecast_start_date, forecast_end_date, ensemble_forecast_config)

In [None]:
plot_m1(train1_model_params, train1_run_day, train1_start_date, train1_end_date, 
        test_run_day, test_start_date, test_end_date, 
        rolling_average = False, uncertainty = False, 
        forecast_config = '../config/sample_ensemble_forecasting_config.json',
        plot_config = 'plot_config.json', plot_name = 'm1.png')

In [None]:
plot_m2(train2_model_params, train1_start_date, train1_end_date,
        test_run_day, test_start_date, test_end_date, 
        rolling_average = False, uncertainty = False, 
        forecast_config = '../config/sample_ensemble_forecasting_config.json',
        plot_config = 'plot_config.json', plot_name = 'm2.png')

In [None]:
plot_m3(train2_model_params, train1_start_date, 
        forecast_start_date, forecast_length, 
        rolling_average = False, uncertainty = False,
        forecast_config = '../config/sample_ensemble_forecasting_config.json',
        plot_config = 'plot_config.json', plot_name = 'm3.png')

In [None]:
train2_model_params

In [None]:
train2_model_params['model_parameters']['mode']['predict_mode'] = "with_uncertainty"

In [None]:
train2_model_params['model_parameters']['uncertainty_parameters'] = {
            "include_mean": "True",
            "percentiles": [10, 20, 30, 40, 50, 60, 70, 80, 90],
            "ci": 95,
            "date_of_interest": "6/3/20",
            "column_of_interest": "hospitalized",
            "tolerance": 0
        }

In [None]:
train2_model_params

In [None]:
uncertainty_df = forecast(train2_model_params, forecast_run_day, forecast_start_date, forecast_end_date, ensemble_forecast_config)

In [None]:
pd.set_option("display.max_columns", None)
uncertainty_df