# Ensemble

Todo:

- Remove the `max_evals` argument from the `train_eval_plot_ensemble` call below - it doesn't work

In [None]:
# Load IPython's autoreload extension so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before
# executing each cell.
%autoreload 2

In [None]:
import json
import mlflow
import os

from datetime import datetime, timedelta
from copy import deepcopy

from nb_utils import train_eval_plot_ensemble
from publishers.report_generation import create_report
from publishers.mlflow_logging import log_to_mlflow
from utils.data_transformer_helper import flatten, flatten_train_loss_config, flatten_eval_loss_config

In [None]:
# Load the sample train / test / forecast configurations that later cells
# customise. All three files are opened by a single `with` statement, so a
# missing file raises before any config is parsed.
# The files are read as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open('../config/sample_homogeneous_train_config.json', encoding='utf-8') as f_train, \
    open('../config/sample_homogeneous_test_config.json', encoding='utf-8') as f_test, \
    open('../config/sample_homogeneous_forecast_config.json', encoding='utf-8') as f_forecast:
    default_train_config = json.load(f_train)
    default_test_config = json.load(f_test)
    default_forecast_config = json.load(f_forecast)

In [None]:
# Read the MLflow tracking credentials from a JSON file in the notebook's
# working directory; the 'username' and 'password' keys are used further down.
# The file is read as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open('mlflow_credentials.json', encoding='utf-8') as f_cred:
    mlflow_credentials = json.load(f_cred)

In [None]:
# Expose the tracking-server credentials through the environment variables
# that the MLflow client reads for authentication.
os.environ.update({
    'MLFLOW_TRACKING_USERNAME': mlflow_credentials['username'],
    'MLFLOW_TRACKING_PASSWORD': mlflow_credentials['password'],
})

# Point the MLflow client at the tracking server and select the experiment.
# NOTE(review): the tracking URI uses plain http -- confirm that sending the
# credentials above over an unencrypted connection is acceptable.
mlflow.set_tracking_uri("http://ec2-54-175-207-176.compute-1.amazonaws.com")
mlflow.set_experiment("SEIHRD_ENSEMBLE_V0")

### Inputs to train-eval-plot

In [None]:
# Use the same model class in the train, test and forecast configurations.
model_class = 'homogeneous_ensemble'
default_train_config['model_class'] = model_class
default_test_config['model_class'] = model_class
default_forecast_config['model_class'] = model_class

# Overrides applied to the training configuration.
default_train_config['model_parameters']['n'] = 10
default_train_config['search_parameters']["frac_for_child"] = 0.5
default_train_config['search_parameters']['child_model']['max_evals'] = 10
default_train_config['search_parameters']['ensemble_model']['max_evals'] = 10

# Uncertainty settings for the forecast: reference date, percentiles to
# report, and an empty 'ci' list.
default_forecast_config['model_parameters']['uncertainty_parameters'].update({
    'date_of_interest': '6/25/20',
    'percentiles': [10, 50, 90],
    'ci': [],
})

In [None]:
# Forecast starts from the day after current day.
# NOTE(review): the next cell assigns current_day and forecast_length again
# (with a 40-day offset), so these values only matter if that cell is skipped.
current_day = datetime.now().date() - timedelta(days=4)
# Length of forecast interval (days)
forecast_length = 30

In [None]:
# Region(s) to model and the granularity they are expressed at.
region = ['delhi']
region_type = 'state'

# Reference date (40 days before today) and forecast horizon in days.
forecast_length = 30
current_day = datetime.now().date() - timedelta(days=40)

# NOTE(review): name_prefix is bound to the same list object as `region`,
# not to a string -- confirm that is what downstream code expects.
name_prefix = region

### Train eval plot

In [None]:
# Run the ensemble train / evaluate / plot pipeline for the selected region.
# MLflow logging is disabled for this call (mlflow_log=False).
# TODO: remove max_evals here, it doesn't work
(params, metrics, artifacts_dir,
 train1_params, train2_params) = train_eval_plot_ensemble(
    region, region_type,
    current_day, forecast_length,
    default_train_config, default_test_config, default_forecast_config,
    train_period=14,
    test_period=7,
    max_evals=100,
    data_source='rootnet_stats_history',
    output_dir='../outputs/test/',
    mlflow_log=False,
    mlflow_run_name="Ensemble testing",
)

### Logging

In [None]:
# Same directory literal that is passed as output_dir to the
# train_eval_plot_ensemble call earlier in the notebook.
output_dir = '../outputs/test/'

# Map each artifact key to the path of the corresponding plot inside
# output_dir. The order of the pairs fixes the order of the dict entries.
artifact_list = {
    artifact_key: os.path.join(output_dir, file_name)
    for artifact_key, file_name in (
        ('plot_M1_CARD', 'm1.png'),
        ('plot_M1_single_C', 'm1_confirmed.png'),
        ('plot_M1_single_A', 'm1_hospitalized.png'),
        ('plot_M1_single_R', 'm1_recovered.png'),
        ('plot_M1_single_D', 'm1_deceased.png'),
        ('plot_M2_CARD', 'm2.png'),
        ('plot_M2_single_C', 'm2_confirmed.png'),
        ('plot_M2_single_A', 'm2_hospitalized.png'),
        ('plot_M2_single_R', 'm2_recovered.png'),
        ('plot_M2_single_D', 'm2_deceased.png'),
        ('plot_M2_forecast_CARD', 'm2_forecast.png'),
        ('plot_M2_forecast_single_C', 'm2_forecast_confirmed.png'),
        ('plot_M2_forecast_single_A', 'm2_forecast_hospitalized.png'),
        ('plot_M2_forecast_single_R', 'm2_forecast_recovered.png'),
        ('plot_M2_forecast_single_D', 'm2_forecast_deceased.png'),
        ('plot_planning_pdf_cdf', 'm2_distribution.png'),
    )
}

# NOTE(review): os.path.join discards every component that precedes an
# absolute one, so on POSIX the 'file:///' prefix is dropped and this value is
# a plain filesystem path rather than a file URI -- confirm which form the
# report template and the MLflow logger expect before changing it.
artifact_list['output_forecast_file'] = os.path.join(
    'file:///', os.getcwd(), '../outputs/test/', 'forecast.csv'
)

In [None]:
# Generate the report from the run's params, metrics and artifact paths,
# using the mustache template as input and report.md as the output path.
create_report(
    params,
    metrics,
    artifact_list,
    template_path='../src/publishers/template_v1.mustache',
    report_path='../src/publishers/report.md',
)

In [None]:
# Work on deep copies so the original params / metrics are left untouched.

# Params: flatten the two loss-function configs with their dedicated helpers,
# then flatten the whole params structure.
params_log = deepcopy(params)
params_log['train_loss_function_config'] = flatten_train_loss_config(
    params['train_loss_function_config']
)
params_log['eval_loss_function_config'] = flatten_eval_loss_config(
    params['eval_loss_function_config']
)
params_log = flatten(params_log)

# Metrics: flatten, then drop the M1/M2 losses entries before logging.
metrics_log = flatten(deepcopy(metrics))
del metrics_log['M1_losses'], metrics_log['M2_losses']

log_to_mlflow(
    params_log,
    metrics_log,
    artifact_list,
    experiment_name="SEIHRD_ENSEMBLE_V0",
    run_name='testing',
)