# Water Futures approach to the Battle 
Explanation

In [1]:
# Prepare the workspace.
# NOTE(review): `import eval` binds the project package to the name `eval`,
# which hides Python's builtin `eval` for the rest of this notebook.
import eval
from eval.evaluator import WaterFuturesEvaluator
from eval.dashboard import run_dashboard

# Create the evaluator; later cells call `wfe.next_iter()` and
# `wfe.add_model(...)` on this single instance.
wfe = WaterFuturesEvaluator()

# Iteration 1
Iteration 1 is where we try and test as many models, with as many tunings, as we can. We then select only a handful of them, keeping the best tuning for each family.
We also test which of the available reconciliation techniques works best.
The dataset is therefore divided into a *training* part, where we train and validate the models and the strategies, and a *test* part, where the selected models are tested and the selected technique produces the final forecast for the competition.
The following iterations only have the second part, as the training part is no longer necessary.

In [2]:
# Move the evaluator on to the next iteration; this is the first
# `next_iter()` call in the notebook, under the "Iteration 1" heading.
wfe.next_iter()
# Make the project packages holding the candidate models and the
# preprocessing steps available; the cells below import from them.
import models
import preprocessing

## Benchmarks
....

In [None]:
# Prepare the models
from models.benchmark import RollingAverageWeek, AutoRollingAverageWeek
from preprocessing.impute_and_fill import FillZero, FillAvgWeek

def _benchmark_config(name, model, demand_steps, deterministic=True):
    """Build the evaluator config dict for one benchmark model.

    None of the benchmarks configures any weather preprocessing, so a fresh
    empty list is stored under 'weather' on every call.
    """
    return {
        'name': name,
        'model': model,
        'preprocessing': {
            'demand': demand_steps,
            'weather': []
        },
        'deterministic': deterministic
    }


# RollingAverageWeek(1) with two alternative demand preprocessing steps.
previous_week = _benchmark_config(
    'PrevWeek', RollingAverageWeek(1), [FillZero()]
)
previous_week_v2 = _benchmark_config(
    'PrevWeek_v2', RollingAverageWeek(1), [FillAvgWeek()]
)

# RollingAverageWeek(None) with no demand preprocessing at all.
average_week = _benchmark_config('AvgWeek', RollingAverageWeek(None), [])

# RollingAverageWeek with arguments 2, 4 and 8, all using FillZero.
rolling_average_2 = _benchmark_config(
    'RollingAverage_2', RollingAverageWeek(2), [FillZero()]
)
rolling_average_4 = _benchmark_config(
    'RollingAverage_4', RollingAverageWeek(4), [FillZero()]
)
rolling_average_8 = _benchmark_config(
    'RollingAverage_8', RollingAverageWeek(8), [FillZero()]
)

# The only benchmark flagged as non-deterministic.
auto_rollaw = _benchmark_config(
    'AutoRollingAverage',
    AutoRollingAverageWeek(),
    [FillAvgWeek()],
    deterministic=False,
)

# Master list of configs; the following cells extend it.
models_configs = [
    previous_week,
    previous_week_v2,
    average_week,
    rolling_average_2,
    rolling_average_4,
    rolling_average_8,
    auto_rollaw,
]

## Exponentially Weighted Rolling Average
...

In [None]:
from models.exp_rolling_average_week import ExpWeightedRollingWeek

def _exp_rolling_config(name, window):
    """Build the config dict for an ExpWeightedRollingWeek model.

    `window` is passed straight to the ExpWeightedRollingWeek constructor.
    Every config uses FillAvgWeek on the demand, no weather preprocessing,
    and is flagged as deterministic.
    """
    return {
        'name': name,
        'model': ExpWeightedRollingWeek(window),
        'preprocessing': {
            'demand': [FillAvgWeek()],
            'weather': []
        },
        'deterministic': True
    }


exp_rolling_average_2 = _exp_rolling_config('ExpRollingAverage_2', 2)
exp_rolling_average_4 = _exp_rolling_config('ExpRollingAverage_4', 4)
exp_rolling_average_8 = _exp_rolling_config('ExpRollingAverage_8', 8)

# Add the three variants to the master list of configs.
models_configs += [
    exp_rolling_average_2,
    exp_rolling_average_4,
    exp_rolling_average_8,
]

## Pattern regression
...

In [None]:
from models.pattern_regression import PatternRegression, PatternRegressionDaily
from preprocessing.simple_transforms import Logarithm
from preprocessing.weather_feature_engineering import RealFeel, DewPoint, WindChill

# Both pattern-regression configs share the same preprocessing: Logarithm on
# the demand, and RealFeel, DewPoint and WindChill on the weather data.
# The names are plain string literals: they contain no placeholders, so the
# f-string prefix was unnecessary.
pattern_regression = {
    'name': 'PatternRegression',
    'model': PatternRegression(),
    'preprocessing': {
        'demand': [Logarithm()],
        'weather': [RealFeel(), DewPoint(), WindChill()]
    },
    'deterministic': True
}

pattern_regression_daily = {
    'name': 'PatternRegressionDaily',
    'model': PatternRegressionDaily(),
    'preprocessing': {
        'demand': [Logarithm()],
        'weather': [RealFeel(), DewPoint(), WindChill()]
    },
    'deterministic': True
}

# Add both variants to the master list of configs.
models_configs += [
    pattern_regression,
    pattern_regression_daily
]

## Prophet
...

In [None]:
from models.fbprophet import Fbprophet

# No preprocessing steps are configured for the Prophet model, neither for
# the demand nor for the weather data.
prophet_model = Fbprophet()
prophet_preprocessing = {'demand': [], 'weather': []}

prophet = {
    'name': 'FbProphet',
    'model': prophet_model,
    'preprocessing': prophet_preprocessing,
    'deterministic': True
}

# Add it to the master list of configs.
models_configs.append(prophet)

## Deep Learning Models 
### LGBM
...

In [None]:
from models.LGBM import LGBMrobust, LGBMsimple
from preprocessing.advanced_transforms import LGBM_demand_features, LGBM_impute_nan_demand
from preprocessing.advanced_transforms import LGBM_impute_nan_weather, LGBM_weather_features
from preprocessing.advanced_transforms import  LGBM_prepare_test_dfs

# LightGBM parameters shared by every LGBM config below; they are set by
# hand, with no hyperparameter tuning.
lgb_params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'num_leaves': 32,
    'max_depth': 6,
    'learning_rate': 0.01,
    'feature_fraction': 0.6,
    'bagging_fraction': 0.8,
    'bagging_freq': 10,
    'verbose': -1,
}


def _lgbm_config(name, model, no_last_week):
    """Assemble the config dict shared by the LGBM-based models.

    The configs differ only in their name, their model instance and the
    `no_last_week` value forwarded to LGBM_demand_features. Fresh
    preprocessing step instances are created on every call, and every
    config is flagged as non-deterministic.
    """
    return {
        'name': name,
        'model': model,
        'preprocessing': {
            'demand': [
                Logarithm(),
                LGBM_impute_nan_demand(),
                LGBM_demand_features(no_last_week=no_last_week),
            ],
            'weather': [LGBM_impute_nan_weather(), LGBM_weather_features()],
            'prepare_test_dfs': [LGBM_prepare_test_dfs()]
        },
        'deterministic': False
    }


lgbm_simple = _lgbm_config(
    'LGBMsimple', LGBMsimple(lgb_params=lgb_params), no_last_week=1
)
lgbm_robust = _lgbm_config(
    'LGBMrobust', LGBMrobust(lgb_params=lgb_params), no_last_week=1
)
# NOTE(review): this name contains a space ('..._with_last week');
# confirm that this is intended before relying on it as an identifier.
lgbm_simple_with_last_week = _lgbm_config(
    'LGBMsimple_with_last week',
    LGBMsimple(lgb_params=lgb_params),
    no_last_week=0,
)

# Add the three variants to the master list of configs.
models_configs += [lgbm_simple, lgbm_robust, lgbm_simple_with_last_week]

## XGBM

In [None]:
from models.LGBM import XGBMsimple

# XGBoost parameters handed to XGBMsimple.
# 'verbosity': 0 replaces the former 'silent': 1. XGBoost deprecated and later
# removed `silent`; releases that know the 'reg:squarederror' objective used
# here also accept `verbosity`, where 0 means silent.
# NOTE(review): assumes XGBMsimple forwards these parameters to XGBoost
# unchanged - confirm against models.LGBM.
xgb_params = {
    'colsample_bytree': 0.8,
    'learning_rate': 0.02,
    'max_depth': 6,
    'subsample': 0.8,
    'objective': 'reg:squarederror',
    'min_child_weight': 10,
    'verbosity': 0
}

# Same preprocessing steps as the LGBM configs, with no_last_week=0.
xgbm_simple = {
    'name': 'XGBMsimple',
    'model': XGBMsimple(xgb_params=xgb_params),
    'preprocessing': {
        'demand': [Logarithm(), LGBM_impute_nan_demand(), LGBM_demand_features(no_last_week=0)],
        'weather': [LGBM_impute_nan_weather(), LGBM_weather_features()],
        'prepare_test_dfs': [LGBM_prepare_test_dfs()]
    },
    'deterministic': False
}

# Add it to the master list of configs.
models_configs += [
    xgbm_simple
]

## TSMIX
...

In [None]:
from models.TSMix import TSMix

# TSMix is built with train_epochs=50 and dropout=0.8. Only the demand is
# preprocessed (Logarithm, then LGBM_impute_nan_demand); no weather steps.
tsmix_model = TSMix(train_epochs=50, dropout=0.8)
tsmix_preprocessing = {
    'demand': [Logarithm(), LGBM_impute_nan_demand()],
    'weather': []
}

tsmix = {
    'name': 'TSMix',
    'model': tsmix_model,
    'preprocessing': tsmix_preprocessing,
    'deterministic': False
}

# Add it to the master list of configs.
models_configs.append(tsmix)

## Wavenet
...

In [None]:
from models.wavenet import WaveNetModel, WaveNet_prepare_test_dfs, cfg

# WaveNet is built from the `cfg` object imported alongside the model. No
# demand or weather steps are configured, only the WaveNet-specific
# 'prepare_test_dfs' step.
wavenet_model = WaveNetModel(cfg)
wavenet_preprocessing = {
    'demand': [],
    'weather': [],
    'prepare_test_dfs': [WaveNet_prepare_test_dfs()]
}

wavenet = {
    'name': 'WaveNet',
    'model': wavenet_model,
    'preprocessing': wavenet_preprocessing,
    'deterministic': False
}

# Add it to the master list of configs.
models_configs.append(wavenet)

## Phase train

In [4]:
# Now, we can run the training of all these models and see how they perform
# Switch the evaluator to the 'train' phase before registering any model.
wfe.curr_phase='train'

# Register every collected config with the evaluator. The recorded output of
# this cell logs "Evaluating <name> with seed <k> in train phase" lines, which
# suggests that add_model also runs the evaluation.
for config in models_configs:
    wfe.add_model(config)

# See how they perform with the dashboard


Evaluating PrevWeek_v2 with seed 0 in train phase


100%|██████████| 52/52 [00:00<00:00, 415.28it/s]


Evaluating AutoRollingAverage with seed 0 in train phase


100%|██████████| 52/52 [00:04<00:00, 11.54it/s]


Evaluating AutoRollingAverage with seed 1 in train phase


100%|██████████| 52/52 [00:05<00:00, 10.36it/s]


Evaluating AutoRollingAverage with seed 2 in train phase


100%|██████████| 52/52 [00:04<00:00, 10.92it/s]


Evaluating AutoRollingAverage with seed 3 in train phase


100%|██████████| 52/52 [00:04<00:00, 11.07it/s]


Evaluating AutoRollingAverage with seed 4 in train phase


100%|██████████| 52/52 [00:04<00:00, 11.03it/s]


In [5]:
# TODO: test the reconciliation strategies


In [6]:
# Run the selected models on the test part.
# Switch the evaluator to the 'test' phase before registering any model.
wfe.curr_phase='test'
# Subset of the configs defined in the cells above that is carried over to
# the test phase.
selected_models = [auto_rollaw,
                   pattern_regression,
                   prophet,
                   lgbm_simple,
                   lgbm_robust,
                   xgbm_simple,
                   lgbm_simple_with_last_week,
                   tsmix,
                   wavenet]
# No strategy has been chosen yet; this variable is not read anywhere in the
# visible cells.
selected_strategy = None

# Register the selected configs with the evaluator, now in the 'test' phase.
for config in selected_models:
    wfe.add_model(config)


Evaluating PrevWeek_v2 with seed 0 in test phase


100%|██████████| 4/4 [00:00<00:00, 262.55it/s]


Evaluating AutoRollingAverage with seed 0 in test phase


100%|██████████| 4/4 [00:00<00:00,  9.33it/s]


Evaluating AutoRollingAverage with seed 1 in test phase


100%|██████████| 4/4 [00:00<00:00,  9.86it/s]


Evaluating AutoRollingAverage with seed 2 in test phase


100%|██████████| 4/4 [00:00<00:00, 10.88it/s]


Evaluating AutoRollingAverage with seed 3 in test phase


100%|██████████| 4/4 [00:00<00:00, 11.03it/s]


Evaluating AutoRollingAverage with seed 4 in test phase


100%|██████████| 4/4 [00:00<00:00,  9.79it/s]


Evaluating AutoRollingAverage with seed 5 in test phase


100%|██████████| 4/4 [00:00<00:00,  8.60it/s]


Evaluating AutoRollingAverage with seed 6 in test phase


100%|██████████| 4/4 [00:00<00:00, 10.29it/s]


Evaluating AutoRollingAverage with seed 7 in test phase


100%|██████████| 4/4 [00:00<00:00,  8.67it/s]


Evaluating AutoRollingAverage with seed 8 in test phase


100%|██████████| 4/4 [00:00<00:00,  9.02it/s]


Evaluating AutoRollingAverage with seed 9 in test phase


100%|██████████| 4/4 [00:00<00:00,  8.74it/s]


In [7]:
# here we have the final result to hand in for the submission

# Iteration 2
We get the new data

In [8]:
# load the new data
# NOTE(review): the statements below are wrapped in a string literal, so none
# of them run. Remove the triple quotes to re-enable the Iteration 2 test
# phase (next_iter, switch to 'test', register the selected models).
"""
wfe.next_iter()
wfe.curr_phase='test'

for config in selected_models:
    wfe.add_model(config)
"""

Evaluating PrevWeek_v2 with seed 0 in test phase


100%|██████████| 4/4 [00:00<00:00, 364.44it/s]


Evaluating AutoRollingAverage with seed 0 in test phase


100%|██████████| 4/4 [00:00<00:00, 14.37it/s]


Evaluating AutoRollingAverage with seed 1 in test phase


100%|██████████| 4/4 [00:00<00:00, 11.51it/s]


Evaluating AutoRollingAverage with seed 2 in test phase


100%|██████████| 4/4 [00:00<00:00, 12.98it/s]


Evaluating AutoRollingAverage with seed 3 in test phase


100%|██████████| 4/4 [00:00<00:00,  9.61it/s]


Evaluating AutoRollingAverage with seed 4 in test phase


100%|██████████| 4/4 [00:00<00:00, 10.11it/s]


Evaluating AutoRollingAverage with seed 5 in test phase


100%|██████████| 4/4 [00:00<00:00, 10.01it/s]


Evaluating AutoRollingAverage with seed 6 in test phase


100%|██████████| 4/4 [00:00<00:00, 13.29it/s]


Evaluating AutoRollingAverage with seed 7 in test phase


100%|██████████| 4/4 [00:00<00:00, 11.35it/s]


Evaluating AutoRollingAverage with seed 8 in test phase


100%|██████████| 4/4 [00:00<00:00, 11.22it/s]


Evaluating AutoRollingAverage with seed 9 in test phase


100%|██████████| 4/4 [00:00<00:00,  9.16it/s]
