# 4. Hyperparameter search for pretraining phase

In [1]:
# allows for import from `src` directory
import sys 
sys.path.append('../')

from src import data
from src import utils

from models.encoder_decoder_dropout import *

## 4.1 Set up data

In [2]:
# fixed data parameters
data_params = {
    'n_input_steps': 48,
    'n_output_steps': 12
}

# run the data preprocessing pipeline to create dataset
_, _, samples = data.pipeline(data_params['n_input_steps'], data_params['n_output_steps'], '../data')
datasets = data.get_datasets(samples, data_params['n_input_steps'])


Data already downloaded

43910 train rows from 2012-10-02 09:00:00 to 2017-10-05 23:00:00
4320 valid rows from 2017-10-05 23:00:00 to 2018-04-03 23:00:00
4321 test rows from 2018-04-03 23:00:00 to 2018-09-30 23:00:00

16625 samples of 48 input steps and 12 output steps in train
3534 samples of 48 input steps and 12 output steps in valid
4020 samples of 48 input steps and 12 output steps in test



## 4.2 Bayesian optimisation

### Get device

We are using a GPU on a GCE instances as this is a compute heavy job

In [3]:
utils.get_device()

device(type='cuda', index=0)

### Define function to optimise

Wrap the `utils.train` and `utils.evaluate` to define the function to be optimised.

This function takes in a dictionary of hyperparameters and returns the loss value on the validation data using the model trained according to the passed in hyperparameters. We want to know which hyperparameters will minimise this validation loss, so we will use `Ax` to do a guided search using Bayesian optimisation.

In [4]:
def train_evaluate(params):
    device = utils.get_device()
    dataloaders = data.get_dataloaders(datasets, params.get('batch_size'))
    in_features = dataloaders['train'].dataset.X.shape[-1]
    model = VDEncoderDecoder(in_features=in_features,
                             input_steps=params.get('n_input_steps', 48),
                             output_steps=params.get('n_output_steps', 12),
                             p=params.get('variational_dropout_p')
                            ).to(device)
    model,_ = utils.train(device=device, model=model, dataloader=dataloaders['train'], params=params, use_tqdm=True)
    return utils.evaluate(device=device, model=model, valid_loader=dataloaders['valid'])

### Set up `Ax` client

We mainly follow the tutorial at https://ax.dev/tutorials/gpei_hartmann_service.html

In [6]:
from ax.service.ax_client import AxClient

ax_client = AxClient(enforce_sequential_optimization=False)

# define hyperparameter bounds 
ax_client.create_experiment(
    name='pretraining',
    parameters=[
        {"name": "num_epochs", "type": "range", "bounds": [150, 200]},
        {"name": "learning_rate", "type": "range", "bounds": [5e-4, 1e-3], "log_scale": True},
        {"name": "batch_size", "type": "range", "bounds": [128, 1024]},
        {"name": "variational_dropout_p", "type": "range", "bounds": [0.2,0.4]}
    ],
    objective_name='loss',
    minimize=True
)

def evaluate(parameters):
    return {"pretraining": train_evaluate(parameters)}


[INFO 09-13 18:14:12] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 2 decimal points.
[INFO 09-13 18:14:12] ax.modelbridge.dispatch_utils: Using Bayesian Optimization generation strategy: GenerationStrategy(name='Sobol+GPEI', steps=[Sobol for 5 trials, GPEI for subsequent trials]). Iterations after 5 will take longer to generate due to  model-fitting.


### Launch the optimisation loop

In [6]:
# run 20 jobs
for i in range(20):
    parameters, trial_index = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=trial_index, raw_data=evaluate(parameters)['pretraining']['loss'])

[INFO 09-13 16:29:04] ax.service.ax_client: Generated new trial 0 with parameters {'num_epochs': 181, 'learning_rate': 0.0, 'batch_size': 381, 'variational_dropout_p': 0.38}.
Epoch=180 | [16625|16625]	loss=1.2937: 100%|██████████| 181/181 [04:31<00:00,  1.50s/it]
[INFO 09-13 16:33:38] ax.service.ax_client: Completed trial 0 with data: {'loss': (0.44, None)}.
[INFO 09-13 16:33:38] ax.service.ax_client: Generated new trial 1 with parameters {'num_epochs': 168, 'learning_rate': 0.0, 'batch_size': 725, 'variational_dropout_p': 0.37}.
Epoch=167 | [16625|16625]	loss=1.3428: 100%|██████████| 168/168 [03:07<00:00,  1.11s/it]
[INFO 09-13 16:36:46] ax.service.ax_client: Completed trial 1 with data: {'loss': (0.92, None)}.
[INFO 09-13 16:36:46] ax.service.ax_client: Generated new trial 2 with parameters {'num_epochs': 188, 'learning_rate': 0.0, 'batch_size': 648, 'variational_dropout_p': 0.23}.
Epoch=187 | [16625|16625]	loss=0.7925: 100%|██████████| 188/188 [03:37<00:00,  1.16s/it]
[INFO 09-13 16

In [7]:
ax_client.load_from_json_file(
    '../model_artifacts/pretraining_hparam_search.json')

[INFO 09-13 18:14:41] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 2 decimal points.


<ax.service.ax_client.AxClient at 0x7f1a4c4236d0>

In [8]:
# run 20 jobs
for i in range(20):
    parameters, trial_index = ax_client.get_next_trial()
    ax_client.complete_trial(trial_index=trial_index, raw_data=evaluate(parameters)['pretraining']['loss'])

[INFO 09-13 18:14:45] ax.service.ax_client: Generated new trial 0 with parameters {'num_epochs': 195, 'learning_rate': 0.0, 'batch_size': 560, 'variational_dropout_p': 0.25}.
Epoch=194 | [16625|16625]	loss=0.8907: 100%|██████████| 195/195 [04:05<00:00,  1.26s/it]
[INFO 09-13 18:18:54] ax.service.ax_client: Completed trial 0 with data: {'loss': (0.29, None)}.
[INFO 09-13 18:18:54] ax.service.ax_client: Generated new trial 1 with parameters {'num_epochs': 188, 'learning_rate': 0.0, 'batch_size': 594, 'variational_dropout_p': 0.35}.
Epoch=187 | [16625|16625]	loss=1.1547: 100%|██████████| 188/188 [04:20<00:00,  1.39s/it]
[INFO 09-13 18:23:15] ax.service.ax_client: Completed trial 1 with data: {'loss': (0.45, None)}.
[INFO 09-13 18:23:15] ax.service.ax_client: Generated new trial 2 with parameters {'num_epochs': 196, 'learning_rate': 0.0, 'batch_size': 1002, 'variational_dropout_p': 0.29}.
Epoch=195 | [16625|16625]	loss=1.0670: 100%|██████████| 196/196 [03:35<00:00,  1.10s/it]
[INFO 09-13 1

### Get best parameters after 40 iterations of search

In [9]:
ax_client.get_best_parameters()

({'num_epochs': 192,
  'learning_rate': 0.0005753955111014699,
  'batch_size': 292,
  'variational_dropout_p': 0.24793713575125884},
 ({'loss': 0.259330976534118}, {'loss': {'loss': 0.0007888983119634765}}))

### Save results

In [10]:
ax_client.save_to_json_file(
    '../model_artifacts/pretraining_hparam_search.json')

[INFO 09-13 20:03:24] ax.service.ax_client: Saved JSON-serialized state of optimization to `../model_artifacts/pretraining_hparam_search.json`.
