In [None]:
%load_ext dotenv
%dotenv

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch

# Restrict PyTorch to a single thread for intra-op CPU parallelism.
# NOTE(review): presumably to avoid CPU oversubscription and keep runs comparable — confirm.
torch.set_num_threads(1)

import skopt
from skopt.space import Integer, Real, Categorical
from skopt.plots import plot_objective

from drn import *

from generate_synthetic_dataset import generate_synthetic_gamma_lognormal

from hyperparameter_tuning_objectives import objective_cann, objective_ddr, objective_drn, objective_mdn

In [None]:
# Directory that receives every fitted model saved by this notebook.
MODEL_DIR = Path("models") / "synth"
# Create it (including missing parents) up front; an existing directory is fine.
MODEL_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Simulate 20,000 synthetic observations. The generator returns the features,
# the target, and two further objects bound to `means` and `dispersion`
# (presumably the true per-observation parameters — confirm in
# generate_synthetic_dataset).
features, target, means, dispersion = generate_synthetic_gamma_lognormal(20000)

# Place features and target side by side and persist them as a flat CSV
# (without the index) in the current working directory.
df = pd.concat([features, target], axis="columns")
df.to_csv("synth.csv", index=False)
df

In [None]:
# Split the synthetic data into train / validation / test partitions with a
# fixed seed. X_1 and X_2 are passed as the third argument and the fourth
# argument is an empty list (presumably numerical vs. categorical feature
# names — confirm against drn.split_and_preprocess). `num_standard=False` is
# forwarded as given.
(
    x_train, x_val, x_test,
    y_train, y_val, y_test,
    x_train_raw, x_val_raw, x_test_raw,
    num_features, cat_features,
    all_categories, ct,
) = split_and_preprocess(
    features,
    target,
    ['X_1', 'X_2'],
    [],
    seed=42,
    num_standard=False,
)
x_train

In [None]:
# Convert each pandas partition into a torch tensor (upper-case names are the
# tensor counterparts of the lower-case pandas objects).
X_train, Y_train = torch.Tensor(x_train.values), torch.Tensor(y_train.values)
X_val, Y_val = torch.Tensor(x_val.values), torch.Tensor(y_val.values)
X_test, Y_test = torch.Tensor(x_test.values), torch.Tensor(y_test.values)

# Paired (features, target) datasets consumed by the training routines below.
# No dataset is built for the test partition here.
train_dataset = torch.utils.data.TensorDataset(X_train, Y_train)
val_dataset = torch.utils.data.TensorDataset(X_val, Y_val)

In [None]:
# Notebook-wide distributional assumption for the GLM, CANN and MDN.
# Later cells compare this against "gaussian" to choose the loss function and
# otherwise fall back to the gamma losses.
distribution = "gamma" # distributional assumption for the GLM, CANN, MDN

### GLM

In [None]:
# Fix the torch RNG so the fit is repeatable.
torch.manual_seed(23)

# Baseline GLM built from the training tensors under the chosen distribution;
# later cells pass it to the CANN and DRN.
glm = GLM.from_statsmodels(X_train, Y_train, distribution=distribution)

# Persist the whole model object alongside the other fitted models.
torch.save(glm, MODEL_DIR.joinpath("glm.pkl"))

### CANN

In [None]:
# Hyperparameter search space for the CANN. The order of the dimensions is the
# order of the values in `res_cann_synth.x`, which the training cell indexes.
space_cann_synth = [
    Integer(1, 4, name='num_hidden_layers'),
    Categorical([16, 32, 64, 128, 256, 512], name='hidden_size'),
    Real(0.0, 0.5, name='dropout_rate'),
    Real(0.0002, 0.01, name='lr'),
    Categorical([128, 256, 512], name='batch_size'),
]

# Run Bayesian optimization: 125 objective evaluations in total, the first 25
# at randomly drawn points.
res_cann_synth = skopt.gp_minimize(
    lambda params: objective_cann(
        params,
        X_train,
        Y_train,
        X_val,
        Y_val,
        train_dataset,
        val_dataset,
        glm,
        # Use the notebook-wide setting rather than a literal so that tuning
        # always matches the GLM and the final CANN training.
        distribution,
    ),
    space_cann_synth,
    n_calls=125,
    n_random_starts=25,
    random_state=0,
    verbose=True
)

# Best point found, in the same order as `space_cann_synth`.
best_cann_params = res_cann_synth.x
print(best_cann_params)

In [None]:
# Fix the torch RNG so the final fit is repeatable.
torch.manual_seed(23)

# best_cann_params layout (see space_cann_synth):
#   [0] num_hidden_layers, [1] hidden_size, [2] dropout_rate, [3] lr, [4] batch_size
cann = CANN(
    glm,
    num_hidden_layers=int(best_cann_params[0]),
    hidden_size=int(best_cann_params[1]),
    dropout_rate=best_cann_params[2],
)

# Train with the deviance loss that matches the distributional assumption.
train(
    cann,
    gaussian_deviance_loss if distribution == "gaussian" else gamma_deviance_loss,
    train_dataset,
    val_dataset,
    lr=best_cann_params[3],
    batch_size=int(best_cann_params[4]),
    epochs=2000,
    patience=50,
)

# Update the model's dispersion from the training data after fitting
# (presumably a re-estimate; confirm against CANN.update_dispersion).
cann.update_dispersion(X_train, Y_train)
torch.save(cann, MODEL_DIR.joinpath("cann.pkl"))

### MDN

In [None]:
# Hyperparameter search space for the MDN. The order of the dimensions is the
# order of the values in `res_mdn_synth.x`, which the training cell indexes.
space_mdn_synth = [
    Integer(1, 4, name='num_hidden_layers'),
    Categorical([16, 32, 64, 128, 256, 512], name='hidden_size'),
    Real(0.0, 0.5, name='dropout_rate'),
    Real(0.0002, 0.01, name='lr'),
    Integer(2, 10, name='num_components'),
    Categorical([128, 256, 512], name='batch_size'),
]

# Run Bayesian optimization: 125 objective evaluations in total, the first 25
# at randomly drawn points.
res_mdn_synth = skopt.gp_minimize(
    lambda params: objective_mdn(
        params,
        X_train,
        Y_train,
        X_val,
        Y_val,
        train_dataset,
        val_dataset,
        # Use the notebook-wide setting rather than a literal so that tuning
        # always matches the distribution used for the final MDN training.
        distribution,
    ),
    space_mdn_synth,
    n_calls=125,
    n_random_starts=25,
    random_state=0,
    verbose=True
)

# Best point found, in the same order as `space_mdn_synth`.
best_mdn_params = res_mdn_synth.x
print(best_mdn_params)

In [None]:
# Fix the torch RNG so the final fit is repeatable.
torch.manual_seed(23)

# best_mdn_params layout (see space_mdn_synth):
#   [0] num_hidden_layers, [1] hidden_size, [2] dropout_rate,
#   [3] lr, [4] num_components, [5] batch_size
mdn = MDN(
    X_train.shape[1],
    num_hidden_layers=int(best_mdn_params[0]),
    hidden_size=int(best_mdn_params[1]),
    dropout_rate=best_mdn_params[2],
    num_components=int(best_mdn_params[4]),
    distribution=distribution,
)

# Train with the mixture loss that matches the distributional assumption.
train(
    mdn,
    gaussian_mdn_loss if distribution == "gaussian" else gamma_mdn_loss,
    train_dataset,
    val_dataset,
    epochs=2000,
    patience=50,
    lr=best_mdn_params[3],
    batch_size=int(best_mdn_params[5]),
)
torch.save(mdn, MODEL_DIR.joinpath("mdn.pkl"))

### DDR

In [None]:
# Hyperparameter search space for the DDR. The order of the dimensions is the
# order of the values in `res_ddr_synth.x`, which the training cell indexes.
# `proportion` is searched over 9 evenly spaced values in [0.01, 0.03].
space_ddr_synth = [
    Integer(1, 4, name='num_hidden_layers'),
    Categorical([16, 32, 64, 128, 256, 512], name='hidden_size'),
    Real(0.0, 0.5, name='dropout_rate'),
    Real(0.0002, 0.01, name='lr'),
    Categorical(np.linspace(0.01, 0.03, 9), name='proportion'),
    Categorical([128, 256, 512], name='batch_size'),
]

# Bayesian optimization: 125 objective evaluations in total, the first 25 at
# randomly drawn points.
res_ddr_synth = skopt.gp_minimize(
    lambda candidate: objective_ddr(
        candidate,
        X_train,
        Y_train,
        X_val,
        Y_val,
        train_dataset,
        val_dataset,
    ),
    space_ddr_synth,
    n_calls=125,
    n_random_starts=25,
    random_state=0,
    verbose=True,
)

# Best point found, in the same order as `space_ddr_synth`.
best_ddr_params = res_ddr_synth.x
print(best_ddr_params)

In [None]:
# Fix the torch RNG so the final fit is repeatable.
torch.manual_seed(23)

# best_ddr_params layout (see space_ddr_synth):
#   [0] num_hidden_layers, [1] hidden_size, [2] dropout_rate,
#   [3] lr, [4] proportion, [5] batch_size
#
# Cutpoints span from c_0 to c_K: the lower end is 0 unless the training target
# has negative values (then 1.05 x its minimum); the upper end is 1.05 x the
# training maximum.
cutpoints_DDR = ddr_cutpoints(
    c_0=np.min(y_train) * 1.05 if np.min(y_train) < 0 else 0.0,
    c_K=np.max(y_train) * 1.05,
    y=y_train,
    p=best_ddr_params[4],
)
ddr = DDR(
    x_train.shape[1],
    cutpoints_DDR,
    num_hidden_layers=int(best_ddr_params[0]),
    hidden_size=int(best_ddr_params[1]),
    dropout_rate=best_ddr_params[2],
)
train(
    ddr,
    ddr_loss,
    train_dataset,
    val_dataset,
    epochs=2000,
    patience=30,
    log_interval=1,
    lr=best_ddr_params[3],
    batch_size=int(best_ddr_params[5]),
)
torch.save(ddr, MODEL_DIR.joinpath("ddr.pkl"))

### DRN

In [None]:
# Hyperparameter search space for the DRN. The order of the dimensions is the
# order of the values in `res_drn_synth.x`, which the training cell indexes.
# `lr` and `kl_alpha` are sampled log-uniformly; `proportion` is searched over
# 5 evenly spaced values in [0.02, 0.03].
space_drn_synth = [
    Integer(1, 4, name='num_hidden_layers'),
    Categorical([16, 32, 64, 128, 256, 512], name='hidden_size'),
    Real(0.0, 0.5, name='dropout_rate'),
    Real(0.0002, 0.01, name='lr', prior='log-uniform'),
    Real(1e-5, 1e-1, name='kl_alpha', prior='log-uniform'),
    Categorical([1e-5, 1e-4, 1e-3, 1e-2, 1e-1], name='mean_alpha'),
    Categorical([1e-3, 1e-2, 1e-1], name='dv_alpha'),
    Categorical([128, 256, 512], name='batch_size'),
    Categorical(np.linspace(0.02, 0.03, 5), name='proportion'),
    Categorical([1, 3, 5], name='min_obs'),
]

# Bayesian optimization: 125 objective evaluations in total, the first 25 at
# randomly drawn points. Candidates are scored with the 'CRPS' criterion and
# the 'forwards' KL direction, matching the final training cell.
res_drn_synth = skopt.gp_minimize(
    lambda candidate: objective_drn(
        candidate,
        criteria='CRPS',
        X_train=X_train,
        Y_train=Y_train,
        X_val=X_val,
        Y_val=Y_val,
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        glm=glm,
        kl_direction='forwards',
    ),
    space_drn_synth,
    n_calls=125,
    n_random_starts=25,
    random_state=0,
    verbose=True,
)

# Best point found, in the same order as `space_drn_synth`.
best_drn_params = res_drn_synth.x
print(best_drn_params)

# If we wish, we can generate the following 'correlation' plot of
# loss function values vs. hyperparameters. Every tick/label/title font is
# shrunk to 'x-small' for the duration of the plot only.
small_font_keys = (
    'xtick.labelsize',
    'ytick.labelsize',
    'axes.labelsize',
    'axes.titlesize',
)
with plt.rc_context({key: 'x-small' for key in small_font_keys}):
    plot_objective(res_drn_synth)
    plt.show()

In [None]:
# Fix the torch RNG so the final fit is repeatable.
torch.manual_seed(23)

# best_drn_params layout (see space_drn_synth):
#   [0] num_hidden_layers, [1] hidden_size, [2] dropout_rate, [3] lr,
#   [4] kl_alpha, [5] mean_alpha, [6] dv_alpha, [7] batch_size,
#   [8] proportion, [9] min_obs
#
# Cutpoints span from c_0 to c_K: the lower end is 0 unless the training target
# has negative values (then 1.05 x its minimum); the upper end is 1.05 x the
# training maximum.
cutpoints_DRN = drn_cutpoints(
    c_0=(
        np.min(Y_train.detach().numpy()) * 1.05
        if np.min(Y_train.detach().numpy()) < 0
        else 0.0
    ),
    c_K=np.max(Y_train.detach().numpy()) * 1.05,
    p=best_drn_params[8],
    y=Y_train.detach().numpy(),
    min_obs=int(best_drn_params[9]),
)

# The DRN is built around the previously fitted GLM.
drn = DRN(
    num_features=X_train.shape[1],
    cutpoints=cutpoints_DRN,
    glm=glm,
    num_hidden_layers=int(best_drn_params[0]),
    hidden_size=int(best_drn_params[1]),
    dropout_rate=best_drn_params[2],
    baseline_start=False,
)

# Train with the DRN loss, binding the tuned penalty weights and the same
# 'forwards' KL direction that was used during tuning.
train(
    model=drn,
    criterion=lambda pred, y: drn_loss(
        pred,
        y,
        kl_alpha=best_drn_params[4],
        mean_alpha=best_drn_params[5],
        dv_alpha=best_drn_params[6],
        kl_direction='forwards',
    ),
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    lr=best_drn_params[3],
    batch_size=int(best_drn_params[7]),
    epochs=2000,
    patience=30,
    log_interval=1,
    print_details=True,
)
torch.save(drn, MODEL_DIR.joinpath("drn.pkl"))