In [None]:
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv.
# The call is made outside the assert so that it still runs when assertions
# are stripped (python -O / PYTHONOPTIMIZE); the assert only checks the result.
_dotenv_loaded = load_dotenv(find_dotenv(usecwd=False))
assert _dotenv_loaded, "No .env file found, please create one"

import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import skopt
import torch
from drn import *
from skopt.space import Categorical, Integer, Real

from hyperparameter_tuning_objectives import (
    objective_cann,
    objective_ddr,
    objective_drn,
    objective_mdn,
    seed_everything,
)

# Restrict PyTorch to a single thread for intra-op parallelism on the CPU.
torch.set_num_threads(1)

In [None]:
# Use the first CUDA device when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [None]:
# Output directory for the fitted models and tuning results saved by this
# notebook; created (including parents) if it does not exist yet.
MODEL_DIR = Path("models") / "real"
MODEL_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Keyword options unpacked into every skopt.gp_minimize call in this notebook.
# NOTE(review): recent scikit-optimize releases document `n_random_starts` as
# deprecated in favour of `n_initial_points` — confirm against the installed
# version before renaming.
hp_opts = dict(n_calls=200, n_random_starts=25, verbose=False)

In [None]:
# Read the raw records and keep only the rows with a strictly positive claim.
csv_file_path = "data/freMPL1.csv"
df = pd.read_csv(csv_file_path)
claims = df[df["ClaimAmount"] > 0]

In [None]:
# Scaling: the target is the claim amount divided by 1000.
target = claims["ClaimAmount"] / 1000

# Predictors: everything except the target and the columns dropped below.
features = claims.drop("ClaimAmount", axis=1)
features = features.drop(
    ["RecordBeg", "RecordEnd", "ClaimInd", "Garage"], axis=1
)  # Drop garage due to missing values

# Convert "VehAge" categories to numeric.
# NOTE(review): Series.map turns any label missing from this dict into NaN —
# confirm the data contains no other "VehAge" labels.
features["VehAge"] = features["VehAge"].map(
    {
        "0": 0,
        "1": 1,
        "2": 2,
        "3": 3,
        "4": 4,
        "5": 5,
        "6-7": 6,
        "8-9": 8,
        "10+": 11,
    }
)
feature_names = features.columns

# Encode each distinct "VehMaxSpeed" label by its 1-based position in the
# sorted array of unique labels returned by np.unique.
speed_ranges = [speed for speed in np.unique(features["VehMaxSpeed"])]
speed_series = pd.Series(speed_ranges)
mapping = {speed_range: i + 1 for i, speed_range in enumerate(speed_ranges)}
features["VehMaxSpeed"] = features["VehMaxSpeed"].map(mapping)

# Keep the first run of digits of each "SocioCateg" label as an integer.
# The pattern is a raw string so that "\d" is not parsed as a (invalid) string
# escape, and expand=False makes str.extract return a Series rather than a
# one-column DataFrame.
features["SocioCateg"] = (
    features["SocioCateg"].str.extract(r"(\d+)", expand=False).astype(int)
)

# Columns treated as categorical; every other column is treated as numeric.
cat_features = [
    "HasKmLimit",
    "Gender",
    "MariStat",
    "VehUsage",
    "VehBody",
    "VehPrice",
    "VehEngine",
    "VehEnergy",
    "VehClass",
    "SocioCateg",
]

num_features = [feature for feature in features.columns if feature not in cat_features]

In [None]:
# Split and preprocess the data (seed=0 is passed to split_and_preprocess).
_split_outputs = split_and_preprocess(
    features, target, num_features, cat_features, seed=0
)
(
    x_train, x_val, x_test,
    y_train, y_val, y_test,
    x_train_raw, x_val_raw, x_test_raw,
    num_features, cat_features,
    all_categories,
    ct,
) = _split_outputs

# Display (max, median) of the target for y_train, y_val and y_test, in that order.
tuple(
    summarise(y_part)
    for y_part in (y_train, y_val, y_test)
    for summarise in (np.max, np.median)
)

In [None]:
# Convert every split to a torch.Tensor and move it to the selected device.
X_train, Y_train, X_val, Y_val, X_test, Y_test = (
    torch.Tensor(part.values).to(device)
    for part in (x_train, y_train, x_val, y_val, x_test, y_test)
)

# Paired (features, target) datasets passed to train() in the cells below.
train_dataset = torch.utils.data.TensorDataset(X_train, Y_train)
val_dataset = torch.utils.data.TensorDataset(X_val, Y_val)

In [None]:
# Response distribution used throughout the notebook: it is passed to the GLM
# and MDN constructors and selects the gaussian vs. gamma loss for CANN and MDN.
distribution = "gamma"

In [None]:
# Generate random seeds for the various training runs.
# The dtype is pinned to int64: by default randint draws the platform C long,
# which is 32-bit on Windows with older NumPy and rejects a 2**32 - 1 bound.
np.random.seed(2026)
seeds = [int(s) for s in np.random.randint(0, 2**32 - 1, size=8, dtype=np.int64)]
seeds

### GLM

In [None]:
# Fit the GLM on the training tensors with the configured distribution and
# save it; the CANN and DRN cells receive this object as `glm`.
glm = GLM.from_statsmodels(X_train, Y_train, distribution=distribution)
torch.save(glm, MODEL_DIR.joinpath("glm.pkl"))

### CANN

In [None]:
seed_everything(seeds[0])

# CANN search space. The order of the dimensions fixes the order of the tuned
# values in `res_cann_real.x`, which the training cell indexes by position.
space_cann_real = [
    Integer(1, 6, name="num_hidden_layers"),
    Categorical([32, 64, 128, 256, 512], name="hidden_size"),
    Real(0.0, 0.5, name="dropout_rate"),
    Real(0.0001, 0.01, name="lr"),
    Categorical([64, 128, 256, 512], name="batch_size"),
]

res_cann_real = skopt.gp_minimize(
    lambda params: objective_cann(
        params,
        glm,
        # Use the notebook-wide setting rather than a literal so the search
        # stays consistent with the GLM fit and the final CANN loss.
        distribution,
        X_train,
        Y_train,
        X_val,
        Y_val,
        device,
        patience=50,
    ),
    space_cann_real,
    **hp_opts,
)
# Remove the stored objective before pickling (presumably because the lambda
# cannot be pickled). This runs before the file is opened so that a failure
# here does not leave an empty .pkl behind.
res_cann_real.specs["args"].pop("func")
with open(MODEL_DIR / "cann_hp.pkl", "wb") as f:
    pickle.dump(res_cann_real, f)
best_cann_params = res_cann_real.x
print(best_cann_params)

In [None]:
seed_everything(seeds[1])

# Tuned values arrive in search-space order:
# num_hidden_layers, hidden_size, dropout_rate, lr, batch_size.
cann_layers, cann_width, cann_dropout, cann_lr, cann_batch = best_cann_params

cann = CANN(
    glm,
    num_hidden_layers=int(cann_layers),
    hidden_size=int(cann_width),
    dropout_rate=cann_dropout,
)

# Deviance loss matching the configured response distribution.
if distribution == "gaussian":
    cann_loss = gaussian_deviance_loss
else:
    cann_loss = gamma_deviance_loss

train(
    cann,
    cann_loss,
    train_dataset,
    val_dataset,
    epochs=2000,
    lr=cann_lr,
    batch_size=int(cann_batch),
    device=device,
    patience=50,
)
cann.update_dispersion(X_train, Y_train)
torch.save(cann, MODEL_DIR.joinpath("cann.pkl"))

### MDN

In [None]:
seed_everything(seeds[2])

# MDN search space. The order of the dimensions fixes the order of the tuned
# values in `res_mdn_real.x`, which the training cell indexes by position.
space_mdn_real = [
    Integer(1, 6, name="num_hidden_layers"),
    Categorical([32, 64, 128, 256, 512], name="hidden_size"),
    Real(0.0, 0.5, name="dropout_rate"),
    Real(0.0001, 0.01, name="lr"),
    Integer(2, 10, name="num_components"),
    Categorical([64, 128, 256, 512], name="batch_size"),
]

res_mdn_real = skopt.gp_minimize(
    lambda params: objective_mdn(
        params,
        # Use the notebook-wide setting rather than a literal so the search
        # stays consistent with the distribution of the final MDN fit.
        distribution,
        X_train,
        Y_train,
        X_val,
        Y_val,
        device,
        patience=50,
    ),
    space_mdn_real,
    **hp_opts,
)
# Remove the stored objective before pickling (presumably because the lambda
# cannot be pickled). This runs before the file is opened so that a failure
# here does not leave an empty .pkl behind.
res_mdn_real.specs["args"].pop("func")
with open(MODEL_DIR / "mdn_hp.pkl", "wb") as f:
    pickle.dump(res_mdn_real, f)
best_mdn_params = res_mdn_real.x
print(best_mdn_params)

In [None]:
seed_everything(seeds[3])

# Tuned values arrive in search-space order:
# num_hidden_layers, hidden_size, dropout_rate, lr, num_components, batch_size.
(
    mdn_layers,
    mdn_width,
    mdn_dropout,
    mdn_lr,
    mdn_components,
    mdn_batch,
) = best_mdn_params

mdn = MDN(
    X_train.shape[1],
    num_components=int(mdn_components),
    hidden_size=int(mdn_width),
    num_hidden_layers=int(mdn_layers),
    dropout_rate=mdn_dropout,
    distribution=distribution,
)

# Mixture loss matching the configured response distribution.
if distribution == "gaussian":
    mdn_loss = gaussian_mdn_loss
else:
    mdn_loss = gamma_mdn_loss

train(
    mdn,
    mdn_loss,
    train_dataset,
    val_dataset,
    lr=mdn_lr,
    batch_size=int(mdn_batch),
    epochs=2000,
    device=device,
    patience=50,
)
torch.save(mdn, MODEL_DIR.joinpath("mdn.pkl"))

### DDR

In [None]:
seed_everything(seeds[4])

# DDR search space. The order of the dimensions fixes the order of the tuned
# values in `res_ddr_real.x`.
space_ddr_real = [
    Integer(1, 6, name="num_hidden_layers"),
    Categorical([32, 64, 128, 256, 512], name="hidden_size"),
    Real(0.0, 0.5, name="dropout_rate"),
    Real(0.0002, 0.01, name="lr"),
    Categorical([0.05, 0.075, 0.1, 0.125, 0.15], name="proportion"),
    Categorical([64, 128, 256, 512], name="batch_size"),
]


def _ddr_objective(params):
    """Evaluate one DDR hyperparameter candidate via objective_ddr."""
    return objective_ddr(
        params, X_train, Y_train, X_val, Y_val, device, patience=30
    )


res_ddr_real = skopt.gp_minimize(_ddr_objective, space_ddr_real, **hp_opts)

# Persist the search result without the stored objective callable.
with (MODEL_DIR / "ddr_hp.pkl").open("wb") as hp_file:
    res_ddr_real.specs["args"].pop("func")
    pickle.dump(res_ddr_real, hp_file)

best_ddr_params = res_ddr_real.x
print(best_ddr_params)

In [None]:
seed_everything(seeds[5])

# best_ddr_params is in search-space order:
# num_hidden_layers, hidden_size, dropout_rate, lr, proportion, batch_size.
# NOTE(review): when the smallest target is positive, 1.05 * min lies above
# that observation — confirm this lower cutpoint is intended.
cutpoints = ddr_cutpoints(
    c_0=max(Y_train.min().item() * 1.05, 0.0),
    c_K=Y_train.max().item() * 1.05,
    proportion=best_ddr_params[-2],
    n=X_train.shape[0],
)
ddr = DDR(
    X_train.shape[1],
    cutpoints=cutpoints,
    hidden_size=int(best_ddr_params[1]),
    num_hidden_layers=int(best_ddr_params[0]),
    dropout_rate=best_ddr_params[2],
)
# Train with the tuned learning rate and batch size, as the CANN, MDN and DRN
# cells do, instead of hard-coded values that ignored the search result. The
# earlier extra torch.manual_seed(23) is gone so that seeds[5] alone governs
# this run.
train(
    ddr,
    ddr_loss,
    train_dataset,
    val_dataset,
    lr=best_ddr_params[3],
    batch_size=int(best_ddr_params[-1]),
    log_interval=1,
    epochs=1000,
    device=device,
    patience=30,
)
torch.save(ddr, MODEL_DIR / "ddr.pkl")

### DRN

In [None]:
seed_everything(seeds[6])

# Regularisation weights offered to the search for both mean_alpha and dv_alpha.
_drn_alpha_grid = [0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

# DRN search space. The order of the dimensions fixes the order of the tuned
# values in `res_drn_real.x`, which the training cell indexes by position.
space_drn_real = [
    Integer(1, 6, name="num_hidden_layers"),
    Categorical([32, 64, 128, 256, 512], name="hidden_size"),
    Real(0.0, 0.5, name="dropout_rate"),
    Real(0.0002, 0.01, name="lr", prior="log-uniform"),
    Real(1e-6, 1e-1, name="kl_alpha", prior="log-uniform"),
    Categorical(list(_drn_alpha_grid), name="mean_alpha"),
    Categorical(list(_drn_alpha_grid), name="dv_alpha"),
    Categorical([64, 128, 256, 512], name="batch_size"),
    Categorical([0.1, 0.125, 0.15], name="proportion"),
    Categorical([1, 3, 5], name="min_observation"),
]


def _drn_objective(params):
    """Evaluate one DRN hyperparameter candidate via objective_drn."""
    return objective_drn(
        params,
        glm=glm,
        kl_direction="forwards",
        criteria="CRPS",
        X_train=X_train,
        Y_train=Y_train,
        X_val=X_val,
        Y_val=Y_val,
        device=device,
        patience=30,
    )


res_drn_real = skopt.gp_minimize(_drn_objective, space_drn_real, **hp_opts)

# Persist the search result without the stored objective callable.
with (MODEL_DIR / "drn_hp.pkl").open("wb") as hp_file:
    res_drn_real.specs["args"].pop("func")
    pickle.dump(res_drn_real, hp_file)

best_drn_params = res_drn_real.x
print(best_drn_params)

In [None]:
seed_everything(seeds[7])

# Tuned values arrive in search-space order.
(
    drn_layers,
    drn_width,
    drn_dropout,
    drn_lr,
    drn_kl_alpha,
    drn_mean_alpha,
    drn_dv_alpha,
    drn_batch,
    drn_proportion,
    drn_min_obs,
) = best_drn_params

# Cutpoints span from max(1.05 * min(Y_train), 0) up to 1.05 * max(Y_train).
cutpoints = drn_cutpoints(
    c_0=max(Y_train.min().item() * 1.05, 0.0),
    c_K=Y_train.max().item() * 1.05,
    y=y_train,
    proportion=drn_proportion,
    min_obs=int(drn_min_obs),
)
drn = DRN(
    num_features=X_train.shape[1],
    cutpoints=cutpoints,
    glm=glm,
    hidden_size=int(drn_width),
    num_hidden_layers=int(drn_layers),
    baseline_start=False,
    dropout_rate=drn_dropout,
)
train(
    model=drn,
    # drn_loss with the tuned penalty weights; tv_alpha is fixed at 0.
    criterion=lambda prediction, observed: drn_loss(
        prediction,
        observed,
        kl_alpha=drn_kl_alpha,
        mean_alpha=drn_mean_alpha,
        tv_alpha=0,
        dv_alpha=drn_dv_alpha,
    ),
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    batch_size=int(drn_batch),
    epochs=2000,
    lr=drn_lr,
    device=device,
    patience=30,
    print_details=True,
)
torch.save(drn, MODEL_DIR.joinpath("drn.pkl"))