In [None]:
import torch
import os
import numpy as np
import pandas as pd
from utils import modify_metadata, TARGETS
from input_utils import TrainDataset
from model import EnsembleModel
from training import train, CV_score
import matplotlib.pyplot as plt
from ax.service.managed_loop import optimize

In [None]:
# Input locations, relative to the notebook's working directory.
_DATA_ROOT = "../../data/"
# Despite the "_DIR" suffix this is the path of the metadata CSV file itself
# (it is passed directly to pd.read_csv).
TRAIN_METADATA_DIR = _DATA_ROOT + "train.csv"
# Directory prefix of the precomputed signature tensors; the trailing slash
# matters because file names are appended to it by plain string formatting.
TRAIN_SIGNATURES_DIR = _DATA_ROOT + "train_signatures/"

In [None]:
# Read the raw metadata CSV and run it through the project's preprocessing
# helper in one step, so the name is only ever bound to the processed frame.
train_metadata = modify_metadata(pd.read_csv(TRAIN_METADATA_DIR))

In [None]:
# Show the processed metadata frame as the cell output for a quick visual check.
train_metadata

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
_use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if _use_gpu else "cpu")

# Loss object handed to CV_score for every trial.
criterion = torch.nn.KLDivLoss(reduction="batchmean")

In [None]:
# Candidate values for the data-related hyperparameters.
# Each list feeds the search-space definition in the next cell, and the value
# picked for a trial is interpolated into the signature file name inside
# eval_function.
scaler_types = ["meanvar_1.0"]
logsigs_or_sigs_types = ["sigs"]
# NOTE(review): this is a list of levels despite the singular name, and the
# same name is rebound to a plain int in a later cell; re-run this cell before
# rebuilding the search space after that cell has been executed.
signature_level = [4, 5]

In [None]:
def _choice_or_fixed(name, values, is_ordered):
    """Build an Ax parameter spec for a list of candidate values.

    Ax's dict-based search-space parser requires a "choice" parameter to list
    two or more values, so a single candidate is expressed as a "fixed"
    parameter instead. Fixed parameters are still included in the
    parameterization handed to the evaluation function, so downstream
    ``parameters.get(name)`` lookups keep working.

    Args:
        name: Parameter name as seen by the evaluation function.
        values: Non-empty sequence of candidate values.
        is_ordered: Whether the candidates have a meaningful order (only used
            when more than one candidate is given).

    Returns:
        A dict in the format accepted by ``ax.service.managed_loop.optimize``.

    Raises:
        ValueError: If ``values`` is empty.
    """
    candidates = list(values)
    if not candidates:
        raise ValueError(f"Parameter {name!r} needs at least one candidate value.")
    if len(candidates) == 1:
        return {"name": name, "type": "fixed", "value": candidates[0]}
    return {"name": name, "type": "choice", "values": candidates, "is_ordered": is_ordered}


# Search space for the Ax optimisation loop.
parameters = [
    # Data-related choices; these select which precomputed signature file is loaded.
    _choice_or_fixed("scaler_type", scaler_types, is_ordered=False),
    _choice_or_fixed("logsigs_or_sigs", logsigs_or_sigs_types, is_ordered=False),
    _choice_or_fixed("signature_level", signature_level, is_ordered=True),
    # Optimiser settings, searched over continuous ranges.
    # NOTE(review): both ranges span an order of magnitude or more; consider
    # adding "log_scale": True if uniform sampling proves too coarse.
    {"name": "lr", "type": "range", "bounds": [1e-4, 1e-3]},
    {"name": "weight_decay", "type": "range", "bounds": [1e-5, 1e-3]},
    # Regularisation / training-length settings.
    _choice_or_fixed("dropout", [0.5], is_ordered=True),
    {"name": "early_stopping_epochs", "type": "range", "bounds": [40, 50]},
    # Layer widths passed through to CV_score.
    _choice_or_fixed("classifier_input_dim", [128, 256, 512], is_ordered=True),
    _choice_or_fixed("hidden_layer_dim", [128, 256, 512], is_ordered=True),
]

In [None]:
# CSV log that accumulates one row per evaluated configuration.
log_file = "hyperparameter_search_log.csv"

# Column order must match the row written by eval_function.
_LOG_COLUMNS = (
    "scaler_type",
    "logsigs_or_sigs",
    "signature_level",
    "lr",
    "weight_decay",
    "dropout",
    "early_stopping_epochs",
    "classifier_input_dim",
    "hidden_layer_dim",
    "CV_score",
)

# Write the header only when the log does not exist yet, so results from
# earlier sessions are kept and new rows are appended after them.
if not os.path.exists(log_file):
    with open(log_file, "w") as f:
        f.write(",".join(_LOG_COLUMNS) + "\n")

In [None]:
def eval_function(parameters):
    """Evaluate one hyperparameter configuration with cross-validation.

    Loads the precomputed signature features matching the configuration,
    builds a ``TrainDataset`` from them and the global ``train_metadata``,
    runs ``CV_score``, appends the configuration and its mean score to
    ``log_file``, and returns the negated mean score.

    Args:
        parameters: Mapping with the keys ``scaler_type``, ``logsigs_or_sigs``,
            ``signature_level``, ``lr``, ``weight_decay``, ``dropout``,
            ``early_stopping_epochs``, ``classifier_input_dim`` and
            ``hidden_layer_dim``. Missing keys are read as ``None``.

    Returns:
        ``-np.mean(scores)`` where ``scores`` is the first value returned by
        ``CV_score`` (presumably one score per fold; confirm in training.py).
    """
    # Pull every setting out once so the same values are used for the run and the log row.
    scaler_type = parameters.get("scaler_type")
    logsigs_or_sigs = parameters.get("logsigs_or_sigs")
    signature_level = parameters.get("signature_level")
    lr = parameters.get("lr")
    weight_decay = parameters.get("weight_decay")
    dropout = parameters.get("dropout")
    early_stopping_epochs = parameters.get("early_stopping_epochs")
    classifier_input_dim = parameters.get("classifier_input_dim")
    hidden_layer_dim = parameters.get("hidden_layer_dim")

    # The three data-related settings identify which precomputed tensor file to load.
    signatures_path = (
        f"{TRAIN_SIGNATURES_DIR}all_{logsigs_or_sigs}_lvl_{signature_level}"
        f"_scaler_{scaler_type}_experts_augmented.pt"
    )
    dataset = TrainDataset(train_metadata, torch.load(signatures_path))

    # CV_score also returns train/test losses; only the scores are used here.
    scores, _, _ = CV_score(
        dataset,
        lr,
        weight_decay,
        dropout,
        classifier_input_dim,
        hidden_layer_dim,
        device,
        criterion,
        early_stopping_epochs,
    )
    mean_score = np.mean(scores)

    # The log keeps the raw (un-negated) mean so rows are directly comparable.
    with open(log_file, "a") as f:
        f.write(
            f"{scaler_type},{logsigs_or_sigs},{signature_level},{lr},{weight_decay},"
            f"{dropout},{early_stopping_epochs},{classifier_input_dim},{hidden_layer_dim},"
            f"{mean_score}\n"
        )

    # Negated so that a larger objective value corresponds to a smaller mean score.
    return -mean_score

In [None]:
# Run the Ax managed loop for 50 trials over the search space defined above.
# `minimize` is not passed, so the library's default direction applies; the
# objective reported to Ax is the negated mean score returned by eval_function.
search_result = optimize(
    evaluation_function=eval_function,
    parameters=parameters,
    total_trials=50,
    objective_name="CV_score",
)
best_parameters, values, experiment, model = search_result

In [None]:
# Report the best configuration found and the values returned alongside it,
# one per line.
print(best_parameters, values, sep="\n")

In [None]:
# Hand-picked configuration to re-evaluate outside the search loop.

# Data-related settings (these select the signature file that is loaded).
scaler_type = "meanvar_1.0"
logsigs_or_sigs = "sigs"
# NOTE: this rebinds `signature_level`, which earlier held the list of
# candidate levels used to build the search space.
signature_level = 4

# Optimiser settings.
lr = 0.0006365430731326342
weight_decay = 0.00016552539309486747

# Regularisation / training length.
dropout = 0.5
# NOTE(review): 12 lies outside the 40-50 range declared in the search-space
# cell, so this value was not produced by the search as currently configured.
early_stopping_epochs = 12

# Layer widths.
classifier_input_dim = 512
hidden_layer_dim = 256

In [None]:
# Score the hand-picked configuration once. This also appends a row to the
# log file; the cell output is the negated mean score returned by eval_function.
final_trial_parameters = {
    "scaler_type": scaler_type,
    "logsigs_or_sigs": logsigs_or_sigs,
    "signature_level": signature_level,
    "lr": lr,
    "weight_decay": weight_decay,
    "dropout": dropout,
    "early_stopping_epochs": early_stopping_epochs,
    "classifier_input_dim": classifier_input_dim,
    "hidden_layer_dim": hidden_layer_dim,
}
eval_function(final_trial_parameters)