In [None]:
import itertools
import os
import sys

import torch

# Put the parent of the current working directory on the import path.
# Presumably this is where the local `model_utils` module lives — confirm
# against the repository layout.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Append only once so re-running the cell does not pile up duplicates.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
    
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.loggers import MLFlowLogger
from model_utils import Model, ClassificationData

# Report the compute device. The CUDA property queries below raise when no
# GPU is visible, so they are only issued when CUDA is actually available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
if device.type == "cuda":
    print("Using device:", torch.cuda.get_device_name(0))
    # total_memory is divided by 1024 ** 3 and rounded to a whole number for display.
    print("VRAM:", round(torch.cuda.get_device_properties(0).total_memory / 1024 ** 3), "GB")
else:
    print("Using device: CPU (CUDA not available)")

cuda
Using device: NVIDIA GeForce RTX 4060 Ti
VRAM: 8 GB


In [None]:
# Hyperparameter sweep: for every combination in the grid, train and test
# `runs` models that differ only in their random seed.
torch.set_float32_matmul_precision("medium")

# Search grid: every value of each key is combined with every value of the others.
hyperparams = {
    "learning_rate": [1e-1],
    "batch_size": [512],
    "dropout": [0.2, 0.3, 0.4],
    "weight_decay": [1e-3],
}
runs = 10          # number of seeded repetitions per combination
BASE_SEED = 123    # repetition i is seeded with BASE_SEED + i
MAX_EPOCHS = 20    # upper bound on epochs; early stopping may end a run sooner

# itertools.product varies the last iterable fastest, i.e. the same visiting
# order as nesting learning_rate > batch_size > weight_decay > dropout.
for lr, bs, wd, dp in itertools.product(
    hyperparams["learning_rate"],
    hyperparams["batch_size"],
    hyperparams["weight_decay"],
    hyperparams["dropout"],
):
    for i in range(runs):
        seed = BASE_SEED + i
        L.seed_everything(seed)
        torch.cuda.empty_cache()

        # Build a fresh dict for each run so that no two Model instances
        # hold a reference to the same (later mutated) hyperparameter object.
        example_hyperparams = {
            "learning_rate": lr,
            "batch_size": bs,
            "dropout": dp,
            "weight_decay": wd,
        }
        model = Model(example_hyperparams)

        # One tag identifies the run in both MLflow and the checkpoint name.
        # Single quotes inside the f-string keep this valid before Python 3.12.
        hp = model.hyperparameters
        run_tag = (
            f"run_{i}_lr={hp['learning_rate']}_bs={hp['batch_size']}"
            f"_wd={hp['weight_decay']}_dropout={hp['dropout']}_seed={seed}"
        )

        logger = MLFlowLogger(save_dir="mlruns", experiment_name="CNN", run_name=run_tag)

        # NOTE(review): early stopping watches train_loss while checkpoints are
        # selected on val_f1_macro — confirm that stopping on the training
        # metric is intended.
        early_stop = EarlyStopping(
            monitor="train_loss",
            patience=3,
            mode="min",
            verbose=True,
            min_delta=0.01,
        )
        # The trailing template is left unformatted on purpose: Lightning fills
        # in {epoch} and {val_f1_macro} when it writes the checkpoint.
        checkpoint_callback = ModelCheckpoint(
            monitor="val_f1_macro",
            mode="max",
            dirpath="checkpoints",
            filename=run_tag + "-{epoch:02d}-{val_f1_macro:.2f}",
        )
        trainer = L.Trainer(
            max_epochs=MAX_EPOCHS,
            logger=logger,
            num_sanity_val_steps=0,
            enable_model_summary=False,
            deterministic=False,
            callbacks=[early_stop, checkpoint_callback],
            precision="16-mixed",
        )
        data = ClassificationData(batch_size=hp["batch_size"])
        trainer.fit(model, datamodule=data)
        # Evaluate the checkpoint with the best val_f1_macro rather than the
        # weights left over from the final training epoch.
        trainer.test(model, datamodule=data, ckpt_path="best")