In [2]:
import itertools
import os
import sys

import torch

# Resolve the parent of the current working directory and make sure it is on
# the module search path (presumably so project-local modules such as
# model_utils can be imported below -- confirm against the repo layout).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Append only once so re-running the cell does not keep growing sys.path.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.loggers import MLFlowLogger
from model_utils import Model, ClassificationData

# Report the compute device. The GPU name/VRAM queries are only issued when
# CUDA is present, because torch.cuda.get_device_name and
# torch.cuda.get_device_properties raise on hosts without a CUDA device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
if device.type == "cuda":
    print("Using device:", torch.cuda.get_device_name(0))
    # total_memory is in bytes; convert to whole GiB for display.
    print("VRAM:", round(torch.cuda.get_device_properties(0).total_memory / 1024 ** 3), "GB")
else:
    print("Using device: CPU (CUDA not available)")

cuda
Using device: NVIDIA GeForce RTX 4060 Ti
VRAM: 8 GB


In [None]:
# Exhaustive grid search: every hyperparameter combination is trained `runs`
# times with different seeds, logged to MLflow, checkpointed, and tested.
# With the values below that is 3 * 3 * 3 * 3 * 10 = 810 training runs.

# Allow reduced-precision float32 matmuls in exchange for speed.
torch.set_float32_matmul_precision("medium")

# Search space.
hyperparams = {
    "learning_rate": [1e-1, 1e-2, 1e-3],
    "batch_size": [1024, 512, 256],
    "dropout": [0.2, 0.3, 0.4],
    "weight_decay": [1e-1, 1e-2, 1e-3],
}
runs = 10         # repetitions per configuration; run i uses seed base_seed + i
base_seed = 123
max_epochs = 20

# itertools.product varies the last iterable fastest, which reproduces the
# order of nested loops lr -> batch size -> weight decay -> dropout.
grid = itertools.product(
    hyperparams["learning_rate"],
    hyperparams["batch_size"],
    hyperparams["weight_decay"],
    hyperparams["dropout"],
)

for lr, bs, wd, dp in grid:
    # Build a fresh dict per configuration instead of mutating one shared
    # dict, so nothing that kept a reference to an earlier configuration
    # sees its values change underneath it.
    example_hyperparams = {
        "learning_rate": lr,
        "batch_size": bs,
        "dropout": dp,
        "weight_decay": wd,
    }
    for i in range(runs):
        seed = base_seed + i
        L.seed_everything(seed)
        # Release cached GPU blocks left over from the previous run.
        torch.cuda.empty_cache()

        # Each model gets its own copy of the configuration.
        model = Model(dict(example_hyperparams))
        logger = MLFlowLogger(save_dir="mlruns", experiment_name="CNN")

        # NOTE(review): early stopping watches the *training* loss; a
        # validation metric is the usual choice. Left unchanged so results
        # stay comparable with runs already logged -- confirm intent.
        early_stop = EarlyStopping(
            monitor="train_loss",
            patience=3,
            mode="min",
            verbose=True,
            min_delta=0.01,
        )

        # The first part is formatted here; the trailing "{epoch...}" and
        # "{val_f1_macro...}" placeholders are left for ModelCheckpoint to
        # fill in. Using the loop variables avoids nesting double quotes
        # inside a double-quoted f-string, which only parses on Python 3.12+.
        checkpoint_name = (
            f"run_{i}_lr={lr}_bs={bs}_wd={wd}_dropout={dp}_seed={seed}"
            + "-{epoch:02d}-{val_f1_macro:.2f}"
        )
        checkpoint_callback = ModelCheckpoint(
            monitor="val_f1_macro",
            mode="max",
            dirpath="checkpoints",
            filename=checkpoint_name,
        )

        trainer = L.Trainer(
            max_epochs=max_epochs,
            logger=logger,
            num_sanity_val_steps=0,
            enable_model_summary=False,
            deterministic=False,
            callbacks=[early_stop, checkpoint_callback],
            # "16-mixed" is the current spelling of the legacy `precision=16`.
            precision="16-mixed",
        )
        data = ClassificationData(batch_size=bs)
        trainer.fit(model, datamodule=data)
        # NOTE(review): this evaluates the weights from the last epoch, not
        # the best checkpoint selected by val_f1_macro (ckpt_path="best").
        # Left unchanged to keep reported metrics comparable -- confirm intent.
        trainer.test(model, datamodule=data)

Epoch 7: 100%|██████████| 88/88 [00:15<00:00,  5.78it/s, v_num=2f76]

Metric train_loss improved by 0.026 >= min_delta = 0.01. New best score: 1.379


Epoch 9: 100%|██████████| 88/88 [00:14<00:00,  5.87it/s, v_num=2f76]

Metric train_loss improved by 0.021 >= min_delta = 0.01. New best score: 1.358


Epoch 11: 100%|██████████| 88/88 [00:15<00:00,  5.85it/s, v_num=2f76]

Metric train_loss improved by 0.010 >= min_delta = 0.01. New best score: 1.348


Epoch 14: 100%|██████████| 88/88 [00:15<00:00,  5.71it/s, v_num=2f76]

Monitored metric train_loss did not improve in the last 3 records. Best score: 1.348. Signaling Trainer to stop.


Epoch 14: 100%|██████████| 88/88 [00:15<00:00,  5.71it/s, v_num=2f76]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 88/88 [00:07<00:00, 12.49it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.8889033198356628
      test_f1_macro         0.4766843318939209
     test_precision         0.5126007199287415
       test_recall          0.4878888726234436
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Seed set to 132
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 0: 100%|██████████| 88/88 [00:30<00:00,  2.88it/s, v_num=568f]

Metric train_loss improved. New best score: 1.723


Epoch 1: 100%|██████████| 88/88 [00:15<00:00,  5.66it/s, v_num=568f]

Metric train_loss improved by 0.181 >= min_delta = 0.01. New best score: 1.542


Epoch 2: 100%|██████████| 88/88 [00:14<00:00,  5.87it/s, v_num=568f]

Metric train_loss improved by 0.063 >= min_delta = 0.01. New best score: 1.478


Epoch 3: 100%|██████████| 88/88 [00:15<00:00,  5.85it/s, v_num=568f]

Metric train_loss improved by 0.036 >= min_delta = 0.01. New best score: 1.442


Epoch 4: 100%|██████████| 88/88 [00:15<00:00,  5.81it/s, v_num=568f]

Metric train_loss improved by 0.033 >= min_delta = 0.01. New best score: 1.408


Epoch 5: 100%|██████████| 88/88 [00:14<00:00,  5.93it/s, v_num=568f]

Metric train_loss improved by 0.011 >= min_delta = 0.01. New best score: 1.397


Epoch 6: 100%|██████████| 88/88 [00:14<00:00,  5.91it/s, v_num=568f]

Metric train_loss improved by 0.020 >= min_delta = 0.01. New best score: 1.378


Epoch 7: 100%|██████████| 88/88 [00:15<00:00,  5.84it/s, v_num=568f]

Metric train_loss improved by 0.015 >= min_delta = 0.01. New best score: 1.363


Epoch 9: 100%|██████████| 88/88 [00:15<00:00,  5.74it/s, v_num=568f]

Metric train_loss improved by 0.017 >= min_delta = 0.01. New best score: 1.346


Epoch 12: 100%|██████████| 88/88 [00:15<00:00,  5.67it/s, v_num=568f]

Metric train_loss improved by 0.012 >= min_delta = 0.01. New best score: 1.334


Epoch 15: 100%|██████████| 88/88 [00:14<00:00,  6.06it/s, v_num=568f]

Monitored metric train_loss did not improve in the last 3 records. Best score: 1.334. Signaling Trainer to stop.


Epoch 15: 100%|██████████| 88/88 [00:14<00:00,  6.05it/s, v_num=568f]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 88/88 [00:06<00:00, 13.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.8945730328559875
      test_f1_macro          0.480291485786438
     test_precision         0.5143482685089111
       test_recall          0.49674445390701294
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Seed set to 123
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 0: 100%|██████████| 88/88 [00:30<00:00,  2.90it/s, v_num=ab06]

Metric train_loss improved. New best score: 1.751


Epoch 1: 100%|██████████| 88/88 [00:14<00:00,  6.09it/s, v_num=ab06]

Metric train_loss improved by 0.170 >= min_delta = 0.01. New best score: 1.581


Epoch 2: 100%|██████████| 88/88 [00:14<00:00,  6.10it/s, v_num=ab06]

Metric train_loss improved by 0.054 >= min_delta = 0.01. New best score: 1.527


Epoch 3: 100%|██████████| 88/88 [00:14<00:00,  6.11it/s, v_num=ab06]

Metric train_loss improved by 0.044 >= min_delta = 0.01. New best score: 1.483


Epoch 4: 100%|██████████| 88/88 [00:14<00:00,  6.07it/s, v_num=ab06]

Metric train_loss improved by 0.022 >= min_delta = 0.01. New best score: 1.461


Epoch 5: 100%|██████████| 88/88 [00:14<00:00,  6.04it/s, v_num=ab06]

Metric train_loss improved by 0.025 >= min_delta = 0.01. New best score: 1.436


Epoch 6: 100%|██████████| 88/88 [00:14<00:00,  6.00it/s, v_num=ab06]

Metric train_loss improved by 0.013 >= min_delta = 0.01. New best score: 1.423


Epoch 8: 100%|██████████| 88/88 [00:14<00:00,  5.99it/s, v_num=ab06]

Metric train_loss improved by 0.017 >= min_delta = 0.01. New best score: 1.406


Epoch 11: 100%|██████████| 88/88 [00:14<00:00,  6.12it/s, v_num=ab06]

Metric train_loss improved by 0.014 >= min_delta = 0.01. New best score: 1.392


Epoch 14: 100%|██████████| 88/88 [00:14<00:00,  5.96it/s, v_num=ab06]

Monitored metric train_loss did not improve in the last 3 records. Best score: 1.392. Signaling Trainer to stop.


Epoch 14: 100%|██████████| 88/88 [00:14<00:00,  5.96it/s, v_num=ab06]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 88/88 [00:07<00:00, 12.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.8780038952827454
      test_f1_macro         0.42906898260116577
     test_precision         0.4697403907775879
       test_recall          0.45525553822517395
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Seed set to 124
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 0: 100%|██████████| 88/88 [00:30<00:00,  2.87it/s, v_num=b17c]

Metric train_loss improved. New best score: 1.754


Epoch 1: 100%|██████████| 88/88 [00:14<00:00,  6.09it/s, v_num=b17c]

Metric train_loss improved by 0.168 >= min_delta = 0.01. New best score: 1.586


Epoch 2: 100%|██████████| 88/88 [00:14<00:00,  5.95it/s, v_num=b17c]

Metric train_loss improved by 0.057 >= min_delta = 0.01. New best score: 1.529


Epoch 3: 100%|██████████| 88/88 [00:14<00:00,  5.99it/s, v_num=b17c]

Metric train_loss improved by 0.042 >= min_delta = 0.01. New best score: 1.487


Epoch 4: 100%|██████████| 88/88 [00:14<00:00,  6.13it/s, v_num=b17c]

Metric train_loss improved by 0.028 >= min_delta = 0.01. New best score: 1.460


Epoch 5: 100%|██████████| 88/88 [00:14<00:00,  5.97it/s, v_num=b17c]

Metric train_loss improved by 0.021 >= min_delta = 0.01. New best score: 1.439


Epoch 6: 100%|██████████| 88/88 [00:14<00:00,  6.03it/s, v_num=b17c]

Metric train_loss improved by 0.015 >= min_delta = 0.01. New best score: 1.424


Epoch 7:  36%|███▋      | 32/88 [00:02<00:04, 11.43it/s, v_num=b17c]