In [1]:
import os
import json
import random
import module
import optuna
import numpy as np
import torch
from torch.utils.data import DataLoader

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


In [2]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.benchmark = False

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['PYTHONHASHSEED'] = str(seed)

In [3]:
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.version.cuda)
print('학습을 진행하는 기기:', torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

torch.set_printoptions(precision=4, sci_mode=False)
torch.backends.cudnn.enabled = False
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = True

2.2.1
True
1
12.1
학습을 진행하는 기기: cuda:0


Data

In [4]:
train_dataset = torch.load("processed/datasets.pt")['ABC']['train']
valid_dataset = torch.load("processed/datasets.pt")['ABC']['valid']

In [5]:
dataloaders = []
dataset_names = ["train", "valid"]

for name in dataset_names:
    dataloader = DataLoader(eval(name + "_dataset"), batch_size=1, shuffle=True, drop_last=False)
    dataloaders.append(dataloader)

train_dataloader, valid_dataloader = dataloaders

Overweight for positive weight

In [6]:
train = torch.load("processed/splits_stage.pt")['train']
valid = torch.load("processed/splits_stage.pt")['valid']
train_id = torch.load("processed/splits_stay.pt")['train']
valid_id = torch.load("processed/splits_stay.pt")['valid']

In [7]:
pos_weights_main_train = module.compute_pos_weights_presence(train)
rrt_weight_train = module.compute_rrt_pos_weight(train_id)
pos_weights_sub_train = module.compute_pos_weights_stage(
    train,
    stage_cols=[f"GT_stage_{s}" for s in [3, 2, 1]],
    rrt_weight=rrt_weight_train
)

In [8]:
pos_weights_main_valid = module.compute_pos_weights_presence(valid)
rrt_weight_valid = module.compute_rrt_pos_weight(valid_id)
pos_weights_sub_valid = module.compute_pos_weights_stage(
    valid,
    stage_cols=[f"GT_stage_{s}" for s in [3, 2, 1]],
    rrt_weight=rrt_weight_valid
)

In [9]:
pos_weights = [pos_weights_main_train, pos_weights_sub_train, pos_weights_main_valid, pos_weights_sub_valid]

# Train

In [10]:
if __name__ == "__main__":
    study  = optuna.create_study(direction="minimize")
    study.optimize(
    lambda trial: module.objective(trial, train_dataloader, valid_dataloader, pos_weights, device),
    n_trials=30
    )

    print("Best Hyperparameters:", study.best_params)
    print("Best Validation Loss:", study.best_value)

[I 2025-04-23 13:14:03,244] A new study created in memory with name: no-name-065801e2-14eb-46ab-8825-4fc6498cb146
[I 2025-04-23 13:38:05,766] Trial 0 finished with value: 0.5942367315292358 and parameters: {'hidden_size': 100, 'embedding_size': 100, 'recurrent_num_layers': 2, 'embedding_num_layers': 3, 'CB': 0, 'recurrent_type': 'GRU', 'activation_type': 'LeakyReLU', 'batchsize': 256, 'learning_rate': 0.01, 'lr_decay_steps': 800, 'lr_decay_factor': 0.85, 'highway_network': 1, 'LD': 1, 'LN': 1, 'CDF': 1}. Best is trial 0 with value: 0.5942367315292358.
[I 2025-04-23 14:56:40,594] Trial 1 finished with value: 0.7597103714942932 and parameters: {'hidden_size': 200, 'embedding_size': 50, 'recurrent_num_layers': 1, 'embedding_num_layers': 5, 'CB': 0, 'recurrent_type': 'GRU', 'activation_type': 'ReLU', 'batchsize': 64, 'learning_rate': 0.001, 'lr_decay_steps': 800, 'lr_decay_factor': 0.85, 'highway_network': 0, 'LD': 1, 'LN': 1, 'CDF': 0}. Best is trial 0 with value: 0.5942367315292358.
[I 2

Best Hyperparameters: {'hidden_size': 50, 'embedding_size': 50, 'recurrent_num_layers': 2, 'embedding_num_layers': 3, 'CB': 0, 'recurrent_type': 'GRU', 'activation_type': 'ELU', 'batchsize': 512, 'learning_rate': 0.01, 'lr_decay_steps': 800, 'lr_decay_factor': 0.85, 'highway_network': 1, 'LD': 1, 'LN': 1, 'CDF': 1}
Best Validation Loss: 0.5202135443687439


In [11]:
best_params_path = "model/best_params.json"
os.makedirs("model", exist_ok=True)

with open(best_params_path, "w") as fp:
    json.dump(study.best_params, fp, indent=2)

best_trial_num   = study.best_trial.number + 1 
best_ckpt_path   = f"model/trial_{best_trial_num}_model.pt"

with open("model/best_ckpt_path.txt", "w") as fp:
    fp.write(best_ckpt_path)

print("✅ Optuna Completed")
print("  • Best params saved  ➜", best_params_path)
print("  • Best model saved   ➜", best_ckpt_path)
print("  • Best valid loss    ➜", study.best_value)

✅ Optuna Completed
  • Best params saved  ➜ model/best_params.json
  • Best model saved   ➜ model/trial_22_model.pt
  • Best valid loss    ➜ 0.5202135443687439
