In [4]:
from google.colab import drive
# Only needed when running on Google Colab: mounts Google Drive at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
import sys

# Directory that contains sasrec_plus.py; edit this to match your own Drive layout.
PROJECT_DIR = '/content/drive/MyDrive/Colab_Notebooks/Recommendation_system/team_project'

# Guarded so that re-running this cell does not append a duplicate entry each time.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

In [20]:
import importlib
import sys

module_name = "sasrec_plus"  # name of the edited .py file (without the extension)

# Drop any cached module object so the import below is a fresh import
# and not a lookup of the already-loaded module.
sys.modules.pop(module_name, None)

importlib.invalidate_caches()
importlib.import_module(module_name)

<module 'sasrec_plus' from '/content/drive/MyDrive/Colab_Notebooks/Recommendation_system/team_project/sasrec_plus.py'>

In [21]:
import torch
from sasrec_plus import SASRecPipeline
from sasrec_plus import set_seed

In [None]:
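# NOTE: a stray chat-paste label ("나의 말:") used to sit on this line; it is not valid Python, so it is kept only as this comment.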
def train_sasrec_with_ndcg(
    pipeline: SASRecPipeline,
    num_epochs: int,
    k: int = 10,
    num_negatives: int = 100,
    eval_every: int = 1,
    save_path: str = "checkpoints/",
    early_stop_patience: int | None = None,
    print_loss: bool = True,
    arg_num = None
):
    """
    NDCG@K 기준으로 best model을 저장하는 학습 루프.

    pipeline : SASRecPipeline 인스턴스
    num_epochs : 총 학습 epoch 수
    k : Hit@K / NDCG@K 에서의 K (기본 10)
    num_negatives : 논문 스타일 negative 개수 (기본 100)
    eval_every : 몇 epoch마다 validation 평가할지 (기본 1 = 매 epoch)
    save_path : best NDCG 모델을 저장할 경로
    early_stop_patience : 개선 없을 때 멈출 epoch 수 (None이면 사용 안 함)
    print_loss : train loop에서 loss 출력 여부
    """
    
    if arg_num is None:
        last_save_path = save_path + f"sasrec_last_{num_epochs}epoch.pth"
    else:
        last_save_path = save_path + f"sasrec_last_{num_epochs}epoch_{arg_num}.pth"
    
    if arg_num is None:
        save_path = save_path + "sasrec_best_ndcg.pt"
    else:
        save_path = save_path + f"sasrec_best_ndcg_{arg_num}.pt"

    start_epoch = pipeline.epoch + 1       # pipeline 안에 저장된 마지막 epoch 다음부터
    end_epoch = num_epochs

    best_ndcg = -1.0
    best_epoch = pipeline.epoch
    epochs_no_improve = 0

    print(f"[INFO] Training from epoch {start_epoch} to {end_epoch} (current={pipeline.epoch})")

    for epoch in range(start_epoch, end_epoch + 1):
        # 1) 한 epoch 학습
        pipeline.model_train(target_epoch=epoch, print_loss=print_loss)

        # 2) eval_every마다 validation NDCG 측정
        if (epoch - start_epoch + 1) % eval_every == 0:
            pipeline.model_validate()
            ndcg_k = pipeline.val_ndcg[epoch][1]

            # 3) best NDCG 갱신 시 체크포인트 저장
            if ndcg_k > best_ndcg:
                best_ndcg = ndcg_k
                best_epoch = epoch
                epochs_no_improve = 0

                pipeline.save_model(save_path)
                print(f"[BEST] New best NDCG@{k}: {best_ndcg:.4f} at epoch {best_epoch}")
            else:
                epochs_no_improve += 1

            # 4) early stopping
            if early_stop_patience is not None and epochs_no_improve >= early_stop_patience:
                print(f"[EARLY STOP] No improvement in NDCG for {early_stop_patience} eval steps.")
                break
    pipeline.save_model(last_save_path)
    print(f"[DONE] Training finished. Best NDCG@{k}: {best_ndcg:.4f} at epoch {best_epoch}")
    print(f"[DONE] Best model saved to: {save_path}")
    print(f"[DONE] Last model saved to: {last_save_path}")

In [None]:
# Set 'json_path' in the config dict you use below to the path of the dataset file (it is left as None here as a placeholder).

In [18]:
# Config 1: share_embd=True, d_ff=42, fixed_pos_embd=0.
args1 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=42,
    dropout=0.2,
    share_embd=True,
    fixed_pos_embd=0,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 2: share_embd=True, d_ff=42, fixed_pos_embd=1.
args2 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    # NOTE(review): the original comment here read "42 -> 168", but the value is 42 - confirm which is intended.
    d_ff=42,
    dropout=0.2,
    share_embd=True,
    fixed_pos_embd=1,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 3: share_embd=True, d_ff=42, fixed_pos_embd=2.
args3 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=42,
    dropout=0.2,
    share_embd=True,
    fixed_pos_embd=2,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 4: share_embd=True, d_ff=168, fixed_pos_embd=0.
args4 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=168,
    dropout=0.2,
    share_embd=True,
    fixed_pos_embd=0,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 5: share_embd=True, d_ff=168, fixed_pos_embd=1.
args5 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=168,
    dropout=0.2,
    share_embd=True,
    fixed_pos_embd=1,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [5]:
# Config 6: share_embd=True, d_ff=168, fixed_pos_embd=2.
args6 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=168,
    dropout=0.2,
    share_embd=True,
    fixed_pos_embd=2,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 7: share_embd=False, d_ff=42, fixed_pos_embd=0.
args7 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=42,
    dropout=0.2,
    share_embd=False,
    fixed_pos_embd=0,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 8: share_embd=False, d_ff=42, fixed_pos_embd=1.
args8 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    # NOTE(review): the original comment here read "42 -> 168", but the value is 42 - confirm which is intended.
    d_ff=42,
    dropout=0.2,
    share_embd=False,
    fixed_pos_embd=1,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 9: share_embd=False, d_ff=42, fixed_pos_embd=2.
args9 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=42,
    dropout=0.2,
    share_embd=False,
    fixed_pos_embd=2,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 10: share_embd=False, d_ff=168, fixed_pos_embd=0.
args10 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=168,
    dropout=0.2,
    share_embd=False,
    fixed_pos_embd=0,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 11: share_embd=False, d_ff=168, fixed_pos_embd=1.
args11 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=168,
    dropout=0.2,
    share_embd=False,
    fixed_pos_embd=1,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# Config 12: share_embd=False, d_ff=168, fixed_pos_embd=2.
args12 = dict(
    max_len=50,
    batch_size=128,
    split_ratio=(0.8, 0.1, 0.1),
    d_model=42,
    n_layers=2,
    d_ff=168,
    dropout=0.2,
    share_embd=False,
    fixed_pos_embd=2,  # 0: none / 1: add fixed embd / 2: concat fixed embd
    pad_id=0,
    lr=1e-3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    json_path=None,
    num_workers=0,
)

In [None]:
# set_seed is imported from sasrec_plus; presumably it seeds the random number
# generators for reproducibility - confirm in that module.
set_seed(42)

# Choose one of the config dicts defined above (args1 ... args12).
args = None

# Fail with an actionable message instead of the opaque TypeError that
# SASRecPipeline(**None) would raise.
if args is None:
    raise ValueError("No config selected: assign one of args1 ... args12 to `args` before running this cell.")

pipe = SASRecPipeline(**args)

train_sasrec_with_ndcg(pipeline=pipe, num_epochs=200)

In [11]:
# Use the GPU when PyTorch reports one as available; otherwise use the CPU.
# If this displays 'cpu' and not 'cuda', training will be very slow.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'