# BERT4Rec Training Demo



## 1. Import Required Libraries

In [1]:
import torch
from options import args
from models import model_factory
from dataloaders import dataloader_factory
from trainers import trainer_factory
from utils import *

## 2. Configure Training Parameters

We'll set up the same configuration as used in the BERT4Rec template.

In [2]:
# Set basic configuration
args.mode = 'train'

# Dataset selection
dataset_choice = "1" # input('Input 1 for ml-1m, 20 for ml-20m: ')
# args.dataset_code = f'ml-{dataset_choice}m'
# args.min_rating = 0 if args.dataset_code == 'ml-1m' else 4
args.dataset_code = 'kion'
args.min_rating = 0 
args.min_uc = 5
args.min_sc = 0
args.split = 'leave_one_out'

# Dataloader configuration
args.dataloader_code = 'bert'
batch = 128
args.train_batch_size = batch
args.val_batch_size = batch
args.test_batch_size = batch

# Negative sampling configuration
args.train_negative_sampler_code = 'random'
args.train_negative_sample_size = 0
args.train_negative_sampling_seed = 0
args.test_negative_sampler_code = 'random'
args.test_negative_sample_size = 100
args.test_negative_sampling_seed = 98765

# Training configuration
args.trainer_code = 'bert'
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.num_gpu = 1
args.device_idx = '0'
args.optimizer = 'Adam'
args.lr = 0.001
args.enable_lr_schedule = True
args.decay_step = 25
args.gamma = 1.0
args.num_epochs = 100 if args.dataset_code == 'ml-1m' else 200
args.metric_ks = [1, 5, 10, 20, 50, 100]
args.best_metric = 'NDCG@10'

# Model configuration
args.model_code = 'bert'
args.model_init_seed = 0
args.bert_dropout = 0.1
args.bert_hidden_units = 256
args.bert_mask_prob = 0.15
args.bert_max_len = 100
args.bert_num_blocks = 2
args.bert_num_heads = 4

## 3. Setup Training Environment

In [3]:
# Setup training directory and logging
export_root = setup_train(args)
print(f"Training logs and model checkpoints will be saved to: {export_root}")

Folder created: /Users/arimmean/Desktop/BERT4Rec-VAE-Pytorch/experiments/test_2025-05-19_12
{'anneal_cap': 0.2,
 'bert_dropout': 0.1,
 'bert_hidden_units': 256,
 'bert_mask_prob': 0.15,
 'bert_max_len': 100,
 'bert_num_blocks': 2,
 'bert_num_heads': 4,
 'best_metric': 'NDCG@10',
 'dae_dropout': 0.5,
 'dae_hidden_dim': 600,
 'dae_latent_dim': 200,
 'dae_num_hidden': 0,
 'dataloader_code': 'bert',
 'dataloader_random_seed': 0.0,
 'dataset_code': 'kion',
 'dataset_split_seed': 98765,
 'decay_step': 25,
 'device': 'cpu',
 'device_idx': '0',
 'enable_lr_schedule': True,
 'eval_set_size': 500,
 'experiment_description': 'test',
 'experiment_dir': 'experiments',
 'find_best_beta': True,
 'gamma': 1.0,
 'log_period_as_iter': 12800,
 'lr': 0.001,
 'metric_ks': [1,
               5,
               10,
               20,
               50,
               100],
 'min_rating': 0,
 'min_sc': 0,
 'min_uc': 5,
 'mode': 'train',
 'model_code': 'bert',
 'model_init_seed': 0,
 'num_epochs': 200,
 'num_gp

## 4. Prepare Data

In [4]:
# Create dataloaders
train_loader, val_loader, test_loader = dataloader_factory(args)
print(f"Dataset: {args.dataset_code}")
print(f"Training batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

Already preprocessed. Skip preprocessing
Negatives samples exist. Loading.
Negatives samples exist. Loading.
Dataset: kion
Training batches: 2364
Validation batches: 2364
Test batches: 2364


## 5. Initialize Model

In [5]:
# Create model
model = model_factory(args)
print(f"Model initialized with {sum(p.numel() for p in model.parameters())} parameters")

Model initialized with 9549181 parameters


## 6. Initialize Trainer

In [6]:
# Create trainer
trainer = trainer_factory(args, model, train_loader, val_loader, test_loader, export_root)
print("Trainer initialized with the following configuration:")
print(f"- Optimizer: {args.optimizer}")
print(f"- Learning rate: {args.lr}")
print(f"- Number of epochs: {args.num_epochs}")
print(f"- Best metric: {args.best_metric}")

Trainer initialized with the following configuration:
- Optimizer: Adam
- Learning rate: 0.001
- Number of epochs: 200
- Best metric: NDCG@10


## 7. Train Model

In [7]:
# Start training
trainer.train()

Val: N@1 0.008, N@5 0.022, N@10 0.036, R@1 0.008, R@5 0.037, R@10 0.080, M@1 0.008, M@5 0.017, M@10 0.023, V@1 0.000, V@5 0.000, V@10 0.000:  30%|██▉       | 699/2364 [04:07<09:49,  2.82it/s]


KeyboardInterrupt: 

## 8. Test Model (Optional)

In [8]:
# Ask user if they want to run test set evaluation
test_model = (input('Test model with test dataset? y/[n]: ') == 'y')
if test_model:
    trainer.test()

Test best model with test set!


FileNotFoundError: [Errno 2] No such file or directory: 'experiments/test_2025-05-19_12/models/best_acc_model.pth'