In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import argparse
import pickle
import torch
from torch.utils.data import DataLoader
from torch import optim, nn
from tqdm import tqdm
import os
from torch.cuda.amp import autocast, GradScaler


device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cuda


Dataset

In [3]:
class ItemSequenceDataset:
    def __init__(self, filepath, max_length):
        import pandas as pd
        df = pd.read_csv(filepath, names=['user_id', 'item_id'], usecols=[0, 1])
        self.num_users = df['user_id'].max() + 1
        self.num_items = df['item_id'].max() + 1

        # 按用户分组
        self.all_records = [[] for _ in range(self.num_users)]
        for _, row in tqdm(df.iterrows(), desc="Loading data"):
            user_id, item_id = row.iloc[0], row.iloc[1]
            self.all_records[user_id].append(item_id)

        print(f'# Users: {self.num_users}')
        print(f'# Items: {self.num_items}')
        print(f'# Interactions: {len(df)}')

        # 准备训练/验证/测试数据
        X_train, y_train = [], []
        X_valid, y_valid = [], []
        X_test, y_test = [], []

        for seq in tqdm(self.all_records, desc="Preparing sequences"):
            if len(seq) < 3:
                continue

            # 训练集: 使用前n-2个
            train_seq = seq[:-2]
            if len(train_seq) < max_length:
                X_train.append([self.num_items] * (max_length - len(train_seq) + 1) + train_seq[:-1])
                y_train.append([self.num_items] * (max_length - len(train_seq) + 1) + train_seq[1:])
            else:
                for i in range(len(train_seq) - max_length):
                    X_train.append(train_seq[i:i+max_length])
                    y_train.append(train_seq[i+1:i+max_length+1])

            # 验证集: 使用前n-1个预测第n-1个
            valid_seq = seq[:-1]
            if len(valid_seq) - 1 < max_length:
                X_valid.append([self.num_items] * (max_length - len(valid_seq) + 1) + valid_seq[:-1])
            else:
                X_valid.append(valid_seq[-(max_length+1):-1])
            y_valid.append(valid_seq[-1])

            # 测试集: 使用前n个预测第n个
            test_seq = seq
            if len(test_seq) - 1 < max_length:
                X_test.append([self.num_items] * (max_length - len(test_seq) + 1) + test_seq[:-1])
            else:
                X_test.append(test_seq[-(max_length+1):-1])
            y_test.append(test_seq[-1])

        self.X_train = torch.tensor(X_train)
        self.y_train = torch.tensor(y_train)
        self.X_valid = torch.tensor(X_valid)
        self.y_valid = torch.tensor(y_valid)
        self.X_test = torch.tensor(X_test)
        self.y_test = torch.tensor(y_test)

        print('Data loading completed.')

    def __len__(self):
        return len(self.X_train)

    def __getitem__(self, idx):
        return self.X_train[idx], self.y_train[idx]

Model & Metrics definition

In [4]:
class UnidirectionalSelfAttention(nn.Module):
    def __init__(self, embedding_dim):
        super().__init__()
        self.query = nn.Linear(embedding_dim, embedding_dim, bias=False)
        self.key = nn.Linear(embedding_dim, embedding_dim, bias=False)
        self.value = nn.Linear(embedding_dim, embedding_dim, bias=False)

    def forward(self, x):
        seq_len, embedding_dim = x.size(1), x.size(2)
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)

        attention_scores = torch.matmul(Q, K.transpose(-2, -1)) / (embedding_dim ** 0.5)
        mask = torch.tril(torch.ones(seq_len, seq_len)).bool().unsqueeze(0).to(x.device)
        attention_scores = attention_scores.masked_fill(~mask, float('-inf'))
        attention_weights = torch.softmax(attention_scores, dim=-1)
        attention_output = torch.matmul(attention_weights, V)

        return attention_output


class TransformerLayer(nn.Module):
    def __init__(self, embedding_dim, dropout):
        super().__init__()
        self.attn = UnidirectionalSelfAttention(embedding_dim)
        self.feed_forward = nn.Sequential(
            nn.Linear(embedding_dim, embedding_dim),
            nn.ReLU(),
            nn.Linear(embedding_dim, embedding_dim)
        )
        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        attn_output = self.attn(x)
        x = self.layer_norm(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.layer_norm(x + self.dropout(ff_output))
        return x


class SASRec(nn.Module):
    def __init__(self, num_items, embedding_dim=64, max_length=50, num_layers=2, dropout=0.2):
        super().__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim, padding_idx=num_items)
        self.position_embedding = nn.Embedding(max_length, embedding_dim)
        self.attn_layers = nn.ModuleList([
            TransformerLayer(embedding_dim, dropout) for _ in range(num_layers)
        ])
        self._init_weights()

    def _init_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, std=0.001)
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Embedding):
                nn.init.normal_(module.weight, std=0.001)
                if module.padding_idx is not None:
                    module.weight.data[module.padding_idx].zero_()
            elif isinstance(module, nn.LayerNorm):
                nn.init.zeros_(module.bias)
                nn.init.ones_(module.weight)

    def forward(self, item_ids):
        batch_size, seq_len = item_ids.shape
        positions = torch.arange(seq_len, device=item_ids.device).unsqueeze(0).repeat(batch_size, 1)

        item_embeds = self.item_embedding(item_ids)
        pos_embeds = self.position_embedding(positions)
        x = item_embeds + pos_embeds

        for attn_layer in self.attn_layers:
            x = attn_layer(x)

        logits = torch.matmul(x, self.item_embedding.weight[:-1].T)
        return logits


# ==================== 评估工具 ====================
class Metrics:
    def __init__(self, topk_list):
        self.topk_list = topk_list
        self.hit_total = {k: 0 for k in topk_list}
        self.ndcg_total = {k: 0 for k in topk_list}
        self.mrr_total = {k: 0 for k in topk_list}
        self.rec_list = []
        self.total_nums = 0

    def accumulate(self, ranks_list, y):
        import math
        for i, (ranks, true_item) in enumerate(zip(ranks_list, y)):
            if true_item in ranks:
                rank = ranks.index(true_item) + 1
                self.rec_list.append((self.total_nums + i, ranks, true_item))
                for k in self.topk_list:
                    if rank <= k:
                        self.hit_total[k] += 1
                        self.ndcg_total[k] += 1 / math.log2(rank + 1)
                        self.mrr_total[k] += 1 / rank
        self.total_nums += len(y)

    def get(self):
        hit = {k: self.hit_total[k] / self.total_nums for k in self.topk_list}
        ndcg = {k: self.ndcg_total[k] / self.total_nums for k in self.topk_list}
        mrr = {k: self.mrr_total[k] / self.total_nums for k in self.topk_list}
        return hit, ndcg, mrr, self.rec_list


def get_top_k_recommendations(scores, all_records, k, phase):
    delta = 2 if phase == 'valid' else 1
    for idx, interacted_items in enumerate(all_records):
        scores[idx, interacted_items[:-delta]] = -torch.inf
    _, top_indices = torch.topk(scores, k, dim=1)
    return top_indices

Train and Test

In [5]:
def train_one_epoch(dataloader, model, loss_func, optimizer, epoch, device,
                    use_amp=False, accumulation_steps=1):
    """训练一个epoch（支持混合精度和梯度累积）"""
    model.train()
    train_loss = 0
    num_batches = len(dataloader)

    # 混合精度训练的scaler
    scaler = GradScaler() if use_amp else None

    pbar = tqdm(dataloader, desc=f"Epoch {epoch}")
    optimizer.zero_grad()

    for batch_idx, (X, y) in enumerate(pbar):
        X, y = X.to(device), y.to(device)

        # 混合精度前向传播
        if use_amp:
            with autocast(): # Removed 'cuda'
                logits = model(X)
                logits = logits.view(-1, logits.size(2))
                y_flat = y.view(-1)
                loss = loss_func(logits, y_flat) / accumulation_steps
        else:
            logits = model(X)
            logits = logits.view(-1, logits.size(2))
            y_flat = y.view(-1)
            loss = loss_func(logits, y_flat) / accumulation_steps

        train_loss += loss.item() * accumulation_steps

        # 反向传播
        if use_amp:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        # 梯度更新
        if (batch_idx + 1) % accumulation_steps == 0 or (batch_idx + 1) == num_batches:
            if use_amp:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()

            # 清理GPU缓存
            if (batch_idx + 1) % (accumulation_steps * 10) == 0:
                torch.cuda.empty_cache()

        pbar.set_postfix({'loss': f'{train_loss/(batch_idx+1):.4f}'})

    avg_loss = train_loss / num_batches
    return avg_loss


# 修改evaluate函数，分批处理
def evaluate(dataset, model, device, batch_size, topk_list, phase):
    """在验证集或测试集上评估（减少内存使用）"""
    X_all = dataset.X_valid if phase == 'valid' else dataset.X_test
    y_all = dataset.y_valid if phase == 'valid' else dataset.y_test

    model.eval()
    metrics = Metrics(topk_list)

    # 减小评估batch size
    eval_batch_size = min(batch_size, 32)  # 最多32

    with torch.no_grad():
        start = 0
        pbar = tqdm(total=len(y_all), desc=f"Evaluating {phase}")

        while start < len(y_all):
            end = min(start + eval_batch_size, len(y_all))
            X = X_all[start:end].to(device)
            y = y_all[start:end]

            # Predict
            scores = model(X)[:, -1, :]

            # Get Top-K recommendations
            ranks_list = get_top_k_recommendations(
                scores, dataset.all_records[start:end], max(topk_list), phase
            )

            # Accumulate metrics
            metrics.accumulate(ranks_list.tolist(), y.tolist())

            # Clean up
            del X, scores, ranks_list

            start = end
            pbar.update(end - start)

            # Periodically clean up cache
            if start % (eval_batch_size * 10) == 0:
                torch.cuda.empty_cache()

        pbar.close()

    hit, ndcg, mrr, rec_list = metrics.get()

    print(f'\n[{phase.upper()}]')
    print(f"Hit@{topk_list}: {hit}")
    print(f"NDCG@{topk_list}: {ndcg}")
    print(f"MRR@{topk_list}: {mrr}")

    return hit, ndcg, mrr, rec_list


# Modify train_model call
def train_model(config, dataset, model, dataloader, optimizer, loss_func):
    patience = config.NUM_PATIENCE
    best_ndcg_valid = 0.0
    best_epoch = 0
    best_hit = None
    best_ndcg = None

    print("\n" + "="*60)
    print("Start training")
    print("="*60)

    # Initial evaluation
    print("\nInitial evaluation (random):")
    evaluate(dataset, model, config.DEVICE, config.BATCH_SIZE, config.TOPK_LIST, 'valid')
    # evaluate(dataset, model, config.DEVICE, config.BATCH_SIZE, config.TOPK_LIST, 'test')

    for epoch in range(1, config.NUM_EPOCHS + 1):
        print(f"\n{'='*60}")
        print(f"Epoch {epoch}/{config.NUM_EPOCHS}")
        print('='*60)

        # Train
        train_loss = train_one_epoch(
            dataloader, model, loss_func, optimizer, epoch, config.DEVICE,
            use_amp=config.USE_AMP,
            accumulation_steps=config.ACCUMULATION_STEPS
        )
        print(f"\nTrain Loss: {train_loss:.4f}")

        # Evaluate
        hit_valid, ndcg_valid, mrr_valid, rec_list_valid = evaluate(
            dataset, model, config.DEVICE, config.BATCH_SIZE, config.TOPK_LIST, 'valid'
        )




        # Save best model
        current_ndcg = ndcg_valid[max(config.TOPK_LIST)]
        if current_ndcg >= best_ndcg_valid:
            patience = config.NUM_PATIENCE
            best_ndcg_valid = current_ndcg
            best_epoch = epoch

            # Save
            model_path = os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}.pth')
            torch.save(model.state_dict(), model_path)  # Save only weights to save space

            with open(os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}_rec_list_valid.pkl'), 'wb') as f:
                pickle.dump(rec_list_valid, f)
            with open(os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}_rec_list_test.pkl'), 'wb') as f:
                pickle.dump(rec_list_test, f)

            print(f"\n✅ Best model saved! NDCG@{max(config.TOPK_LIST)}: {best_ndcg_valid:.4f}")

            log_path = os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}.log')
            with open(log_path, 'w') as f:
                f.write(f'Best epoch: {best_epoch}\n')
                f.write(f'Valid - Hit: {hit_valid}, NDCG: {ndcg_valid}, MRR: {mrr_valid}\n')
                f.write(f'Test - Hit: {hit_test}, NDCG: {ndcg_test}, MRR: {mrr_test}\n')
        else:
            patience -= 1
            print(f"\nPatience: {patience}/{config.NUM_PATIENCE}")

            if patience == 0:
                print("\n⚠️ Early stopping!")
                break

        # Clean up after each epoch
        torch.cuda.empty_cache()

    print("\n" + "="*60)
    print("Training finished!")
    print(f"Best epoch: {best_epoch}")
    print(f"Best NDCG@{max(config.TOPK_LIST)}: {best_ndcg_valid:.4f}")
    print("="*60)

    return best_epoch, best_ndcg_valid

Config

In [6]:
class Config:
    # dataset name
    DATASET = 'Grocery_and_Gourmet_Food'
    # DATA_PATH = f'../data/{DATASET}.csv'
    DATA_PATH = f'/content/drive/MyDrive/11785IDL/IDL_Project/data/{DATASET}.csv' # Updated path

    # model config
    EMBEDDING_DIM = 128
    MAX_LENGTH = 32
    NUM_LAYERS = 2
    DROPOUT = 0.2

    # train config
    BATCH_SIZE = 64
    NUM_EPOCHS = 10 # Increased epochs
    NUM_PATIENCE = 5
    LR = 0.001
    TOPK_LIST = [10]

    USE_AMP = True               # 混合精度训练
    ACCUMULATION_STEPS = 4

    # device and seed
    DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    SEED = 2025

    # checkpoint directory
    CHECKPOINT_DIR = '/content/drive/MyDrive/11785IDL/IDL_Project/midterm_Baseline/checkpoint_baseline'

Load data

In [7]:
def load_data(config):
    """load data"""
    print("\n" + "="*60)
    print("loading data")
    print("="*60)
    print(f"from: {config.DATA_PATH}")

    dataset = ItemSequenceDataset(config.DATA_PATH, config.MAX_LENGTH)
    dataloader = DataLoader(
        dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        num_workers=0
    )

    print(f"training batch: {len(dataloader)}")

    return dataset, dataloader

Create model

In [8]:
def create_model(config, num_items):
    """SASRec model"""
    print("\n" + "="*60)
    print("Create model")
    print("="*60)

    model = SASRec(
        num_items=num_items,
        embedding_dim=config.EMBEDDING_DIM,
        max_length=config.MAX_LENGTH,
        num_layers=config.NUM_LAYERS,
        dropout=config.DROPOUT
    ).to(config.DEVICE)

    # 优化器和损失函数
    optimizer = optim.Adam(model.parameters(), lr=config.LR)
    loss_func = nn.CrossEntropyLoss(ignore_index=num_items)

    # Inspect model architecture and check to verify number of parameters of your network
    try:
        # Install and import torchsummaryX
        !pip install torchsummaryX==1.1.0
        from torchsummaryX import summary

        # Create a dummy input tensor
        dummy_input = torch.randint(0, num_items, (config.BATCH_SIZE, config.MAX_LENGTH), device=config.DEVICE)
        summary(model, dummy_input)

    except:
        !pip install torchsummary
        from torchsummary import summary

        # Create a dummy input tensor and get its shape as a tuple
        dummy_input = torch.randint(0, num_items, (config.BATCH_SIZE, config.MAX_LENGTH), dtype=torch.long).to(config.DEVICE)
        summary(model, input_size=tuple(dummy_input.shape))


    return model, optimizer, loss_func

Train model

In [9]:
def run_training(config):
    """运行完整的训练流程"""
    # 设置随机种子
    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(config.SEED)

    # 创建输出目录
    os.makedirs(config.CHECKPOINT_DIR, exist_ok=True)

    # 1. 加载数据
    dataset, dataloader = load_data(config)

    # 2. 创建模型
    model, optimizer, loss_func = create_model(config, dataset.num_items)

    # 3. 训练模型
    best_epoch, best_ndcg = train_model(
        config, dataset, model, dataloader, optimizer, loss_func
    )

    # 4. 训练完成后加载最佳模型
    model_path = os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}.pth')
    model.load_state_dict(torch.load(model_path))

    # 5. 在测试集上评估（只评估一次）
    hit_test, ndcg_test, mrr_test, rec_list_test = evaluate(
        dataset, model, config.DEVICE, config.BATCH_SIZE, config.TOPK_LIST, 'test'
    )

    # 保存测试集推荐列表
    with open(os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}_rec_list_test.pkl'), 'wb') as f:
        pickle.dump(rec_list_test, f)

    # 保存最终结果日志（简洁版）
    final_log_path = os.path.join(config.CHECKPOINT_DIR, f'{config.DATASET}_final_results.log')
    with open(final_log_path, 'w') as f:
        f.write(f'Best epoch: {best_epoch}\n')
        f.write(f'Validation NDCG@10: {best_ndcg:.4f}\n')
        f.write(f'Test - Hit: {hit_test}, NDCG: {ndcg_test}, MRR: {mrr_test}\n')

    return best_epoch, best_ndcg, ndcg_test[max(config.TOPK_LIST)]

Main

In [10]:
def main():
    config = Config()

    print("="*60)
    print("SASRec Baseline Training")
    print("="*60)
    print(f"Device: {config.DEVICE}")
    print(f"Dataset: {config.DATASET}")
    print(f"Embedding dim: {config.EMBEDDING_DIM}")
    print(f"Max length: {config.MAX_LENGTH}")
    print(f"Transformer layers: {config.NUM_LAYERS}")
    print(f"Batch size: {config.BATCH_SIZE}")
    print(f"Num of epochs: {config.NUM_EPOCHS}")
    print(f"lr: {config.LR}")
    print("="*60)

    # 运行训练
    best_epoch, best_ndcg = run_training(config)
    # best_epoch, best_val_ndcg, final_test_ndcg = run_training(config)

    print(f"\n Training finished")
    print(f"Best result: Epoch {best_epoch}, NDCG@10 = {best_ndcg:.4f}")


if __name__ == '__main__':
    main()

SASRec Baseline Training
Device: cuda:0
Dataset: Grocery_and_Gourmet_Food
Embedding dim: 128
Max length: 32
Transformer layers: 2
Batch size: 64
Num od epochs: 10
lr: 0.001

loading data
from: /content/drive/MyDrive/11785IDL/IDL_Project/data/Grocery_and_Gourmet_Food.csv


Loading data: 4125640it [02:20, 29392.18it/s]


# Users: 419876
# Items: 135194
# Interactions: 4125640


Preparing sequences: 100%|██████████| 419876/419876 [00:04<00:00, 103371.88it/s]


Data loading completed.
training batch: 10861

Create model
----------------------------------------------------------------------------------------------------
Layer                   Kernel Shape         Output Shape         # Params (K)      # Mult-Adds (M)
0_Embedding            [128, 135195]        [64, 32, 128]            17,304.96                17.30
1_Embedding                [128, 32]        [64, 32, 128]                 4.10                 0.00
2_Linear                  [128, 128]        [64, 32, 128]                16.38                 0.02
3_Linear                  [128, 128]        [64, 32, 128]                16.38                 0.02
4_Linear                  [128, 128]        [64, 32, 128]                16.38                 0.02
5_Dropout                          -        [64, 32, 128]                    -                    -
6_LayerNorm                    [128]        [64, 32, 128]                 0.26                 0.00
7_Linear                  [128, 128]   

Evaluating valid:   0%|          | 0/419876 [02:59<?, ?it/s]
  scaler = GradScaler() if use_amp else None



[VALID]
Hit@[10]: {10: 0.00011193781021063362}
NDCG@[10]: {10: 4.7811128264238586e-05}
MRR@[10]: {10: 2.8853000742320437e-05}

Epoch 1/10


  with autocast(): # Removed 'cuda'
Epoch 1: 100%|██████████| 10861/10861 [10:42<00:00, 16.89it/s, loss=6.6641]



Train Loss: 6.6641


Evaluating valid:   0%|          | 0/419876 [02:56<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.019441454143604302}
NDCG@[10]: {10: 0.010365167522818266}
MRR@[10]: {10: 0.007625480735279878}

✅ Best model saved! NDCG@10: 0.0104

Epoch 2/10


Epoch 2: 100%|██████████| 10861/10861 [10:42<00:00, 16.92it/s, loss=3.9615]



Train Loss: 3.9615


Evaluating valid:   0%|          | 0/419876 [02:56<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.02129438215092075}
NDCG@[10]: {10: 0.01141163174315935}
MRR@[10]: {10: 0.008429629715318868}

✅ Best model saved! NDCG@10: 0.0114

Epoch 3/10


Epoch 3: 100%|██████████| 10861/10861 [10:40<00:00, 16.96it/s, loss=3.3455]



Train Loss: 3.3455


Evaluating valid:   0%|          | 0/419876 [02:56<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.02171117186979013}
NDCG@[10]: {10: 0.011675618737488251}
MRR@[10]: {10: 0.00864706825008208}

✅ Best model saved! NDCG@10: 0.0117

Epoch 4/10


Epoch 4: 100%|██████████| 10861/10861 [10:39<00:00, 16.98it/s, loss=3.0791]



Train Loss: 3.0791


Evaluating valid:   0%|          | 0/419876 [02:55<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.022606674351475196}
NDCG@[10]: {10: 0.012152880752748054}
MRR@[10]: {10: 0.008989921741067386}

✅ Best model saved! NDCG@10: 0.0122

Epoch 5/10


Epoch 5: 100%|██████████| 10861/10861 [10:39<00:00, 16.98it/s, loss=2.9188]



Train Loss: 2.9188


Evaluating valid:   0%|          | 0/419876 [02:56<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.022280387542988882}
NDCG@[10]: {10: 0.012117995227981078}
MRR@[10]: {10: 0.009042891839419965}

Patience: 4/5

Epoch 6/10


Epoch 6: 100%|██████████| 10861/10861 [10:40<00:00, 16.95it/s, loss=2.8093]



Train Loss: 2.8093


Evaluating valid:   0%|          | 0/419876 [02:56<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.022775771894559347}
NDCG@[10]: {10: 0.01238545859203993}
MRR@[10]: {10: 0.009238691332452455}

✅ Best model saved! NDCG@10: 0.0124

Epoch 7/10


Epoch 7: 100%|██████████| 10861/10861 [10:39<00:00, 16.99it/s, loss=2.7255]



Train Loss: 2.7255


Evaluating valid:   0%|          | 0/419876 [02:55<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.02244472177500024}
NDCG@[10]: {10: 0.01221069518853322}
MRR@[10]: {10: 0.009112533526527232}

Patience: 4/5

Epoch 8/10


Epoch 8: 100%|██████████| 10861/10861 [10:39<00:00, 16.98it/s, loss=2.6578]



Train Loss: 2.6578


Evaluating valid:   0%|          | 0/419876 [02:55<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.02222322781011537}
NDCG@[10]: {10: 0.012193201764138805}
MRR@[10]: {10: 0.00915698826123577}

Patience: 3/5

Epoch 9/10


Epoch 9: 100%|██████████| 10861/10861 [10:38<00:00, 17.00it/s, loss=2.6045]



Train Loss: 2.6045


Evaluating valid:   0%|          | 0/419876 [02:55<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.022613819318084386}
NDCG@[10]: {10: 0.012362767831103514}
MRR@[10]: {10: 0.00926187183457193}

Patience: 2/5

Epoch 10/10


Epoch 10: 100%|██████████| 10861/10861 [10:39<00:00, 16.98it/s, loss=2.5569]



Train Loss: 2.5569


Evaluating valid:   0%|          | 0/419876 [02:55<?, ?it/s]



[VALID]
Hit@[10]: {10: 0.022549514618601683}
NDCG@[10]: {10: 0.012379214530199393}
MRR@[10]: {10: 0.009301326982856822}

Patience: 1/5

Training finished!
Best epoch: 6
Best NDCG@10: 0.0124


Evaluating test:   0%|          | 0/419876 [02:55<?, ?it/s]



[TEST]
Hit@[10]: {10: 0.018900818336842307}
NDCG@[10]: {10: 0.010185136535850757}
MRR@[10]: {10: 0.007557184872570897}

 Training finished
Best result: Epoch 6, Val_NDCG@10 = 0.0124, Test_NDCG@10: 0.0102
