In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
from collections import defaultdict
import math


In [2]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
embedding_dim = 128
num_heads = 4
num_layers = 2
dropout = 0.1
max_seq_length = 200
window_size = 20  # ±50 items around each mask
chunk_size = 100  # Approximate chunk size for compression
batch_size = 8192
num_epochs = 5
learning_rate = 0.001
mask_prob = 0.15  # Probability of masking an item
num_negatives = 99  # For evaluation

In [3]:

# Dataset class
class MovieLensDataset(Dataset):
    def __init__(self, user_dict, max_seq_length, window_size, mask_prob):
        self.user_dict = user_dict
        self.max_seq_length = max_seq_length
        self.window_size = window_size
        self.mask_prob = mask_prob
        self.users = list(user_dict.keys())

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        user = self.users[idx]
        seq = self.user_dict[user][:self.max_seq_length]
        if len(seq) < 2:
            seq = [0] * self.max_seq_length  # Pad with zeros if too short
        return self.process_sequence(seq)

    def process_sequence(self, seq):
        # Pad or truncate sequence
        seq = seq[:self.max_seq_length]
        if len(seq) < self.max_seq_length:
            seq = seq + [0] * (self.max_seq_length - len(seq))

        # Randomly mask items
        mask_positions = []
        for i in range(len(seq)):
            if seq[i] == 0:  # Skip padding
                continue
            if random.random() < self.mask_prob:
                mask_positions.append(i)
                if random.random() < 0.8:
                    seq[i] = -1  # [MASK] token (use -1 as placeholder)
                elif random.random() < 0.5:
                    seq[i] = random.randint(1, num_items)  # Random item
                # Else keep original (10% chance)

        if not mask_positions:
            # Ensure at least one mask
            i = random.randint(0, len(seq) - 1)
            while seq[i] == 0:
                i = random.randint(0, len(seq) - 1)
            mask_positions.append(i)
            seq[i] = -1

        # Dynamic chunking
        compressed_seq, chunk_map = self.compress_sequence(seq, mask_positions)
        return {
            "seq": torch.tensor(seq, dtype=torch.long),
            "compressed_seq": torch.tensor(compressed_seq, dtype=torch.long),
            "mask_positions": torch.tensor(mask_positions, dtype=torch.long),
            "chunk_map": chunk_map
        }

    def compress_sequence(self, seq, mask_positions):
        # Create windows around mask positions
        windows = []
        for pos in sorted(mask_positions):
            start = max(0, pos - self.window_size)
            end = min(len(seq), pos + self.window_size + 1)
            windows.append((start, end))

        # Merge overlapping windows
        merged = []
        current_start, current_end = windows[0]
        for start, end in windows[1:]:
            if start <= current_end:
                current_end = max(current_end, end)
            else:
                merged.append((current_start, current_end))
                current_start, current_end = start, end
        merged.append((current_start, current_end))

        # Identify gaps and compress
        compressed_seq = []
        chunk_map = []  # Maps compressed_seq indices to (start, end) or item index
        last_end = 0
        chunk_id = num_items + 1  # Start chunk IDs after item IDs

        for start, end in merged:
            # Compress gap before window
            if last_end < start:
                gap = seq[last_end:start]
                compressed_seq.append(chunk_id)
                chunk_map.append((last_end, start))
                chunk_id += 1
            # Add window items
            for i in range(start, end):
                compressed_seq.append(seq[i])
                chunk_map.append(i)
            last_end = end

        # Compress final gap
        if last_end < len(seq):
            compressed_seq.append(chunk_id)
            chunk_map.append((last_end, len(seq)))

        return compressed_seq, chunk_map

In [4]:
class SequentialRecommender(nn.Module):
    def __init__(self, num_items, embedding_dim, num_heads, num_layers, dropout):
        super(SequentialRecommender, self).__init__()
        self.num_items = num_items
        self.embedding = nn.Embedding(num_items + 1000, embedding_dim, padding_idx=0)  # +1000 for chunk IDs
        self.mask_embedding = nn.Parameter(torch.randn(embedding_dim))
        self.pos_encoding = nn.Parameter(self.create_pos_encoding(5000, embedding_dim))
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embedding_dim,
                nhead=num_heads,
                dim_feedforward=embedding_dim * 4,
                dropout=dropout,
                batch_first=True
            ),
            num_layers=num_layers
        )
        self.fc = nn.Linear(embedding_dim, num_items)

    def create_pos_encoding(self, max_len, dim):
        pe = torch.zeros(max_len, dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe

    def compress_chunk(self, chunk, chunk_len):
        # Average pooling for chunk compression
        chunk_emb = self.embedding(chunk)  # (batch, chunk_len, dim)
        chunk_mask = (chunk != 0).float().unsqueeze(-1)  # (batch, chunk_len, 1)
        chunk_emb = chunk_emb * chunk_mask
        chunk_sum = chunk_emb.sum(dim=1)  # (batch, dim)
        chunk_count = chunk_mask.sum(dim=1).clamp(min=1)  # (batch, 1)
        return chunk_sum / chunk_count  # (batch, dim)

    def forward(self, batch):
        seq = batch["compressed_seq"].to(device)  # (batch, compressed_len)
        chunk_map = batch["chunk_map"]
        mask_positions = batch["mask_positions"].to(device)  # (batch, num_masks)

        # Initialize embeddings
        batch_size, seq_len = seq.shape
        embeddings = torch.zeros(batch_size, seq_len, embedding_dim).to(device)

        for b in range(batch_size):
            for i in range(seq_len):
                item = seq[b, i].item()
                if item == -1:  # [MASK]
                    embeddings[b, i] = self.mask_embedding
                elif item >= self.num_items:  # Chunk
                    chunk_start, chunk_end = chunk_map[b][i]
                    chunk = batch["seq"][b, chunk_start:chunk_end].unsqueeze(0)
                    embeddings[b, i] = self.compress_chunk(chunk, chunk_end - chunk_start)[0]
                else:  # Item
                    embeddings[b, i] = self.embedding(seq[b, i])

        # Add positional encodings
        embeddings = embeddings + self.pos_encoding[:seq_len].unsqueeze(0)

        # Transformer
        mask = (seq == 0).to(device)
        output = self.transformer(embeddings, src_key_padding_mask=mask)  # (batch, seq_len, dim)

        # Predict items at mask positions
        logits = []
        for b in range(batch_size):
            mask_pos = mask_positions[b]
            mask_output = output[b, mask_pos]  # (num_masks, dim)
            logit = self.fc(mask_output)  # (num_masks, num_items)
            logits.append(logit)
        logits = torch.stack(logits)  # (batch, num_masks, num_items)

        return logits

    def predict(self, input_seq):
        # For inference: append [NEXT] and compress
        seq = input_seq[:, :max_seq_length]
        seq = torch.where(seq >= self.num_items, torch.zeros_like(seq), seq)  # Replace invalid items
        next_pos = seq.shape[1]
        seq = torch.cat([seq, torch.tensor([[-1]], device=seq.device)], dim=1)  # Append [NEXT]

        # Compress sequence with [NEXT] as mask
        compressed_seqs = []
        for b in range(seq.shape[0]):
            s = seq[b].cpu().numpy().tolist()
            compressed, _ = MovieLensDataset.compress_sequence(
                None, s, [next_pos]
            )
            compressed_seqs.append(compressed)
        max_len = max(len(s) for s in compressed_seqs)
        compressed_seq = torch.zeros(seq.shape[0], max_len, dtype=torch.long)
        for b, s in enumerate(compressed_seqs):
            compressed_seq[b, :len(s)] = torch.tensor(s)
        compressed_seq = compressed_seq.to(device)

        # Forward pass
        embeddings = torch.zeros(seq.shape[0], max_len, embedding_dim).to(device)
        for b in range(seq.shape[0]):
            for i in range(len(compressed_seqs[b])):
                item = compressed_seq[b, i].item()
                if item == -1:  # [NEXT]
                    embeddings[b, i] = self.mask_embedding
                elif item >= self.num_items:  # Chunk
                    chunk_start = i * chunk_size
                    chunk_end = min(chunk_start + chunk_size, input_seq.shape[1])
                    chunk = input_seq[b, chunk_start:chunk_end].unsqueeze(0)
                    embeddings[b, i] = self.compress_chunk(chunk, chunk_end - chunk_start)[0]
                else:
                    embeddings[b, i] = self.embedding(compressed_seq[b, i])

        embeddings = embeddings + self.pos_encoding[:max_len].unsqueeze(0)
        mask = (compressed_seq == 0).to(device)
        output = self.transformer(embeddings, src_key_padding_mask=mask)
        logits = self.fc(output[:, -1])  # Predict at [NEXT]
        return logits

In [5]:
def evaluate(model, user_dict, num_items, max_seq_length, device):
    model.eval()
    NDCG, HR, valid_users = 0.0, 0.0, 0

    for user, items in user_dict.items():
        if len(items) < 2:
            continue

        seq = items[:max_seq_length]
        input_seq = torch.tensor(seq[:-1], dtype=torch.long).unsqueeze(0).to(device)
        target = seq[-1]
        candidates = [target] + random.sample(list(set(range(1, num_items + 1)) - set(items)), num_negatives)

        with torch.no_grad():
            logits = model.predict(input_seq)  # (1, num_items)
            scores = logits[0, candidates]  # Scores for candidates
            ranked = torch.argsort(scores, descending=True).cpu().numpy()
            rank = np.where(ranked == 0)[0][0] + 1

        valid_users += 1
        HR += int(rank <= 10)
        NDCG += 1 / np.log2(rank + 1) if rank <= 10 else 0

        if valid_users % 100 == 0:
            print(f"Validated users: {valid_users}, HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")

    print(f"Final HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")
    return HR / valid_users, NDCG / valid_users


In [6]:

# Load and preprocess dataset
def load_movielens(file_path):
    user_dict = defaultdict(list)
    item_set = set()
    with open(file_path, 'r') as f:
        for line in f:
            user_id, item_id = map(int, line.strip().split())
            user_dict[user_id].append(item_id)
            item_set.add(item_id)
    num_items = max(item_set)
    return user_dict, num_items


In [7]:
file_path = "data/ml-1m.txt"
user_dict, num_items = load_movielens(file_path)
print(f"Number of users: {len(user_dict)}, Number of items: {num_items}")

# Create dataset and dataloader
dataset = MovieLensDataset(user_dict, max_seq_length, window_size, mask_prob)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize model


Number of users: 6040, Number of items: 3416


In [8]:
model = SequentialRecommender(num_items, embedding_dim, num_heads, num_layers, dropout).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss(ignore_index=0)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in dataloader:
        seq = batch["seq"].to(device)
        logits = model(batch)  # (batch, num_masks, num_items)
        mask_positions = batch["mask_positions"].to(device)

        loss = 0
        for b in range(logits.shape[0]):
            targets = seq[b, mask_positions[b]]  # Ground truth items
            pred = logits[b]  # (num_masks, num_items)
            loss += criterion(pred, targets)
        loss /= logits.shape[0]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader):.4f}")



RuntimeError: stack expects each tensor to be equal size, but got [50] at entry 0 and [45] at entry 1

In [None]:
# Evaluate
evaluate(model, user_dict, num_items, max_seq_length, device)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
embedding_dim = 128
num_heads = 4
num_layers = 2
dropout = 0.1
max_seq_length = 200
window_size = 50
chunk_size = 100
batch_size = 32
num_epochs = 5
learning_rate = 0.001
mask_prob = 0.15
num_negatives = 99

# Custom collation function
def custom_collate_fn(batch):
    seqs = [item["seq"] for item in batch]
    compressed_seqs = [item["compressed_seq"] for item in batch]
    mask_positions = [item["mask_positions"] for item in batch]
    chunk_maps = [item["chunk_map"] for item in batch]

    max_comp_len = max(len(cs) for cs in compressed_seqs)
    max_mask_len = max(len(mp) for mp in mask_positions)

    seqs_padded = torch.stack(seqs)
    compressed_seqs_padded = torch.zeros(len(batch), max_comp_len, dtype=torch.long)
    mask_positions_padded = torch.zeros(len(batch), max_mask_len, dtype=torch.long)

    for i in range(len(batch)):
        compressed_seqs_padded[i, :len(compressed_seqs[i])] = compressed_seqs[i]
        mask_positions_padded[i, :len(mask_positions[i])] = mask_positions[i]

    return {
        "seq": seqs_padded,
        "compressed_seq": compressed_seqs_padded,
        "mask_positions": mask_positions_padded,
        "chunk_map": chunk_maps
    }

# Dataset class
class MovieLensDataset(Dataset):
    def __init__(self, user_dict, num_items, max_seq_length, window_size, mask_prob):
        self.user_dict = user_dict
        self.num_items = num_items
        self.max_seq_length = max_seq_length
        self.window_size = window_size
        self.mask_prob = mask_prob
        self.users = list(user_dict.keys())

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        user = self.users[idx]
        seq = self.user_dict[user][:self.max_seq_length]
        if len(seq) < 2:
            seq = [0] * self.max_seq_length
        return self.process_sequence(seq)

    def process_sequence(self, seq):
        seq = seq[:self.max_seq_length]
        if len(seq) < self.max_seq_length:
            seq = seq + [0] * (self.max_seq_length - len(seq))

        mask_positions = []
        masked_seq = seq.copy()
        for i in range(len(seq)):
            if seq[i] == 0:
                continue
            if random.random() < self.mask_prob:
                mask_positions.append(i)
                if random.random() < 0.8:
                    masked_seq[i] = self.num_items  # Use num_items as [MASK]
                elif random.random() < 0.5:
                    masked_seq[i] = random.randint(1, self.num_items)
                # Else keep original

        if not mask_positions:
            i = random.randint(0, len(seq) - 1)
            while seq[i] == 0:
                i = random.randint(0, len(seq) - 1)
            mask_positions.append(i)
            masked_seq[i] = self.num_items

        compressed_seq, chunk_map = self.compress_sequence(masked_seq, mask_positions)
        return {
            "seq": torch.tensor(seq, dtype=torch.long),
            "compressed_seq": torch.tensor(compressed_seq, dtype=torch.long),
            "mask_positions": torch.tensor(mask_positions, dtype=torch.long),
            "chunk_map": chunk_map
        }

    def compress_sequence(self, seq, mask_positions):
        windows = []
        for pos in sorted(mask_positions):
            start = max(0, pos - self.window_size)
            end = min(len(seq), pos + self.window_size + 1)
            windows.append((start, end))

        merged = []
        if windows:
            current_start, current_end = windows[0]
            for start, end in windows[1:]:
                if start <= current_end:
                    current_end = max(current_end, end)
                else:
                    merged.append((current_start, current_end))
                    current_start, current_end = start, end
            merged.append((current_start, current_end))

        compressed_seq = []
        chunk_map = []
        last_end = 0
        chunk_id = self.num_items + 1

        for start, end in merged:
            if last_end < start:
                compressed_seq.append(chunk_id)
                chunk_map.append((last_end, start))
                chunk_id += 1
            for i in range(start, end):
                compressed_seq.append(seq[i])
                chunk_map.append(i)
            last_end = end

        if last_end < len(seq):
            compressed_seq.append(chunk_id)
            chunk_map.append((last_end, len(seq)))

        return compressed_seq, chunk_map

# Model
class SequentialRecommender(nn.Module):
    def __init__(self, num_items, embedding_dim, num_heads, num_layers, dropout):
        super(SequentialRecommender, self).__init__()
        self.num_items = num_items
        self.embedding = nn.Embedding(num_items + 10000, embedding_dim, padding_idx=0)  # Increased size
        self.pos_encoding = nn.Parameter(self.create_pos_encoding(5000, embedding_dim))
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embedding_dim,
                nhead=num_heads,
                dim_feedforward=embedding_dim * 4,
                dropout=dropout,
                batch_first=True
            ),
            num_layers=num_layers
        )
        self.fc = nn.Linear(embedding_dim, num_items)

    def create_pos_encoding(self, max_len, dim):
        pe = torch.zeros(max_len, dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe

    def compress_chunk(self, chunk, chunk_len):
        chunk_emb = self.embedding(chunk)
        chunk_mask = (chunk != 0).float().unsqueeze(-1)
        chunk_emb = chunk_emb * chunk_mask
        chunk_sum = chunk_emb.sum(dim=1)
        chunk_count = chunk_mask.sum(dim=1).clamp(min=1)
        return chunk_sum / chunk_count

    def forward(self, batch):
        seq = batch["seq"].to(device)
        compressed_seq = batch["compressed_seq"].to(device)
        mask_positions = batch["mask_positions"].to(device)
        chunk_map = batch["chunk_map"]

        batch_size, seq_len = compressed_seq.shape
        embeddings = torch.zeros(batch_size, seq_len, embedding_dim).to(device)

        for b in range(batch_size):
            for i in range(seq_len):
                item = compressed_seq[b, i].item()
                if item == self.num_items:  # [MASK]
                    embeddings[b, i] = self.embedding(torch.tensor(self.num_items, device=device))
                elif item > self.num_items:  # Chunk
                    chunk_start, chunk_end = chunk_map[b][i]
                    chunk = seq[b, chunk_start:chunk_end].unsqueeze(0)
                    embeddings[b, i] = self.compress_chunk(chunk, chunk_end - chunk_start)[0]
                else:
                    embeddings[b, i] = self.embedding(compressed_seq[b, i])

        embeddings = embeddings + self.pos_encoding[:seq_len].unsqueeze(0)
        mask = (compressed_seq == 0).to(device)
        output = self.transformer(embeddings, src_key_padding_mask=mask)

        logits = []
        for b in range(batch_size):
            mask_pos = mask_positions[b][mask_positions[b] != 0]
            if len(mask_pos) == 0:
                continue
            mask_output = output[b, mask_pos]
            logit = self.fc(mask_output)
            logits.append(logit)
        if not logits:
            return torch.tensor([]).to(device)
        return torch.cat(logits, dim=0)

    def predict(self, input_seq):
        seq = input_seq[:, :max_seq_length]
        seq = torch.where(seq >= self.num_items, torch.zeros_like(seq), seq)
        next_pos = seq.shape[1]
        seq = torch.cat([seq, torch.tensor([[self.num_items]], device=seq.device)], dim=1)

        compressed_seqs = []
        chunk_maps = []
        for b in range(seq.shape[0]):
            s = seq[b].cpu().numpy().tolist()
            compressed, chunk_map = MovieLensDataset.compress_sequence(None, s, [next_pos])
            compressed_seqs.append(compressed)
            chunk_maps.append(chunk_map)

        max_len = max(len(s) for s in compressed_seqs)
        compressed_seq = torch.zeros(seq.shape[0], max_len, dtype=torch.long)
        for b, s in enumerate(compressed_seqs):
            compressed_seq[b, :len(s)] = torch.tensor(s)
        compressed_seq = compressed_seq.to(device)

        embeddings = torch.zeros(seq.shape[0], max_len, embedding_dim).to(device)
        for b in range(seq.shape[0]):
            for i in range(len(compressed_seqs[b])):
                item = compressed_seq[b, i].item()
                if item == self.num_items:
                    embeddings[b, i] = self.embedding(torch.tensor(self.num_items, device=device))
                elif item > self.num_items:
                    chunk_start, chunk_end = chunk_maps[b][i]
                    chunk = seq[b, chunk_start:chunk_end].unsqueeze(0)
                    embeddings[b, i] = self.compress_chunk(chunk, chunk_end - chunk_start)[0]
                else:
                    embeddings[b, i] = self.embedding(compressed_seq[b, i])

        embeddings = embeddings + self.pos_encoding[:max_len].unsqueeze(0)
        mask = (compressed_seq == 0).to(device)
        output = self.transformer(embeddings, src_key_padding_mask=mask)
        logits = self.fc(output[:, -1])
        return logits

# Evaluation
def evaluate(model, user_dict, num_items, max_seq_length, device):
    model.eval()
    NDCG, HR, valid_users = 0.0, 0.0, 0

    for user, items in user_dict.items():
        if len(items) < 2:
            continue

        seq = items[:max_seq_length]
        input_seq = torch.tensor(seq[:-1], dtype=torch.long).unsqueeze(0).to(device)
        target = seq[-1]
        candidates = [target] + random.sample(list(set(range(1, num_items + 1)) - set(items)), num_negatives)

        with torch.no_grad():
            logits = model.predict(input_seq)
            scores = logits[0, candidates]
            ranked = torch.argsort(scores, descending=True).cpu().numpy()
            rank = np.where(ranked == 0)[0][0] + 1

        valid_users += 1
        HR += int(rank <= 10)
        NDCG += 1 / np.log2(rank + 1) if rank <= 10 else 0

        if valid_users % 100 == 0:
            print(f"Validated users: {valid_users}, HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")

    print(f"Final HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")
    return HR / valid_users, NDCG / valid_users

# Load dataset
def load_movielens(file_path):
    user_dict = defaultdict(list)
    item_set = set()
    with open(file_path, 'r') as f:
        for line in f:
            user_id, item_id = map(int, line.strip().split())
            user_dict[user_id].append(item_id)
            item_set.add(item_id)
    num_items = max(item_set)
    return user_dict, num_items

# Main
if __name__ == "__main__":
    file_path = "data/ml-1m.txt"
    user_dict, num_items = load_movielens(file_path)
    print(f"Number of users: {len(user_dict)}, Number of items: {num_items}")

    dataset = MovieLensDataset(user_dict, num_items, max_seq_length, window_size, mask_prob)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)

    model = SequentialRecommender(num_items, embedding_dim, num_heads, num_layers, dropout).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    
    # length of dataloader
    dataloader_length = len(dataloader)
    print(f"Length of dataloader: {dataloader_length}")
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_idx, batch in enumerate(dataloader):
            seq = batch["seq"].to(device)
            logits = model(batch)
            if logits.numel() == 0:
                continue

            mask_positions = batch["mask_positions"].to(device)
            targets = []
            for b in range(seq.shape[0]):
                mask_pos = mask_positions[b][mask_positions[b] != 0]
                if len(mask_pos) > 0:
                    valid_targets = seq[b, mask_pos]
                    # Filter out invalid targets (e.g., masks or padding)
                    valid_targets = valid_targets[valid_targets != num_items]  # Exclude [MASK]
                    valid_targets = valid_targets[valid_targets != 0]  # Exclude padding
                    if len(valid_targets) > 0:
                        targets.append(valid_targets)
            if not targets:
                continue
            targets = torch.cat(targets).to(device)

            # Ensure logits and targets align
            if logits.shape[0] != targets.shape[0]:
                print(f"Batch {batch_idx}: Logits shape {logits.shape}, Targets shape {targets.shape}")
                continue

            loss = criterion(logits, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
            print(f"Batch {batch_idx}, Loss: {loss.item():.4f}")
            

        print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader):.4f}")

    evaluate(model, user_dict, num_items, max_seq_length, device)



Number of users: 6040, Number of items: 3416
Length of dataloader: 189
Batch 0, Loss: 8.2981
Batch 1, Loss: 8.2639
Batch 2, Loss: 8.2037
Batch 3, Loss: 8.1691
Batch 4, Loss: 8.0988
Batch 5, Loss: 8.0397
Batch 6, Loss: 8.0428
Batch 7, Loss: 7.9012
Batch 8, Loss: 7.9642
Batch 9, Loss: 7.8748
Batch 10, Loss: 7.8129
Batch 11, Loss: 7.7404
Batch 12, Loss: 7.7717
Batch 13, Loss: 7.6499
Batch 14, Loss: 7.7104
Batch 15, Loss: 7.6264
Batch 16, Loss: 7.6339
Batch 17, Loss: 7.5514
Batch 18, Loss: 7.5494
Batch 19, Loss: 7.4250
Batch 20, Loss: 7.5327
Batch 21, Loss: 7.4815
Batch 22, Loss: 7.5279
Batch 23, Loss: 7.4984
Batch 24, Loss: 7.3973
Batch 25, Loss: 7.4732
Batch 26, Loss: 7.5057
Batch 27, Loss: 7.4662
Batch 28, Loss: 7.2851
Batch 29, Loss: 7.4433
Batch 30, Loss: 7.3849
Batch 31, Loss: 7.3621
Batch 32, Loss: 7.3990
Batch 33, Loss: 7.5798
Batch 34, Loss: 7.3565
Batch 35, Loss: 7.4002
Batch 36, Loss: 7.3881
Batch 37, Loss: 7.5649
Batch 38, Loss: 7.3811
Batch 39, Loss: 7.3479
Batch 40, Loss: 7.5

AttributeError: 'NoneType' object has no attribute 'window_size'

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
from collections import defaultdict
import math

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
embedding_dim = 128
num_heads = 4
num_layers = 2
dropout = 0.1
max_seq_length = 200
window_size = 50
chunk_size = 100
batch_size = 128
num_epochs = 5
learning_rate = 0.001
mask_prob = 0.15
num_masks_per_batch = 3
num_negatives = 99

# Custom collation function
def custom_collate_fn(batch):
    seqs = torch.stack([item["seq"] for item in batch])
    return {"seq": seqs}

# Dataset class
class MovieLensDataset(Dataset):
    def __init__(self, user_dict, num_items, max_seq_length):
        self.user_dict = user_dict
        self.num_items = num_items
        self.max_seq_length = max_seq_length
        self.users = list(user_dict.keys())
        self.precomputed = self.precompute_sequences()

    def precompute_sequences(self):
        precomputed = {}
        for user in self.users:
            seq = self.user_dict[user][:self.max_seq_length]
            if len(seq) < 2:
                seq = [0] * self.max_seq_length
            else:
                seq = seq + [0] * (self.max_seq_length - len(seq)) if len(seq) < self.max_seq_length else seq
            precomputed[user] = torch.tensor(seq, dtype=torch.long)
        return precomputed

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        user = self.users[idx]
        seq = self.precomputed[user].clone()
        return {"seq": seq}

# Model
class SequentialRecommender(nn.Module):
    def __init__(self, num_items, embedding_dim, num_heads, num_layers, dropout, window_size):
        super(SequentialRecommender, self).__init__()
        self.num_items = num_items
        self.window_size = window_size
        self.embedding = nn.Embedding(num_items + 10000, embedding_dim, padding_idx=0)
        self.pos_encoding = nn.Parameter(self.create_pos_encoding(5000, embedding_dim))
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embedding_dim,
                nhead=num_heads,
                dim_feedforward=embedding_dim * 4,
                dropout=dropout,
                batch_first=True
            ),
            num_layers=num_layers
        )
        self.fc = nn.Linear(embedding_dim, num_items)

    def create_pos_encoding(self, max_len, dim):
        pe = torch.zeros(max_len, dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe

    def compress_sequence(self, seq, mask_positions):
        batch_size, seq_len = seq.shape
        windows = []
        for pos in mask_positions:
            start = max(0, pos - self.window_size)
            end = min(seq_len, pos + self.window_size + 1)
            windows.append((start, end))

        merged = []
        if windows:
            current_start, current_end = windows[0]
            for start, end in windows[1:]:
                if start <= current_end:
                    current_end = max(current_end, end)
                else:
                    merged.append((current_start, current_end))
                    current_start, current_end = start, end
            merged.append((current_start, current_end))

        total_len = 0
        last_end = 0
        for start, end in merged:
            if last_end < start:
                total_len += 1
            total_len += end - start
            last_end = end
        if last_end < seq_len:
            total_len += 1

        compressed_seq = torch.zeros(batch_size, total_len, dtype=torch.long, device=seq.device)
        chunk_map = [[] for _ in range(batch_size)]
        mask_indices = []
        chunk_id = self.num_items + 1
        pos = 0

        last_end = 0
        for start, end in merged:
            if last_end < start:
                compressed_seq[:, pos] = chunk_id
                for b in range(batch_size):
                    chunk_map[b].append((last_end, start))
                chunk_id += 1
                pos += 1
            window_len = end - start
            compressed_seq[:, pos:pos + window_len] = seq[:, start:end].clone()
            for mp in mask_positions:
                if start <= mp < end and pos + (mp - start) not in mask_indices:  # Avoid duplicates
                    mask_indices.append(pos + (mp - start))
            for b in range(batch_size):
                chunk_map[b].extend(list(range(start, end)))
            pos += window_len
            last_end = end

        if last_end < seq_len:
            compressed_seq[:, pos] = chunk_id
            for b in range(batch_size):
                chunk_map[b].append((last_end, seq_len))

        return compressed_seq, chunk_map, torch.tensor(mask_indices, device=seq.device)

    def forward(self, seq, mask_positions):
        batch_size, seq_len = seq.shape
        masked_seq = seq.clone()
        masked_seq[torch.arange(batch_size).unsqueeze(1), mask_positions] = self.num_items

        compressed_seq, chunk_map, mask_indices = self.compress_sequence(masked_seq, mask_positions)
        batch_size, comp_len = compressed_seq.shape
        embeddings = self.embedding(compressed_seq)

        is_chunk = (compressed_seq > self.num_items)
        chunk_indices = torch.where(is_chunk)
        if chunk_indices[0].numel() > 0:
            chunk_embeddings = []
            for b, i in zip(chunk_indices[0], chunk_indices[1]):
                chunk_start, chunk_end = chunk_map[b][i]
                chunk = seq[b, chunk_start:chunk_end].unsqueeze(0)
                chunk_emb = self.embedding(chunk)
                chunk_mask = (chunk != 0).float().unsqueeze(-1)
                chunk_sum = (chunk_emb * chunk_mask).sum(dim=1)
                chunk_count = chunk_mask.sum(dim=1).clamp(min=1)
                chunk_embeddings.append(chunk_sum / chunk_count)
            embeddings[chunk_indices] = torch.cat(chunk_embeddings)

        embeddings = embeddings + self.pos_encoding[:comp_len].unsqueeze(0)
        mask = (compressed_seq == 0).to(device)
        output = self.transformer(embeddings, src_key_padding_mask=mask)

        mask_output = output[torch.arange(batch_size).unsqueeze(1), mask_indices]
        logits = self.fc(mask_output)  # Shape: (batch_size, num_masks, num_items)
        return logits, mask_indices  # Return mask_indices for alignment

    def predict(self, input_seq):
        seq = input_seq[:, :max_seq_length]
        seq = torch.where(seq >= self.num_items, torch.zeros_like(seq), seq)
        next_pos = seq.shape[1]
        seq = torch.cat([seq, torch.full((seq.shape[0], 1), self.num_items, device=seq.device)], dim=1)
        mask_positions = torch.tensor([next_pos], device=seq.device)
        logits, _ = self.forward(seq, mask_positions)
        return logits

# Evaluation
def evaluate(model, user_dict, num_items, max_seq_length, device):
    model.eval()
    NDCG, HR, valid_users = 0.0, 0.0, 0

    for user, items in user_dict.items():
        if len(items) < 2:
            continue

        seq = items[:max_seq_length]
        input_seq = torch.tensor(seq[:-1], dtype=torch.long).unsqueeze(0).to(device)
        target = seq[-1]
        candidates = [target] + random.sample(list(set(range(1, num_items + 1)) - set(items)), num_negatives)

        with torch.no_grad():
            logits = model.predict(input_seq)
            scores = logits[0, 0, candidates]  # Single mask in predict
            ranked = torch.argsort(scores, descending=True).cpu().numpy()
            rank = np.where(ranked == 0)[0][0] + 1

        valid_users += 1
        HR += int(rank <= 10)
        NDCG += 1 / np.log2(rank + 1) if rank <= 10 else 0

        if valid_users % 100 == 0:
            print(f"Validated users: {valid_users}, HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")

    print(f"Final HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")
    return HR / valid_users, NDCG / valid_users

# Load dataset
def load_movielens(file_path):
    user_dict = defaultdict(list)
    item_set = set()
    with open(file_path, 'r') as f:
        for line in f:
            user_id, item_id = map(int, line.strip().split())
            user_dict[user_id].append(item_id)
            item_set.add(item_id)
    num_items = max(item_set)
    return user_dict, num_items

# Main
if __name__ == "__main__":
    file_path = "data/ml-1m.txt"
    user_dict, num_items = load_movielens(file_path)
    print(f"Number of users: {len(user_dict)}, Number of items: {num_items}")

    dataset = MovieLensDataset(user_dict, num_items, max_seq_length)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)

    model = SequentialRecommender(num_items, embedding_dim, num_heads, num_layers, dropout, window_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    print(f"Length of dataloader: {len(dataloader)}")
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_idx, batch in enumerate(dataloader):
            seq = batch["seq"].to(device)
            batch_size = seq.shape[0]
            valid_positions = (seq != 0).sum(dim=0).nonzero(as_tuple=True)[0]
            if len(valid_positions) < num_masks_per_batch:
                continue
            mask_positions = random.sample(valid_positions.tolist(), num_masks_per_batch)
            mask_positions = torch.tensor(mask_positions, device=device)

            logits, mask_indices = model(seq, mask_positions)  # Shape: (batch_size, num_masks, num_items)
            if logits.numel() == 0:
                continue

            targets = seq[torch.arange(batch_size).unsqueeze(1), mask_positions]  # Shape: (batch_size, num_masks)
            valid_mask = (targets != num_items) & (targets != 0)  # Shape: (batch_size, num_masks)

            # Reshape based on actual number of masks (mask_indices)
            num_masks = mask_indices.shape[0]
            logits = logits.view(batch_size * num_masks, num_items)  # Shape: (batch_size * num_masks, num_items)
            targets = targets.view(batch_size * num_masks_per_batch)  # Shape: (batch_size * num_masks)
            valid_mask = valid_mask.view(batch_size * num_masks_per_batch)

            # Filter only valid predictions
            targets = targets[valid_mask]
            logits = logits[:len(targets)]  # Adjust logits to match targets length

            if logits.shape[0] != targets.shape[0] or targets.numel() == 0:
                continue

            loss = criterion(logits, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            if batch_idx % 10 == 0:
                print(f"Batch {batch_idx}, Loss: {loss.item():.4f}")

        print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader):.4f}")

    evaluate(model, user_dict, num_items, max_seq_length, device)

KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
from collections import defaultdict
import math

# Device configuration
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

# Hyperparameters
embedding_dim = 128
num_heads = 4
num_layers = 2
dropout = 0.1
max_seq_length = 200
window_size = 50
chunk_size = 100
batch_size = 512
num_epochs = 100
learning_rate = 0.001
mask_prob = 0.15
num_masks_per_batch = 3
num_negatives = 99

# Custom collation function
def custom_collate_fn(batch):
    seqs = torch.stack([item["seq"] for item in batch])
    return {"seq": seqs}

# Dataset class
class MovieLensDataset(Dataset):
    def __init__(self, user_dict, num_items, max_seq_length):
        self.user_dict = user_dict
        self.num_items = num_items
        self.max_seq_length = max_seq_length
        self.users = list(user_dict.keys())
        self.precomputed = self.precompute_sequences()

    def precompute_sequences(self):
        precomputed = {}
        for user in self.users:
            seq = self.user_dict[user][:self.max_seq_length]
            if len(seq) < 2:
                seq = [0] * self.max_seq_length
            else:
                seq = seq + [0] * (self.max_seq_length - len(seq)) if len(seq) < self.max_seq_length else seq
            precomputed[user] = torch.tensor(seq, dtype=torch.long)
        return precomputed

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        user = self.users[idx]
        seq = self.precomputed[user].clone()
        return {"seq": seq}

# Model
class SequentialRecommender(nn.Module):
    def __init__(self, num_items, embedding_dim, num_heads, num_layers, dropout, window_size):
        super(SequentialRecommender, self).__init__()
        self.num_items = num_items
        self.window_size = window_size
        self.embedding = nn.Embedding(num_items + 10000, embedding_dim, padding_idx=0)
        self.pos_encoding = nn.Parameter(self.create_pos_encoding(5000, embedding_dim))
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embedding_dim,
                nhead=num_heads,
                dim_feedforward=embedding_dim * 4,
                dropout=dropout,
                batch_first=True
            ),
            num_layers=num_layers
        )
        self.fc = nn.Linear(embedding_dim, num_items)

    def create_pos_encoding(self, max_len, dim):
        pe = torch.zeros(max_len, dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe

    def compress_sequence(self, seq, mask_positions):
        batch_size, seq_len = seq.shape
        windows = []
        for pos in mask_positions:
            start = max(0, pos - self.window_size)
            end = min(seq_len, pos + self.window_size + 1)
            windows.append((start, end))

        merged = []
        if windows:
            current_start, current_end = windows[0]
            for start, end in windows[1:]:
                if start <= current_end:
                    current_end = max(current_end, end)
                else:
                    merged.append((current_start, current_end))
                    current_start, current_end = start, end
            merged.append((current_start, current_end))

        total_len = 0
        last_end = 0
        for start, end in merged:
            if last_end < start:
                total_len += 1
            total_len += end - start
            last_end = end
        if last_end < seq_len:
            total_len += 1

        compressed_seq = torch.zeros(batch_size, total_len, dtype=torch.long, device=seq.device)
        chunk_map = [[] for _ in range(batch_size)]
        mask_indices = []
        chunk_id = self.num_items + 1
        pos = 0

        last_end = 0
        for start, end in merged:
            if last_end < start:
                compressed_seq[:, pos] = chunk_id
                for b in range(batch_size):
                    chunk_map[b].append((last_end, start))
                chunk_id += 1
                pos += 1
            window_len = end - start
            compressed_seq[:, pos:pos + window_len] = seq[:, start:end].clone()
            for mp in mask_positions:
                if start <= mp < end and pos + (mp - start) not in mask_indices:
                    mask_indices.append(pos + (mp - start))
            for b in range(batch_size):
                chunk_map[b].extend(list(range(start, end)))
            pos += window_len
            last_end = end

        if last_end < seq_len:
            compressed_seq[:, pos] = chunk_id
            for b in range(batch_size):
                chunk_map[b].append((last_end, seq_len))

        return compressed_seq, chunk_map, torch.tensor(mask_indices, device=seq.device)

    def forward(self, seq, mask_positions, is_predict=False):
        batch_size, seq_len = seq.shape
        masked_seq = seq.clone()
        masked_seq[torch.arange(batch_size).unsqueeze(1), mask_positions] = self.num_items

        compressed_seq, chunk_map, mask_indices = self.compress_sequence(masked_seq, mask_positions)
        batch_size, comp_len = compressed_seq.shape
        embeddings = self.embedding(compressed_seq)

        is_chunk = (compressed_seq > self.num_items)
        chunk_indices = torch.where(is_chunk)
        if chunk_indices[0].numel() > 0:
            chunk_embeddings = []
            for b, i in zip(chunk_indices[0], chunk_indices[1]):
                chunk_start, chunk_end = chunk_map[b][i]
                chunk = seq[b, chunk_start:chunk_end].unsqueeze(0)
                chunk_emb = self.embedding(chunk)
                chunk_mask = (chunk != 0).float().unsqueeze(-1)
                chunk_sum = (chunk_emb * chunk_mask).sum(dim=1)
                chunk_count = chunk_mask.sum(dim=1).clamp(min=1)
                chunk_embeddings.append(chunk_sum / chunk_count)
            embeddings[chunk_indices] = torch.cat(chunk_embeddings)

        embeddings = embeddings + self.pos_encoding[:comp_len].unsqueeze(0)
        mask = (compressed_seq == 0).to(device)
        output = self.transformer(embeddings, src_key_padding_mask=mask)

        mask_output = output[torch.arange(batch_size).unsqueeze(1), mask_indices]
        logits = self.fc(mask_output)  # Shape: (batch_size, num_masks, num_items)
        
        # For predict, ensure only the last mask is used
        if is_predict and len(mask_indices) > 1:
            mask_output = mask_output[:, -1:, :]  # Take only the last mask
            logits = self.fc(mask_output)         # Shape: (batch_size, 1, num_items)
        
        return logits, mask_indices

    def predict(self, input_seq):
        seq = input_seq[:, :max_seq_length]
        seq = torch.where(seq >= self.num_items, torch.zeros_like(seq), seq)
        next_pos = seq.shape[1]
        seq = torch.cat([seq, torch.full((seq.shape[0], 1), self.num_items, device=seq.device)], dim=1)
        mask_positions = torch.tensor([next_pos], device=seq.device)
        logits, _ = self.forward(seq, mask_positions, is_predict=True)
        return logits

# Evaluation
def evaluate(model, user_dict, num_items, max_seq_length, device):
    model.eval()
    NDCG, HR, valid_users = 0.0, 0.0, 0

    for user, items in user_dict.items():
        if len(items) < 2:
            continue

        seq = items[:max_seq_length]
        input_seq = torch.tensor(seq[:-1], dtype=torch.long).unsqueeze(0).to(device)
        target = seq[-1]
        candidates = [target] + random.sample(list(set(range(1, num_items)) - set(items)), num_negatives)

        with torch.no_grad():
            logits = model.predict(input_seq)
            scores = logits[0, 0, candidates]  # Single mask in predict
            
            ranked = torch.argsort(scores, descending=True).cpu().numpy()
            rank = np.where(ranked == 0)[0][0] + 1

        valid_users += 1
        HR += int(rank <= 10)
        NDCG += 1 / np.log2(rank + 1) if rank <= 10 else 0

        # if valid_users % 100 == 0:
        #     print(f"Validated users: {valid_users}, HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")

    # print(f"Final HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")
    return HR / valid_users, NDCG / valid_users


# Load dataset
def load_movielens(file_path):
    user_dict = defaultdict(list)
    item_set = set()
    with open(file_path, 'r') as f:
        for line in f:
            user_id, item_id = map(int, line.strip().split())
            user_dict[user_id].append(item_id)
            item_set.add(item_id)
    num_items = max(item_set)
    return user_dict, num_items

# Main
if __name__ == "__main__":
    file_path = "data/ml-1m.txt"
    user_dict, num_items = load_movielens(file_path)
    print(f"Number of users: {len(user_dict)}, Number of items: {num_items}")

    dataset = MovieLensDataset(user_dict, num_items, max_seq_length)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)

    model = SequentialRecommender(num_items, embedding_dim, num_heads, num_layers, dropout, window_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    print(f"Length of dataloader: {len(dataloader)}")
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_idx, batch in enumerate(dataloader):
            seq = batch["seq"].to(device)
            batch_size = seq.shape[0]
            valid_positions = (seq != 0).sum(dim=0).nonzero(as_tuple=True)[0]
            if len(valid_positions) < num_masks_per_batch:
                continue
            mask_positions = random.sample(valid_positions.tolist(), num_masks_per_batch)
            mask_positions = torch.tensor(mask_positions, device=device)

            logits, mask_indices = model(seq, mask_positions)  # Shape: (batch_size, num_masks, num_items)
            if logits.numel() == 0:
                continue

            targets = seq[torch.arange(batch_size).unsqueeze(1), mask_positions]  # Shape: (batch_size, num_masks_per_batch)
            valid_mask = (targets != num_items) & (targets != 0)  # Shape: (batch_size, num_masks_per_batch)

            num_masks = mask_indices.shape[0]
            logits = logits.view(batch_size * num_masks, num_items)  # Shape: (batch_size * num_masks, num_items)
            targets = targets.view(batch_size * num_masks_per_batch)  # Shape: (batch_size * num_masks_per_batch)
            valid_mask = valid_mask.view(batch_size * num_masks_per_batch)

            targets = targets[valid_mask]
            logits = logits[:targets.shape[0]]  # Align with filtered targets

            if logits.shape[0] != targets.shape[0] or targets.numel() == 0:
                continue

            loss = criterion(logits, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            # if batch_idx % 10 == 0:
            #     print(f"Batch {batch_idx}, Loss: {loss.item():.4f}")

        print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader):.4f}")
        if epoch % 10 == 0:
            # Evaluate every 10 epochs
            print("Evaluating...")
            HR, NDCG = evaluate(model, user_dict, num_items, max_seq_length, device)
            print(f"Epoch {epoch+1}, HR@10: {HR:.4f}, NDCG@10: {NDCG:.4f}")

Number of users: 6040, Number of items: 3416
Length of dataloader: 12
Epoch 1, Loss: 7.9794
Evaluating...


  output = torch._nested_tensor_from_mask(output, src_key_padding_mask.logical_not(), mask_check=False)


Epoch 1, HR@10: 0.3772, NDCG@10: 0.1947
Epoch 2, Loss: 4.4396
Epoch 3, Loss: 7.4858
Epoch 4, Loss: 6.2678
Epoch 5, Loss: 7.4478
Epoch 6, Loss: 6.1570
Epoch 7, Loss: 4.9915
Epoch 8, Loss: 6.8161
Epoch 9, Loss: 5.5573
Epoch 10, Loss: 6.7708
Epoch 11, Loss: 6.1484
Evaluating...
Epoch 11, HR@10: 0.4550, NDCG@10: 0.2506
Epoch 12, Loss: 6.1409
Epoch 13, Loss: 5.5551
Epoch 14, Loss: 6.7167
Epoch 15, Loss: 5.5659
Epoch 16, Loss: 5.5613
Epoch 17, Loss: 6.1731
Epoch 18, Loss: 5.5308
Epoch 19, Loss: 6.1632
Epoch 20, Loss: 5.5201
Epoch 21, Loss: 6.7383
Evaluating...
Epoch 21, HR@10: 0.4616, NDCG@10: 0.2519
Epoch 22, Loss: 4.9680
Epoch 23, Loss: 6.1492
Epoch 24, Loss: 6.1620
Epoch 25, Loss: 4.9555
Epoch 26, Loss: 6.7825
Epoch 27, Loss: 7.4052
Epoch 28, Loss: 6.1605
Epoch 29, Loss: 6.1544
Epoch 30, Loss: 6.1566
Epoch 31, Loss: 6.7728
Evaluating...
Epoch 31, HR@10: 0.4606, NDCG@10: 0.2514
Epoch 32, Loss: 5.5573
Epoch 33, Loss: 6.7396
Epoch 34, Loss: 5.5475
Epoch 35, Loss: 5.5806
Epoch 36, Loss: 6.149