In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import random

# Hyperparameters
MAX_SEQ_LENGTH = 200
EMBEDDING_DIM = 64
NUM_HEADS = 2
NUM_LAYERS = 2
BATCH_SIZE = 2048
LR = 0.001
EPOCHS = 100
NEG_SAMPLES = 99

# Load data
def load_data(file_path):
    user_dict = {}
    with open(file_path, 'r') as f:
        for line in f:
            user, item = map(int, line.strip().split())
            if user not in user_dict:
                user_dict[user] = []
            user_dict[user].append(item)
    return user_dict

def prepare_sequences(user_dict, max_seq_length):
    sequences, targets = [], []
    for user, items in user_dict.items():
        for i in range(1, len(items)):
            seq = items[:i][-max_seq_length:]
            pad_length = max_seq_length - len(seq)
            sequences.append([0] * pad_length + seq)  # Left padding
            targets.append(items[i])
    return np.array(sequences), np.array(targets)

class RecDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets
    
    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, idx):
        return torch.tensor(self.sequences[idx], dtype=torch.long), torch.tensor(self.targets[idx], dtype=torch.long)

class TransformerRec(nn.Module):
    def __init__(self, num_items, embedding_dim, num_heads, num_layers, max_seq_length):
        super().__init__()
        self.item_embedding = nn.Embedding(num_items + 1, embedding_dim)
        self.pos_embedding = nn.Embedding(max_seq_length, embedding_dim)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.fc = nn.Linear(embedding_dim, num_items)
    
    def forward(self, sequences):
        seq_len = sequences.size(1)
        item_embeds = self.item_embedding(sequences)
        pos_ids = torch.arange(seq_len, device=sequences.device).unsqueeze(0)
        pos_embeds = self.pos_embedding(pos_ids)
        
        encoded = self.encoder(item_embeds + pos_embeds)
        output = self.fc(encoded[:, -1, :])  # Predict next item
        return output

def train_model(model, train_loader, num_items, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)
    
    model.train()
    
    print(len(train_loader))
    for epoch in range(EPOCHS):
        total_loss = 0
        for sequences, targets in train_loader:
            sequences, targets = sequences.to(device), targets.to(device)
            optimizer.zero_grad()
            logits = model(sequences)
            # print(logits.shape, targets.shape)
            loss = criterion(logits, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}")
        evaluate(model, user_dict, num_items, MAX_SEQ_LENGTH, device)

def evaluate(model, user_dict, num_items, max_seq_length, device):
    model.eval()
    NDCG, HR = 0.0, 0.0
    valid_users = 0
    
    for user, items in user_dict.items():
        if len(items) < 2:
            continue
        
        seq = items[:-1][-max_seq_length:]
        pad_length = max_seq_length - len(seq)
        # print(pad_length)
        # print(min(seq))
        input_seq = torch.tensor([0] * pad_length + seq, dtype=torch.long).unsqueeze(0).to(device)
        
        target = items[-1]
        negative_samples = random.sample(set(range(1, num_items)) - set(items), NEG_SAMPLES)
        candidates = [target] + negative_samples
        
        with torch.no_grad():
            logits = model(input_seq)
            scores = logits.squeeze(0)[candidates]
        
        # print(scores.shape)
        ranked = np.argsort(-scores.cpu().numpy())
        rank = np.where(ranked == 0)[0][0] + 1  # Rank of positive sample
        
        # print(rank)
        valid_users += 1
        HR += int(rank <= 10)
        NDCG += 1 / np.log2(rank + 1) if rank <= 10 else 0
        
    
    print(f"HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")
    
# Main script
if __name__ == "__main__":
    device = torch.device("cuda:3" if  torch.cuda.is_available() else "cpu")
    user_dict = load_data("data/ml-1m.txt")
    num_items = max(max(items) for items in user_dict.values())+1
    
    sequences, targets = prepare_sequences(user_dict, MAX_SEQ_LENGTH)
    train_dataset = RecDataset(sequences, targets)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    
    model = TransformerRec(num_items, EMBEDDING_DIM, NUM_HEADS, NUM_LAYERS, MAX_SEQ_LENGTH).to(device)
    train_model(model, train_loader, num_items, device)
    




486
Epoch 1, Loss: 7.3614
HR@10: 0.5469, NDCG@10: 0.3464
Epoch 2, Loss: 6.6449
HR@10: 0.6531, NDCG@10: 0.4320
Epoch 3, Loss: 6.4286
HR@10: 0.6649, NDCG@10: 0.4446
Epoch 4, Loss: 6.3404
HR@10: 0.6702, NDCG@10: 0.4424
Epoch 5, Loss: 6.2867
HR@10: 0.6791, NDCG@10: 0.4563
Epoch 6, Loss: 6.2499
HR@10: 0.6896, NDCG@10: 0.4600
Epoch 7, Loss: 6.2215
HR@10: 0.6747, NDCG@10: 0.4551
Epoch 8, Loss: 6.1997
HR@10: 0.6863, NDCG@10: 0.4592
Epoch 9, Loss: 6.1819
HR@10: 0.6887, NDCG@10: 0.4611
Epoch 10, Loss: 6.1665
HR@10: 0.6982, NDCG@10: 0.4745
Epoch 11, Loss: 6.1539
HR@10: 0.6798, NDCG@10: 0.4537
Epoch 12, Loss: 6.1423
HR@10: 0.6927, NDCG@10: 0.4657
Epoch 13, Loss: 6.1323
HR@10: 0.6929, NDCG@10: 0.4650
Epoch 14, Loss: 6.1234
HR@10: 0.6935, NDCG@10: 0.4673
Epoch 15, Loss: 6.1156
HR@10: 0.6859, NDCG@10: 0.4574
Epoch 16, Loss: 6.1083
HR@10: 0.6980, NDCG@10: 0.4616
Epoch 17, Loss: 6.1021
HR@10: 0.6993, NDCG@10: 0.4638
Epoch 18, Loss: 6.0965
HR@10: 0.6965, NDCG@10: 0.4632
Epoch 19, Loss: 6.0915
HR@10: 0.6

In [2]:
def evaluate(model, user_dict, num_items, max_seq_length, device):
    model.eval()
    NDCG, HR = 0.0, 0.0
    valid_users = 0
    
    for user, items in user_dict.items():
        if len(items) < 2:
            continue
        
        seq = items[:-1][-max_seq_length:]
        pad_length = max_seq_length - len(seq)
        # print(pad_length)
        # print(min(seq))
        input_seq = torch.tensor([0] * pad_length + seq, dtype=torch.long).unsqueeze(0).to(device)
        
        target = items[-1]
        negative_samples = random.sample(set(range(1, num_items)) - set(items), NEG_SAMPLES)
        candidates = [target] + negative_samples
        
        with torch.no_grad():
            logits = model(input_seq)
            scores = logits.squeeze(0)[candidates]
        
        # print(scores.shape)
        ranked = np.argsort(-scores.cpu().numpy())
        rank = np.where(ranked == 0)[0][0] + 1  # Rank of positive sample
        
        # print(rank)
        valid_users += 1
        HR += int(rank <= 10)
        NDCG += 1 / np.log2(rank + 1) if rank <= 10 else 0
    
    print(f"HR@10: {HR / valid_users:.4f}, NDCG@10: {NDCG / valid_users:.4f}")

evaluate(model, user_dict, num_items, MAX_SEQ_LENGTH, device)

HR@10: 0.7149, NDCG@10: 0.4689


In [1]:
print("Hi")

Hi
