## Load the data

In [15]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

class SequentialRecommendationDataset(Dataset):
    def __init__(self, file_path):
        self.data = []
        self.max_seq_length = 0
        with open(file_path, 'r') as file:
            for line in file:
                parts = line.strip().split(' ')
                user_id = int(parts[0])
                sequence = list(map(int, parts[1:]))
                self.data.append((user_id, sequence))
                self.max_seq_length = max(self.max_seq_length, len(sequence))
                
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        user_id, sequence = self.data[idx]
        input_seq = torch.tensor(sequence[:-1], dtype=torch.long)
        target_seq = torch.tensor(sequence[1:], dtype=torch.long)
        return user_id, input_seq, target_seq
    
def collate_fn(batch):
    user_ids, input_seqs, target_seqs = zip(*batch)
    input_seqs = nn.utils.rnn.pad_sequence(input_seqs, batch_first=True, padding_value=0)
    target_seqs = nn.utils.rnn.pad_sequence(target_seqs, batch_first=True, padding_value=0)
    return user_ids, input_seqs, target_seqs
    
dataset = SequentialRecommendationDataset('sequential_recommendation_data.txt')

dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)



In [16]:
# print the first batch
for user_ids, input_seqs, target_seqs in dataloader:
    print(user_ids)
    print(len(input_seqs))
    print(target_seqs)
    break

(6007, 785, 1791, 5785, 5028, 107, 93, 5809, 3200, 5555, 5016, 1206, 5156, 2624, 773, 1099, 2669, 4297, 4603, 3865, 3293, 1126, 4001, 3233, 1256, 2486, 6019, 416, 3561, 115, 2902, 1154)
32
tensor([[ 110, 3101, 2021,  ...,    0,    0,    0],
        [1193, 1917,  593,  ...,    0,    0,    0],
        [1466, 1610, 2531,  ...,    0,    0,    0],
        ...,
        [2011, 2028, 1407,  ...,    0,    0,    0],
        [1221, 3260, 3654,  ...,    0,    0,    0],
        [1682, 3660,  640,  ...,    0,    0,    0]])


In [17]:
# define the rnn model

class RNNModel(nn.Module):
    
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        x = self.embedding(x)
        out, _ = self.rnn(x, h0)
        out = self.fc(out)
        return out
    
# Hyperparameters
# input size should be the number of items in the dataset + 1 (for padding) , so calculate it from the .txt file
input_size = 0
with open('sequential_recommendation_data.txt', 'r') as file:
    for line in file:
        parts = line.strip().split(' ')
        sequence = list(map(int, parts[1:]))
        input_size = max(input_size, max(sequence))
input_size += 1

print(input_size)
hidden_size = 128
num_layers = 1
num_classes = input_size
num_epochs = 5
learning_rate = 0.03

# Initialize the model, loss function, and optimizer and train the model on gpu
device = torch.device('cuda:5' if torch.cuda.is_available() else 'cpu')
model = RNNModel(input_size, hidden_size, num_layers, num_classes)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_step = len(dataloader)
for epoch in range(num_epochs):
    for i, (user_ids, input_seqs, target_seqs) in enumerate(dataloader):
        
        model.train()
        input_seqs, target_seqs = input_seqs.to(device), target_seqs.to(device)
        outputs = model(input_seqs)
        # print(outputs.shape, target_seqs.shape)
        loss = criterion(outputs.view(-1, num_classes), target_seqs.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # if (i+1) % 10 == 0:
        #     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
        #            .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
# Save the model checkpoint
torch.save(model.state_dict(), 'rnnmodel.ckpt')

    

3953
Epoch 1, Loss: 1.699788212776184
Epoch 2, Loss: 1.5240942239761353
Epoch 3, Loss: 0.8340753316879272
Epoch 4, Loss: 0.8427436351776123
Epoch 5, Loss: 1.597973346710205


# Inference

In this section, we will perform inference on our trained model. The goal is to predict the next sequence of items based on a given input item. This is a common scenario in recommendation systems where we want to predict what items a user might interact with next, based on their past interactions.

The process will work as follows:

1. We start by feeding the model an input item.
2. The model will generate a prediction for the next item.
3. We then take the model's prediction and use it as the new input, repeating the process.
4. This is done iteratively, up to 5 times, to generate a sequence of recommended items.

This method of using the model's own predictions as input for subsequent predictions is known as autoregression.

Let's see how this works in practice.

In [18]:
# Now lets do inference where i will give the model one item and it will predict the next sequence upto 5 items(feed the output of the model as input to the model again)

# Load the model checkpoint
model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)
model.load_state_dict(torch.load('rnnmodel.ckpt'))
model.eval()

# Inference

# Choose a random item from the dataset
import random
item = random.randint(1, input_size-1)
print('Starting item:', item)

# Initialize the input sequence with the chosen item
input_seq = torch.tensor([[item]]).to(device)

# Generate the next 5 items in the sequence
with torch.no_grad():
    for _ in range(5):
        output = model(input_seq)
        _, predicted = torch.max(output[:, -1, :], 1)
        input_seq = torch.cat((input_seq, predicted.unsqueeze(1)), dim=1)
        
print('Generated sequence:', input_seq.squeeze().tolist())

torch.save(model.state_dict(), 'rnnmodel.ckpt')

Starting item: 452
Generated sequence: [452, 1210, 1270, 750, 924, 3471]


## Now create the transformer decoder architecture and train it 

In [19]:
# create the transformer deocder model which takes the inout sequence one by one and predicts the next item in the sequence and train the model on gpu use teacher forcing technique

class TransformerDecoder(nn.Module):
    def __init__(self, num_items, embed_size, num_layers, num_heads, hidden_dim):
        super(TransformerDecoder, self).__init__()
        self.item_embedding = nn.Embedding(num_items, embed_size)
        self.transformer_decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(embed_size, num_heads, hidden_dim),
            num_layers
        )
        self.fc = nn.Linear(embed_size, num_items)
        
    def forward(self, input_seqs):
        embeddings = self.item_embedding(input_seqs)
        output = self.transformer_decoder(embeddings, embeddings)
        output = self.fc(output)
        return output
    
# Hyperparameters
num_items = 0
with open('sequential_recommendation_data.txt', 'r') as file:
    for line in file:
        parts = line.strip().split(' ')
        sequence = list(map(int, parts[1:]))
        num_items = max(num_items, max(sequence))
        
num_items += 1
embed_size = 128
num_layers = 1
num_heads = 2
hidden_dim = 256
num_epochs = 5
learning_rate = 0.001

# Initialize the model, loss function, and optimizer and train the model on gpu
device = torch.device('cuda:5' if torch.cuda.is_available() else 'cpu')
model = TransformerDecoder(num_items, embed_size, num_layers, num_heads, hidden_dim)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
total_step = len(dataloader)

for epoch in range(num_epochs):
    for i, (user_ids, input_seqs, target_seqs) in enumerate(dataloader):
        
        model.train()
        input_seqs, target_seqs = input_seqs.to(device), target_seqs.to(device)
        outputs = model(input_seqs)
        loss = criterion(outputs.view(-1, num_items), target_seqs.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
# Save the model checkpoint
torch.save(model.state_dict(), 'transformermodel.ckpt')


Epoch 1, Loss: 1.245722770690918
Epoch 2, Loss: 2.180192232131958
Epoch 3, Loss: 1.989233136177063
Epoch 4, Loss: 2.133683681488037
Epoch 5, Loss: 1.973725438117981


In [20]:
# Now lets do inference where i will give the model one item and it will predict the next sequence upto 5 items(feed the output of the model as input to the model again)

# Load the model checkpoint
model = TransformerDecoder(num_items, embed_size, num_layers, num_heads, hidden_dim).to(device)
model.load_state_dict(torch.load('transformermodel.ckpt'))
model.eval()

# Inference

# Choose a random item from the dataset
import random
item = random.randint(1, num_items-1)
print('Starting item:', item)

# Initialize the input sequence with the chosen item
input_seq = torch.tensor([[item]]).to(device)

# Generate the next 5 items in the sequence
with torch.no_grad():
    for _ in range(10):
        output = model(input_seq)
        _, predicted = torch.max(output[:, -1, :], 1)
        input_seq = torch.cat((input_seq, predicted.unsqueeze(1)), dim=1)
        
print('Generated sequence:', input_seq.squeeze().tolist())

#save the model checkpoint
torch.save(model.state_dict(), 'transformermodel.ckpt')




Starting item: 717
Generated sequence: [717, 2858, 2706, 2997, 3285, 3298, 2694, 2710, 2770, 2433, 223]


In [21]:
#Load both the models and compare the results of ndcg@5 and ndcg@10 for both the models by calculating dcg and idcg and then calculating ndcg

#load rnn and transformer model

transformer_model = TransformerDecoder(num_items, embed_size, num_layers, num_heads, hidden_dim).to(device)
transformer_model.load_state_dict(torch.load('transformermodel.ckpt'))
transformer_model.eval()

rnn_model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)
rnn_model.load_state_dict(torch.load('rnnmodel.ckpt'))
rnn_model.eval()


# Load the dataset
dataset = SequentialRecommendationDataset('sequential_recommendation_data.txt')

#calculate the dcg and idcg for both the models

def dcg_at_k(r, k):
    r = np.asfarray(r)[:k]
    if r.size:
        return np.sum(np.subtract(np.power(2, r), 1) / np.log2(np.arange(2, r.size + 2)))
    return 0.

def ndcg_at_k(r, k):
    idcg = dcg_at_k(sorted(r, reverse=True), k)
    if not idcg:
        return 0.
    return dcg_at_k(r, k) / idcg

# Calculate the NDCG@5 and NDCG@10 for both models
import numpy as np

ndcg5_rnn = 0
ndcg10_rnn = 0
ndcg5_transformer = 0
ndcg10_transformer = 0

for user_id, input_seq, target_seq in dataloader:
    input_seq = input_seq.to(device)
    target_seq = target_seq.to(device)
    
    # RNN model
    rnn_output = rnn_model(input_seq)
    rnn_output = rnn_output[:, -1, :]
    _, rnn_predicted = torch.topk(rnn_output, k=num_items)
    rnn_predicted = rnn_predicted.cpu().numpy()
    rnn_target = target_seq[:, -1].cpu().numpy()
    rnn_ndcg5 = ndcg_at_k([int(item in rnn_predicted[i]) for i, item in enumerate(rnn_target)], 5)
    rnn_ndcg10 = ndcg_at_k([int(item in rnn_predicted[i]) for i, item in enumerate(rnn_target)], 10)
    ndcg5_rnn += rnn_ndcg5
    ndcg10_rnn += rnn_ndcg10
    
    # Transformer model
    transformer_output = transformer_model(input_seq)
    transformer_output = transformer_output[:, -1, :]
    _, transformer_predicted = torch.topk(transformer_output, k=num_items)
    transformer_predicted = transformer_predicted.cpu().numpy()
    transformer_target = target_seq[:, -1].cpu().numpy()
    transformer_ndcg5 = ndcg_at_k([int(item in transformer_predicted[i]) for i, item in enumerate(transformer_target)], 5)
    transformer_ndcg10 = ndcg_at_k([int(item in transformer_predicted[i]) for i, item in enumerate(transformer_target)], 10)
    ndcg5_transformer += transformer_ndcg5
    ndcg10_transformer += transformer_ndcg10
    
ndcg5_rnn /= len(dataloader)
ndcg10_rnn /= len(dataloader)
ndcg5_transformer /= len(dataloader)
ndcg10_transformer /= len(dataloader)

print(f'RNN Model - NDCG@5: {ndcg5_rnn:.4f}, NDCG@10: {ndcg10_rnn:.4f}')

print(f'Transformer Model - NDCG@5: {ndcg5_transformer:.4f}, NDCG@10: {ndcg10_transformer:.4f}')


RNN Model - NDCG@5: 1.0000, NDCG@10: 1.0000
Transformer Model - NDCG@5: 1.0000, NDCG@10: 1.0000
