# Нейросетевые рекомендации

### Сравните качество Bert4Rec и Sas4Rec на данных КИОНа и выберите лучшую модель

*   Проанализируйте воспроизводимость (10 баллов)
*   Проанализируйте качество модели (8 баллов)

In [18]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertModel, BertTokenizer
from sklearn.model_selection import KFold

from pathlib import Path

## Data Processing

In [183]:
interactions = pd.read_csv('/kaggle/input/kion-dataset/interactions.csv')
users = pd.read_csv('/kaggle/input/kion-dataset/users.csv')
items = pd.read_csv('/kaggle/input/kion-dataset/items.csv')

In [184]:
interactions = interactions.query("last_watch_dt >= '2021-07-15'")

In [185]:
interactions['last_watch_dt'] = pd.to_datetime(interactions['last_watch_dt'])

In [186]:
# Fill missing values with appropriate values or strategies
users['age'].fillna('unknown', inplace=True)
users['income'].fillna('unknown', inplace=True)
users['sex'].fillna('unknown', inplace=True)

items['release_year'].fillna(items['release_year'].median(), inplace=True)
items['countries'].fillna('unknown', inplace=True)
items['age_rating'].fillna(items['age_rating'].median(), inplace=True)
items['directors'].fillna('unknown', inplace=True)
items['actors'].fillna('unknown', inplace=True)
items['description'].fillna('unknown', inplace=True)
items['keywords'].fillna('unknown', inplace=True)


In [187]:
users = pd.get_dummies(users, columns=['age', 'income', 'sex'])

In [188]:
interactions = interactions.merge(users, on='user_id', how='left')
interactions['total_dur'] = (interactions['total_dur'] - interactions['total_dur'].mean()) / interactions['total_dur'].std()
item_id_mapping = {item_id: idx for idx, item_id in enumerate(items['item_id'].unique())}
interactions['item_index'] = interactions['item_id'].map(item_id_mapping)


In [189]:
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader, random_split

class InteractionDataset(Dataset):
    def __init__(self, item_index, max_sequence_length):
        self.sequences = self.create_padded_sequences(item_index, max_sequence_length)
        self.masks = self.create_mask(self.sequences)

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        sequence = self.sequences[index]
        mask = self.masks[index]
        return torch.tensor(sequence, dtype=torch.long), torch.tensor(mask, dtype=torch.long)


    def create_padded_sequences(self, item_index, max_length):
        padded_sequences = []
        for sequence in item_index:
            sequence = [int(digit) for digit in str(sequence)]  # Convert integer to a list of digits
            if len(sequence) < max_length:
                sequence += [0] * (max_length - len(sequence))  # Pad the sequence with zeros
            padded_sequences.append(sequence)
        return padded_sequences

    def create_mask(self, sequences):
        masks = []
        for sequence in sequences:
            mask = [1 if item != 0 else 0 for item in sequence]
            masks.append(mask)
        return masks

In [190]:
# Assuming you have loaded your data into the following variables
item_index = interactions['item_index']
max_sequence_length = 5

# Create InteractionDataset
dataset = InteractionDataset(item_index, max_sequence_length)

In [191]:
# Define the sizes for train, validation, and test sets
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

In [192]:
# Split the dataset into train, validation, and test sets
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Define batch size
batch_size = 32

# Create data loaders for train, validation, and test sets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


## BERT4Rec

In [193]:
import torch
from transformers import BertModel
import torch.nn as nn

class Bert4Rec(nn.Module):
    def __init__(self, num_items, hidden_size=768, num_attention_heads=12, num_hidden_layers=6, dropout_prob=0.1):
        super(Bert4Rec, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(hidden_size, num_items)
    
    def forward(self, item_ids):
        item_ids_embed = self.bert(item_ids)[0]  # Use the output of the first transformer layer
        item_ids_embed = self.dropout(item_ids_embed)
        logits = self.fc(item_ids_embed)
        return logits


In [194]:
# Set the hyperparameters
# batch_size = 32
num_items = len(train_loader)
hidden_size = 768
num_layers = 2
num_heads = 4
dropout = 0.1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BERT4Rec(num_items, hidden_size, num_layers, num_heads, dropout, device)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [195]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [196]:
import time
from tqdm import tqdm

In [198]:
# Training loop
num_epochs = 1
for epoch in tqdm(range(1, num_epochs + 1)):
    model.train()
    total_loss = 0.0
    for item_ids, masks in train_loader:
        optimizer.zero_grad()
        item_ids = item_ids.to(device)
        masks = masks.to(device)
        logits = model(item_ids, masks)  # Pass tensors directly to the model on the same device
        logits_flat = logits.view(-1, logits.shape[-1])  # Reshape the logits tensor
        item_ids_flat = item_ids.view(-1)[:logits_flat.size(0)]  # Reshape the item_ids tensor
        loss = criterion(logits_flat, item_ids_flat)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    train_loss = total_loss / len(train_loader)

    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for item_ids, masks in val_loader:
            item_ids = item_ids.to(device)
            masks = masks.to(device)
            logits = model(item_ids, masks)  # Pass tensors directly to the model on the same device
            logits_flat = logits.view(-1, logits.shape[-1])  # Reshape the logits tensor
            item_ids_flat = item_ids.view(-1)  # Reshape the item_ids tensor
            loss = criterion(logits_flat, item_ids_flat)
            total_loss += loss.item()
    val_loss = total_loss / len(val_loader)

    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")


  0%|          | 0/1 [1:08:25<?, ?it/s]


ValueError: Expected input batch_size (32) to match target batch_size (160).

## SAS4Rec

In [None]:
import torch.nn.functional as F

class SAS4Rec(nn.Module):
    def __init__(self, num_items, embedding_size, hidden_size, num_attention_heads):
        super(SAS4Rec, self).__init__()
        self.num_items = num_items
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        
        # Define the item embeddings
        self.item_embeddings = nn.Embedding(num_items, embedding_size)
        
        # Define the self-attention mechanism
        self.self_attention = nn.MultiheadAttention(embedding_size, num_attention_heads)
        
        # Define the feed-forward network
        self.feed_forward = nn.Sequential(
            nn.Linear(embedding_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, embedding_size)
        )
        
    def forward(self, item_ids):
        # Apply the item embeddings
        item_embeddings = self.item_embeddings(item_ids)
        # Transpose the item embeddings to match the shape expected by the self-attention module
        item_embeddings = item_embeddings.permute(1, 0, 2)
        # Apply the self-attention mechanism
        output, _ = self.self_attention(item_embeddings, item_embeddings, item_embeddings)
        # Transpose the output back to the original shape
        output = output.permute(1, 0, 2)
        # Apply the feed-forward network
        output = self.feed_forward(output)
        # Reshape the output to match the shape of the input item_ids
        output = output.permute(0, 2, 1)
        # Apply global max pooling to obtain a fixed-size representation
        output = F.max_pool1d(output, output.size(2)).squeeze(2)
        
        return output

In [None]:
# Define hyperparameters
embedding_size = 128
hidden_size = 256
num_attention_heads = 4
batch_size = 32
num_epochs = 10
learning_rate = 0.001

# Initialize the SAS4Rec model
model = SAS4Rec(num_items, embedding_size, hidden_size, num_attention_heads)

In [None]:
# Move the model to the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
# Define the loss function
criterion = nn.CrossEntropyLoss()
# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
for item_ids, masks in train_loader:
    item_ids = item_ids.to(device)
    logits = model(item_ids)
    print(logits.shape)
    logits_flat = logits.reshape(-1, logits.shape[-1])  # Reshape the logits tensor
    print(logits_flat.shape)
    item_ids_flat = item_ids.reshape(-1)[:logits_flat.size(0)]  # Reshape the item_ids tensor
    print(item_ids_flat.shape)
    #loss = criterion(logits_flat, item_ids_flat)
    
    break

In [None]:
# Training loop
num_epochs = 1
for epoch in tqdm(range(1, num_epochs + 1)):
    model.train()
    total_loss = 0.0
    for item_ids, masks in train_loader:
        optimizer.zero_grad()
        item_ids = item_ids.to(device)
        masks = masks.to(device)
        logits = model(item_ids)  # Pass tensors directly to the model on the same device
        logits_flat = logits.reshape(-1, logits.shape[-1])  # Reshape the logits tensor
        item_ids_flat = item_ids.reshape(-1)[:logits_flat.size(0)]  # Reshape the item_ids tensor
        loss = criterion(logits_flat, item_ids_flat)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    train_loss = total_loss / len(train_loader)

    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for item_ids, masks in val_loader:
            item_ids = item_ids.to(device)
            masks = masks.to(device)
            logits = model(item_ids)  # Pass tensors directly to the model on the same device
            logits_flat = logits.reshape(-1, logits.shape[-1])  # Reshape the logits tensor
            item_ids_flat = item_ids.reshape(-1)[:logits_flat.size(0)]  # Reshape the item_ids tensor
            loss = criterion(logits_flat, item_ids_flat)
            total_loss += loss.item()
    val_loss = total_loss / len(val_loader)

    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
