In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import sys
# add root folder to path
folder = "../../"
sys.path.append(folder)
from src.utils import load_data
from src.utils import plot_metrics_grid
from src.utils import load_baseline_rec_result
from src.metrics import evaluate_recommender_system

In [2]:
users,ratings,movies = load_data('../../data/ml-1m')


In [3]:
from sklearn.model_selection import train_test_split
# Merge the datasets
data = ratings.merge(users, on='user_id').merge(movies, on='movie_id')

# Extract year from title
data['year'] = data['title'].str.extract(r'\((\d{4})\)').astype(float)

# Split genres into separate columns
data['genres'] = data['genres'].str.split('|')

# Create a DataFrame for each unique genre and merge them into the main DataFrame
genres_expanded = data['genres'].explode().unique()
genre_columns = pd.get_dummies(data['genres'].explode()).groupby(level=0).max()

# Join the new genre columns to the main DataFrame
data = data.join(genre_columns)

# Drop unnecessary columns
data = data.drop(columns=['title', 'genres', 'zip'])

# Fill missing year values with the median year
data['year'].fillna(data['year'].median(), inplace=True)

data['gender'] = data['gender'].apply(lambda x: x == 'M')
data.drop(columns=['timestamp'], inplace=True)
# Split the data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Print the resulting DataFrames to check the changes
train_data.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['year'].fillna(data['year'].median(), inplace=True)


Unnamed: 0,user_id,movie_id,rating,gender,age,occupation,year,Action,Adventure,Animation,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
416292,2507,3035,2,True,25,4,1955.0,False,False,False,...,False,False,False,False,False,False,False,False,True,False
683230,4087,2840,4,True,1,4,1999.0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2434,19,457,3,True,1,10,1993.0,True,False,False,...,False,False,False,False,False,False,False,True,False,False
688533,4118,2804,4,True,25,3,1983.0,False,False,False,...,False,False,False,False,False,False,False,False,False,False
472584,2907,805,4,False,35,5,1996.0,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [4]:
movie_features_names = ['year', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
                'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
                'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

In [5]:
import numpy as np

def generate_listwise_data(data, num_documents=25, k=1):
    grouped_data = data.groupby('user_id')
    listwise_data = []

    for user_id, sub_df in grouped_data:
        # Extract user features
        user_features = sub_df[['gender', 'age', 'occupation']].iloc[0].values

        for _ in range(k):
            # Sample documents for the user
            num_samples = min(num_documents, sub_df.shape[0])
            movie_samples = sub_df.sample(n=num_samples)

            # Extract movie features and relevance scores
            movie_features = movie_samples[movie_features_names].values
            relevance_scores = movie_samples['rating'].values

            # Pad movie features and relevance scores if fewer than num_documents
            if num_samples < num_documents:
                padding_size = num_documents - num_samples
                
                # Pad movie features with -1 and relevance scores with 0
                padded_movie_features = np.vstack([
                    movie_features,
                    np.full((padding_size, movie_features.shape[1]), -1)
                ])
                padded_relevance_scores = np.concatenate([
                    relevance_scores,
                    np.full(padding_size, 0)
                ])
            else:
                padded_movie_features = movie_features
                padded_relevance_scores = relevance_scores
            
            listwise_data.append((user_features, padded_movie_features, padded_relevance_scores))
    
    # Shuffle listwise_data to mix padded data positions
    np.random.shuffle(listwise_data)

    return listwise_data

# Example usage:
train_listwise = generate_listwise_data(train_data, num_documents=25, k=15)
test_listwise = generate_listwise_data(test_data, num_documents=25, k=3)

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class ListwiseDataset(Dataset):
    def __init__(self, data_list):
        """
        Args:
            data_list (list of tuples): Each tuple contains (user_features, movie_features, relevance_scores)
        """
        self.data_list = data_list

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        user_features, movie_features, relevance_scores = self.data_list[idx]
        return {
            'user_features': torch.tensor(user_features.astype(np.float32), dtype=torch.float),
            'movie_features': torch.tensor(movie_features.astype(np.float32), dtype=torch.float),
            'relevance_scores': torch.tensor(relevance_scores.astype(np.float32), dtype=torch.float)
        }

# Example usage
train_dataset = ListwiseDataset(train_listwise)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = ListwiseDataset(test_listwise)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ListwiseRankingModel(nn.Module):
    def __init__(self, user_feature_dim, movie_feature_dim, hidden_dim=64):
        super(ListwiseRankingModel, self).__init__()
        self.user_feature_layer = nn.Linear(user_feature_dim, hidden_dim)
        self.movie_feature_layer = nn.Linear(movie_feature_dim, hidden_dim)
        self.merging_layer = nn.Linear(2 * hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, 1)

    def forward(self, user_features, movie_features):
        # Process user features
        user_hidden = F.relu(self.user_feature_layer(user_features))
        
        # Process movie features
        movie_hidden = F.relu(self.movie_feature_layer(movie_features))
        
        # Merge user and movie features
        merged_features = torch.cat((user_hidden.unsqueeze(1).repeat(1, movie_features.size(1), 1), movie_hidden), dim=2)
        merged_hidden = F.relu(self.merging_layer(merged_features))
        
        # Compute scores for each movie
        scores = self.output_layer(merged_hidden)
        return scores.squeeze()

In [20]:
user_feature_dim = len(['gender', 'age', 'occupation'])
movie_feature_dim = len(movie_features_names)

model = ListwiseRankingModel(user_feature_dim, movie_feature_dim)
# Example input
user_features = torch.randn(1, user_feature_dim)  # Example user features
movie_features = torch.randn(1, 25, movie_feature_dim)  # Example movie features for 25 movies

scores = model(user_features, movie_features)
print(scores)

tensor([0.0826, 0.1650, 0.1250, 0.1298, 0.1530, 0.1319, 0.0607, 0.1825, 0.1290,
        0.1463, 0.0593, 0.1144, 0.1258, 0.0847, 0.2279, 0.0480, 0.1915, 0.0648,
        0.1219, 0.1895, 0.0604, 0.0485, 0.1525, 0.1831, 0.1956],
       grad_fn=<SqueezeBackward0>)


In [21]:
from sklearn.metrics import ndcg_score


In [43]:
import torch.nn.functional as F


def mse_loss(pred_scores, true_scores):
    return F.mse_loss(pred_scores, true_scores)

# Loss function
def listnet_loss(pred_scores, true_scores):
    true_probs = F.softmax(true_scores, dim=1)
    pred_probs = F.softmax(pred_scores, dim=1)
    return -torch.mean(torch.sum(true_probs * torch.log(pred_probs + 1e-8), dim=1))

# Metrics computation
def compute_metrics(pred_scores, true_scores):
    ndcg = ndcg_score(true_scores, pred_scores)
    avg_precision = 0# average_precision_score(true_scores, pred_scores)
    return ndcg, avg_precision

# Training and evaluation function
def train_and_evaluate(model, train_dataloader, val_dataloader, num_epochs=10, learning_rate=0.001):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # Evaluate on validation set before training
    model.eval()
    all_pred_scores = []
    all_true_scores = []
    with torch.no_grad():
        for batch in val_dataloader:
            user_features = batch['user_features'].to(device)
            movie_features = batch['movie_features'].to(device)
            relevance_scores = batch['relevance_scores'].to(device)
            pred_scores = model(user_features, movie_features)
            all_pred_scores.append(pred_scores.cpu().numpy())
            all_true_scores.append(relevance_scores.cpu().numpy())
    
    pred_scores = np.concatenate(all_pred_scores)
    true_scores = np.concatenate(all_true_scores)
    
    ndcg, avg_precision = compute_metrics(pred_scores, true_scores)
    print(f'Before training NDCG: {ndcg:.4f}, MAP: {avg_precision:.4f}')
    
    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for batch in train_dataloader:
            user_features = batch['user_features'].to(device)
            movie_features = batch['movie_features'].to(device)
            relevance_scores = batch['relevance_scores'].to(device)

            optimizer.zero_grad()
            pred_scores = model(user_features, movie_features)
            loss = mse_loss(pred_scores, relevance_scores)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        
        avg_loss = total_loss / len(train_dataloader)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}')
        
        # Evaluate on training set
        model.eval()
        all_pred_scores = []
        all_true_scores = []
        with torch.no_grad():
            for batch in train_dataloader:
                user_features = batch['user_features'].to(device)
                movie_features = batch['movie_features'].to(device)
                relevance_scores = batch['relevance_scores'].to(device)
                
                pred_scores = model(user_features, movie_features)
                all_pred_scores.append(pred_scores.cpu().numpy())
                all_true_scores.append(relevance_scores.cpu().numpy())
        
        pred_scores = np.concatenate(all_pred_scores)
        true_scores = np.concatenate(all_true_scores)
        
        train_ndcg, train_avg_precision = compute_metrics(pred_scores, true_scores)
        print(f'Epoch {epoch + 1}/{num_epochs}, Training NDCG: {train_ndcg:.4f}, MAP: {train_avg_precision:.4f}')
        
        # Evaluate on validation set
        model.eval()
        all_pred_scores = []
        all_true_scores = []
        with torch.no_grad():
            for batch in val_dataloader:
                user_features = batch['user_features'].to(device)
                movie_features = batch['movie_features'].to(device)
                relevance_scores = batch['relevance_scores'].to(device)
                
                pred_scores = model(user_features, movie_features)
                all_pred_scores.append(pred_scores.cpu().numpy())
                all_true_scores.append(relevance_scores.cpu().numpy())
        
        pred_scores = np.concatenate(all_pred_scores)
        true_scores = np.concatenate(all_true_scores)
        
        val_ndcg, val_avg_precision = compute_metrics(pred_scores, true_scores)
        print(f'Epoch {epoch + 1}/{num_epochs}, Validation NDCG: {val_ndcg:.4f}, MAP: {val_avg_precision:.4f}')


# Train and evaluate
train_and_evaluate(model, train_dataloader, test_dataloader, num_epochs=2, learning_rate=0.01)

Before training NDCG: 0.9225, MAP: 0.0000
Epoch 1/2, Loss: 1.1917
Epoch 1/2, Training NDCG: 0.9196, MAP: 0.0000
Epoch 1/2, Validation NDCG: 0.9225, MAP: 0.0000
Epoch 2/2, Loss: 1.1918
Epoch 2/2, Training NDCG: 0.9196, MAP: 0.0000
Epoch 2/2, Validation NDCG: 0.9225, MAP: 0.0000


In [44]:
users_id_unique = test_data['user_id'].unique()
movie_id_unique = test_data['movie_id'].unique()

#sample for each user 25 random movies that he has in test set
recommendations = {}
for user_id in users_id_unique:
    recommendations[int(user_id)] = np.random.choice(test_data['movie_id'], 5, replace=False).tolist()

In [45]:
def rank_recommendations(model, test_data, recommendations, device='cuda'):
    model.eval()
    ranked_recommendations = {}
    
    for user_id, movie_ids in recommendations.items():
        
        user_features = test_data[test_data['user_id'] == user_id][['gender', 'age', 'occupation']].iloc[0].values
        movie_features = []
        for movie_id in movie_ids:
            movie_features.append(test_data[test_data['movie_id'] == movie_id][movie_features_names].iloc[0].values)

        # Convert to numpy array
        user_features = np.array(user_features)
        movie_features = np.array(movie_features)

        # Check the shape of movie_features

        # We need to add padding to the movie features if there are less than 25 movies
        num_movies = len(movie_ids)
        num_features_per_movie = len(movie_features_names)

        if num_movies < 25:
            padding_size = 25 - num_movies
            # Create a padding array of shape (padding_size, num_features_per_movie) filled with -1
            padding_array = -1 * np.ones((padding_size, num_features_per_movie))
            # Append the padding array to movie_features
            movie_features = np.vstack((movie_features, padding_array))
        
        
        # Convert to tensors
        user_features_tensor = torch.tensor(user_features.astype(np.float32), dtype=torch.float32).unsqueeze(0).to(device)
        movie_features_tensor = torch.tensor(movie_features.astype(np.float32), dtype=torch.float32).unsqueeze(0).to(device)
        pred_scores = model(user_features_tensor, movie_features_tensor)
        
        # Sort movie_ids based on predicted scores
        # pick only not padded movie ids
        pred_scores = pred_scores[:num_movies]
        sorted_indices = torch.argsort(pred_scores, descending=True).cpu().numpy()
        ranked_recommendations[user_id] = [movie_ids[i] for i in sorted_indices]
    
    return ranked_recommendations


ranked_recommendations = rank_recommendations(model, test_data, recommendations)


In [46]:
total_amount_of_movies = test_data['movie_id'].nunique()
random_res = evaluate_recommender_system(recommendations, test_data, total_amount_of_movies)

In [47]:
ranked_random_res = evaluate_recommender_system(ranked_recommendations, test_data, total_amount_of_movies)

In [48]:
random_res

{'Precision@K': np.float64(0.02805564756541902),
 'Recall@K': np.float64(0.004659922027470331),
 'NDCG@K': np.float64(0.5966269300717518),
 'MAP@K': np.float64(0.4557720057720058),
 'MRR': np.float64(0.058545876117919844),
 'Hit Rate@K': np.float64(0.1275256707519046),
 'Coverage@K': 0.8246225319396051}

In [49]:
ranked_random_res

{'Precision@K': np.float64(0.02805564756541902),
 'Recall@K': np.float64(0.004659922027470331),
 'NDCG@K': np.float64(0.5952947606684268),
 'MAP@K': np.float64(0.45756854256854257),
 'MRR': np.float64(0.059299436899635634),
 'Hit Rate@K': np.float64(0.1275256707519046),
 'Coverage@K': 0.8246225319396051}

In [50]:
torch.save(model.state_dict(), '../../artifacts/listwise_ranking_model.pth')

## Conclusions

We can see slight changes in ranking metrics, however they are not really that positive, we assume that this is because ranking can not solve the issue of recommending model meaning that in this specific case we randomly select recommendations and if there no good recommendations in provided by recommender model the ranking just does not make much sense. Another assumption might be that the model is overfitted even though it has high NDCG on eval data, there might be some issues with preparing data for model training which might explain very little impact on ranking afterwards.

We will know in more details whether ranking helps us when we will combine ranking with providing better recommendations for ranking model in second task.