In [132]:
import sys
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from surprise import Dataset, Reader, SVD

# add root folder to path
folder = "../../"
sys.path.append(folder)
from src.utils import load_data
from src.metrics import evaluate_recommender_system


In [133]:
# Load the raw tables from ../../data/ml-1m
users, ratings, movies = load_data('../../data/ml-1m')

# Merge ratings with user and movie attributes into one frame
data = ratings.merge(users, on='user_id').merge(movies, on='movie_id')

# The release year is taken from the "(YYYY)" pattern inside the title
data['year'] = data['title'].str.extract(r'\((\d{4})\)').astype(float)

# Genres are a '|'-separated string: split, explode and one-hot encode them,
# then collapse back to one row per original index with max()
data['genres'] = data['genres'].str.split('|')
genre_columns = pd.get_dummies(data['genres'].explode()).groupby(level=0).max()
data = data.join(genre_columns).drop(columns=['title', 'genres', 'zip'])

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: the chained in-place form is deprecated in pandas 2.x and
# does not modify `data` under Copy-on-Write.
data['year'] = data['year'].fillna(data['year'].median())

# Encode gender as a boolean flag (True for 'M')
data['gender'] = data['gender'] == 'M'
data = data.drop(columns=['timestamp'])

# Columns fed to the ranking model as per-movie features
movie_features_names = ['year', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
                        'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
                        'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

# Fixed seed keeps the train/test split reproducible
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Kept as the last expression so the preview is actually rendered by the cell
train_data.head()


In [134]:
# Stage 1: fit the SVD candidate generator on the training ratings only
reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_from_df(train_data[['user_id', 'movie_id', 'rating']], reader)
trainset = surprise_data.build_full_trainset()

# Seed the model so the candidates (and every metric computed from them) are
# reproducible across runs; the value mirrors the seed used for the data split.
algo = SVD(random_state=42)
algo.fit(trainset)

def generate_candidates(algo, test_data, num_candidates=25, seen_data=None):
    """Select the top-scoring unseen movies for every user in ``test_data``.

    Args:
        algo: Fitted model exposing ``predict(user_id, movie_id)`` whose result
            has an ``est`` attribute (the predicted rating).
        test_data: DataFrame with ``user_id`` and ``movie_id`` columns. Its
            distinct users are the ones served, and its distinct movies form
            the candidate pool.
        num_candidates: Maximum number of movies returned per user.
        seen_data: DataFrame with ``user_id`` and ``movie_id`` columns listing
            interactions to exclude. Defaults to the notebook-level
            ``train_data`` when omitted.

    Returns:
        dict mapping each user id to a list of movie ids, ordered by
        descending predicted rating.
    """
    if seen_data is None:
        # Backward-compatible default: the training split defined in the notebook.
        seen_data = train_data

    # Loop-invariant: the candidate pool is the same for every user.
    all_movie_ids = set(test_data['movie_id'].unique())

    # Group the history once instead of scanning the whole frame per user.
    seen_by_user = {
        user_id: set(movie_ids)
        for user_id, movie_ids in seen_data.groupby('user_id')['movie_id']
    }
    no_history = frozenset()

    candidates = {}
    for user_id in test_data['user_id'].unique():
        # Predict ratings only for movies the user hasn't rated
        unseen_movie_ids = all_movie_ids - seen_by_user.get(user_id, no_history)
        predictions = [
            (movie_id, algo.predict(user_id, movie_id).est)
            for movie_id in unseen_movie_ids
        ]

        top_candidates = sorted(predictions, key=lambda x: x[1], reverse=True)[:num_candidates]
        candidates[user_id] = [movie_id for movie_id, _ in top_candidates]

    return candidates

# Stage-1 output: candidate movie ids per test user (default list length)
candidates = generate_candidates(algo=algo, test_data=test_data)


In [135]:
def rank_recommendations(model, test_data, recommendations, device='cpu',
                         list_size=25, movie_feature_columns=None):
    """Re-order each user's candidate list by the ranking model's scores.

    Args:
        model: ``torch.nn.Module`` called as ``model(user_features, movie_features)``
            with tensors of shape ``(1, 3)`` and ``(1, n_movies, n_movie_features)``;
            it must return one score per movie slot.
        test_data: DataFrame holding ``user_id``, ``movie_id``, the user columns
            ``gender``, ``age``, ``occupation`` and the movie feature columns.
            The first row found for a user / movie supplies its features.
        recommendations: dict mapping user id to a list of candidate movie ids.
        device: Torch device the input tensors are moved to.
        list_size: Minimum list length fed to the model; shorter candidate
            lists are padded with rows of ``-1`` and the padded scores dropped.
        movie_feature_columns: Movie feature column names. Defaults to the
            notebook-level ``movie_features_names`` when omitted.

    Returns:
        dict mapping each user id to its candidate movie ids sorted by
        descending model score. Users with no candidates map to ``[]``.

    Raises:
        KeyError: If a user or movie with candidates is absent from ``test_data``.
    """
    if movie_feature_columns is None:
        # Backward-compatible default: the feature list defined in the notebook.
        movie_feature_columns = movie_features_names
    movie_feature_columns = list(movie_feature_columns)
    user_feature_columns = ['gender', 'age', 'occupation']

    # Build first-occurrence lookup tables once, instead of filtering the whole
    # frame for every user and every candidate movie.
    user_lookup = test_data.drop_duplicates('user_id').set_index('user_id')[user_feature_columns]
    movie_lookup = test_data.drop_duplicates('movie_id').set_index('movie_id')[movie_feature_columns]

    model.eval()
    ranked_recommendations = {}

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for user_id, movie_ids in recommendations.items():
            movie_ids = list(movie_ids)
            num_movies = len(movie_ids)

            if num_movies == 0:
                # Nothing to rank; also avoids stacking an empty feature array.
                ranked_recommendations[user_id] = []
                continue

            user_features = user_lookup.loc[user_id].to_numpy(dtype=np.float32)
            movie_features = movie_lookup.loc[movie_ids].to_numpy(dtype=np.float32)

            if num_movies < list_size:
                padding_array = np.full(
                    (list_size - num_movies, len(movie_feature_columns)), -1.0, dtype=np.float32
                )
                movie_features = np.vstack((movie_features, padding_array))

            user_features_tensor = torch.tensor(user_features, dtype=torch.float32).unsqueeze(0).to(device)
            movie_features_tensor = torch.tensor(movie_features, dtype=torch.float32).unsqueeze(0).to(device)
            pred_scores = model(user_features_tensor, movie_features_tensor)

            # Flatten so the slice works whether the model keeps or squeezes the
            # batch dimension, then drop the scores of the padded slots.
            pred_scores = pred_scores.reshape(-1)[:num_movies]
            sorted_indices = torch.argsort(pred_scores, descending=True).cpu().numpy()
            ranked_recommendations[user_id] = [movie_ids[i] for i in sorted_indices]

    return ranked_recommendations


In [136]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ListwiseRankingModel(nn.Module):
    """Scores every movie in a list given one user feature vector per list.

    User and movie features are projected separately to ``hidden_dim``, the
    user projection is copied across the list positions, concatenated with
    each movie projection, and reduced to a single score per movie.
    """

    def __init__(self, user_feature_dim, movie_feature_dim, hidden_dim=64):
        super().__init__()
        self.user_feature_layer = nn.Linear(user_feature_dim, hidden_dim)
        self.movie_feature_layer = nn.Linear(movie_feature_dim, hidden_dim)
        self.merging_layer = nn.Linear(2 * hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, 1)

    def forward(self, user_features, movie_features):
        """Return one score per movie.

        ``user_features`` is 2-D (batch, user_feature_dim) and
        ``movie_features`` is 3-D (batch, list_len, movie_feature_dim).
        The result has all size-1 dimensions squeezed away.
        """
        list_len = movie_features.size(1)

        # Independent projections of the two feature groups
        user_hidden = F.relu(self.user_feature_layer(user_features))
        movie_hidden = F.relu(self.movie_feature_layer(movie_features))

        # Give every list position its own copy of the user representation
        user_per_movie = user_hidden.unsqueeze(1).expand(-1, list_len, -1)
        merged_features = torch.cat((user_per_movie, movie_hidden), dim=2)

        merged_hidden = F.relu(self.merging_layer(merged_features))
        scores = self.output_layer(merged_hidden)
        return scores.squeeze()

In [137]:
# Input widths of the ranking model: three user columns plus the movie features
user_feature_dim = len(('gender', 'age', 'occupation'))
movie_feature_dim = len(movie_features_names)

# Restore the trained weights on CPU and switch the model to inference mode
model = ListwiseRankingModel(user_feature_dim, movie_feature_dim)
model.load_state_dict(
    torch.load(
        '../../artifacts/listwise_ranking_model.pth',
        map_location=torch.device('cpu'),
        weights_only=True,
    )
)
model.eval()

# Stage 2: re-order the stage-1 candidates with the ranking model
ranked_recommendations = rank_recommendations(model, test_data, candidates)

In [138]:
# Number of distinct movies in the test split, passed to the evaluator
# (presumably as the denominator for coverage - confirm in src.metrics)
total_amount_of_movies = test_data['movie_id'].nunique()

# Score the stage-1 candidates and the re-ranked lists with the same settings
initial_evaluation, ranked_evaluation = (
    evaluate_recommender_system(recs, test_data, total_amount_of_movies)
    for recs in (candidates, ranked_recommendations)
)

for label, metrics in (
    ("Initial Evaluation:", initial_evaluation),
    ("Ranked Evaluation:", ranked_evaluation),
):
    print(label, metrics)


Initial Evaluation: {'Precision@K': 0.09199933741924798, 'Recall@K': 0.014827771270951173, 'NDCG@K': 0.6821845424483866, 'MAP@K': 0.5739600363306085, 'MRR': 0.20609408522427342, 'Hit Rate@K': 0.3039589199933742, 'Coverage@K': 0.18822170900692842}
Ranked Evaluation: {'Precision@K': 0.07811827066423721, 'Recall@K': 0.012455640405723827, 'NDCG@K': 0.6065776087765464, 'MAP@K': 0.46275921466318815, 'MRR': 0.1549950689594734, 'Hit Rate@K': 0.275136657280106, 'Coverage@K': 0.21882217090069284}


As we can see:
 -> Precision and recall got worse after ranking (as did NDCG, MAP, MRR and hit rate), so the ranking model is not helping.
 -> Coverage increased, i.e. more diverse items were recommended, but at the cost of precision and recall.
 -> The ranking model and its features need further tuning, which is why we don't see a benefit yet.
 
Overall, the ranking step didn't add value because the model needs further tuning.


### Drawbacks and benefits of 2-stage RS

### Drawbacks:
1. **More Complex**: A two-stage system is harder to set up and maintain.
2. **Higher Costs**: Running two steps uses more computing power.
3. **Risk of Worse Results**: If not tuned well, the ranking step can lower recommendation quality (which is exactly what we observe in practice here).
4. **Slower Fixes**: Problems can take longer to fix because of the extra step.

### Benefits:
1. **Better Accuracy**: When done right, this approach can give more personalized recommendations.
2. **Scalable**: It can handle large datasets better by narrowing down options first.
3. **Flexible**: You can use different models for each step, making it easier to improve.
4. **Improved User Experience**: Helps deliver more relevant recommendations to users.