In [7]:
import numpy as np
import pandas as pd
import pickle
from sklearn.neighbors import NearestNeighbors

In [8]:
movie_matrix = pd.read_csv("C:/Users/Victor/Documents/A_Projet_MLOP/Template_MLOps_movie_recommandation-master/data/processed/movie_matrix.csv")

model = NearestNeighbors(n_neighbors=20, algorithm="ball_tree").fit(
        movie_matrix.drop("movieId", axis=1)
    )

In [9]:
def make_predictions(users_id, model,  user_matrix_filename):
    # Read user_matrix
    users = pd.read_csv(user_matrix_filename)

    # Filter with the list of users_id
    users = users[users["userId"].isin(users_id)]

    # Delete userId
    users = users.drop("userId", axis=1)

    # Calculate nearest neighbors
    _, indices = model.kneighbors(users)

    # Select 10 random numbers from each row
    # 
    selection = np.array(
        [np.random.choice(row, size=20, replace=False) for row in indices]
    )

    return selection

In [10]:
def apk(actual, predicted, k=20):
    """Average Precision at K"""
    if not actual:
        return 0.0

    predicted = predicted[:k]
    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    return score / min(len(actual), k)
    
def mapk(actual, predicted, k = 20):
    """Mean Average Precision at K"""
    return np.mean([apk(a,p,k) for a,p in zip(actual, predicted)])

def recall_at_k(actual, predicted, k=20):
    """Recall at K"""
    recalls = []
    
    for act, pred in zip(actual, predicted):
        actual_genres_set = set([genre for sublist in act[:k] for genre in sublist])
        predicted_genres_set = set([genre for sublist in pred[:k] for genre in sublist])
        
        if len(actual_genres_set) == 0:  # Avoid division by zero
            recalls.append(0)
        else:
            recalls.append(len(actual_genres_set & predicted_genres_set) / len(actual_genres_set))
    
    return np.mean(recalls)

In [11]:
from collections import Counter

genres = pd.read_csv("C:/Users/Victor/Documents/A_Projet_MLOP/Template_MLOps_movie_recommandation-master/data/raw/movies.csv")

def extract_movie_genres(movie_id):
    movie_genres = genres[genres["movieId"] == movie_id]["genres"].str.split("|").tolist()
    return movie_genres

def movie_genres(movies,k = 5):
    G = []
    for movie in movies:
        genres_list = Counter()
        for Id in movie:
            list = extract_movie_genres(Id)[0]
            genres_list.update(list)
        most_like_genres = genres_list.most_common(k)
        G.append([genre for genre, _ in most_like_genres])
    return G

def predicted_genres(movies):
    G = []
    for movie in movies:
        L = []
        for Id in movie:
            for val in extract_movie_genres(Id)[0]:
                L.append(val)
        G.append(L)
    return G


In [12]:
user_matrix = pd.read_csv("C:/Users/Victor/Documents/A_Projet_MLOP/Template_MLOps_movie_recommandation-master/data/processed/user_matrix.csv")
movie_matrix = pd.read_csv("C:/Users/Victor/Documents/A_Projet_MLOP/Template_MLOps_movie_recommandation-master/data/processed/movie_matrix.csv")

test_users = np.random.choice(user_matrix["userId"].unique(), size=20, replace=False)

predictions = make_predictions(
    test_users, model , "C:/Users/Victor/Documents/A_Projet_MLOP/Template_MLOps_movie_recommandation-master/data/processed/user_matrix.csv"
) 

ratings = pd.read_csv("C:/Users/Victor/Documents/A_Projet_MLOP/Template_MLOps_movie_recommandation-master/data/raw/ratings.csv")
test_user_ratings = ratings[(ratings["userId"].isin(test_users)) & (ratings['rating'] > 3.5)]

actual_movies = [
    test_user_ratings[test_user_ratings["userId"] == user]["movieId"].tolist()
    for user in test_users
]

actual_genres = movie_genres(actual_movies)
recommended_genres = predicted_genres(predictions)

mapk_score = mapk(actual_genres, recommended_genres, k=10)
recall_score = recall_at_k(actual_genres, recommended_genres, k=10)

metrics = {"MAP@20": mapk_score, "Recall@20": recall_score}


print(f"MAP@10: {mapk_score:.4f}")
print(f"Recall@10: {recall_score:.4f}")


MAP@10: 0.5128
Recall@10: 0.9024


In [13]:
print("Example actual:", actual_genres[0])
print("Example predicted:", recommended_genres[0])
print("apk:", apk(actual_genres[0], recommended_genres[0], k=20))

Example actual: ['Comedy', 'Drama', 'Romance', 'Crime', 'Mystery']
Example predicted: ['Children', 'Comedy', 'Comedy', 'Romance', 'Comedy', 'Action', 'Comedy', 'Crime', 'Drama', 'Thriller', 'Documentary', 'Action', 'Thriller', 'Action', 'Adventure', 'Comedy', 'Crime', 'Adventure', 'Children', 'Action', 'Crime', 'Drama', 'Adventure', 'Animation', 'Children', 'Drama', 'Thriller', 'Crime', 'Drama', 'Adventure', 'Children', 'Fantasy', 'Comedy', 'Romance', 'Documentary', 'Drama', 'Romance', 'Comedy', 'Action', 'Sci-Fi', 'Thriller', 'Drama', 'Romance', 'Comedy']
apk: 0.3638888888888889


In [14]:
predictions

array([[ 54,  39, 119, 145, 128,  89, 112, 146,  42,  13,  61,  30,  56,
        122, 136, 140, 141,  66,  25, 125],
       [ 42,  30, 112, 145,  56,  54, 119,  61,  13, 125,  89, 146,  39,
         66,  25, 128, 141, 140, 122, 136],
       [145,  61, 146,  30, 125,  66,  39,  13, 141, 136,  89, 119,  42,
         25,  54, 140,  56, 128, 122, 112],
       [128, 125, 146, 112, 140, 145, 136,  25,  66,  13,  54,  42,  61,
         56, 122, 119,  39,  30,  89, 141],
       [125, 145, 146,  39, 128,  61, 140,  89, 122,  56, 136, 112, 119,
         30,  13,  25,  54, 141,  42,  66],
       [125, 122, 146,  39, 145,  30, 119,  54,  42,  61, 112,  13, 140,
        128,  56,  25, 136, 141,  89,  66],
       [ 42, 122,  13,  66,  30, 136,  54, 145, 112, 140, 125,  56,  89,
        128, 146, 119,  25,  61,  39, 141],
       [ 89, 140, 145,  13, 122,  30,  54,  39, 125,  56, 119,  42,  61,
         25,  66, 146, 136, 112, 128, 141],
       [122, 136,  30, 119,  56, 125,  42, 141, 112,  39,  66, 1

In [15]:
print(predicted_genres(predictions)[0])

['Children', 'Comedy', 'Comedy', 'Romance', 'Comedy', 'Action', 'Comedy', 'Crime', 'Drama', 'Thriller', 'Documentary', 'Action', 'Thriller', 'Action', 'Adventure', 'Comedy', 'Crime', 'Adventure', 'Children', 'Action', 'Crime', 'Drama', 'Adventure', 'Animation', 'Children', 'Drama', 'Thriller', 'Crime', 'Drama', 'Adventure', 'Children', 'Fantasy', 'Comedy', 'Romance', 'Documentary', 'Drama', 'Romance', 'Comedy', 'Action', 'Sci-Fi', 'Thriller', 'Drama', 'Romance', 'Comedy']


In [16]:
G = []
for movie in predictions:
    L = []
    for Id in movie:
        for val in extract_movie_genres(Id)[0]:
            L.append(val)
    G.append(L)
print(G)

[['Children', 'Comedy', 'Comedy', 'Romance', 'Comedy', 'Action', 'Comedy', 'Crime', 'Drama', 'Thriller', 'Documentary', 'Action', 'Thriller', 'Action', 'Adventure', 'Comedy', 'Crime', 'Adventure', 'Children', 'Action', 'Crime', 'Drama', 'Adventure', 'Animation', 'Children', 'Drama', 'Thriller', 'Crime', 'Drama', 'Adventure', 'Children', 'Fantasy', 'Comedy', 'Romance', 'Documentary', 'Drama', 'Romance', 'Comedy', 'Action', 'Sci-Fi', 'Thriller', 'Drama', 'Romance', 'Comedy'], ['Action', 'Crime', 'Drama', 'Crime', 'Drama', 'Action', 'Adventure', 'Comedy', 'Crime', 'Action', 'Comedy', 'Crime', 'Drama', 'Thriller', 'Adventure', 'Children', 'Fantasy', 'Children', 'Comedy', 'Comedy', 'Drama', 'Thriller', 'Adventure', 'Animation', 'Children', 'Comedy', 'Action', 'Thriller', 'Adventure', 'Children', 'Comedy', 'Romance', 'Action', 'Sci-Fi', 'Thriller', 'Drama', 'Romance', 'Documentary', 'Comedy', 'Drama', 'Romance', 'Comedy', 'Romance', 'Documentary'], ['Action', 'Comedy', 'Crime', 'Drama', 'Thr

In [17]:
predictions

array([[ 54,  39, 119, 145, 128,  89, 112, 146,  42,  13,  61,  30,  56,
        122, 136, 140, 141,  66,  25, 125],
       [ 42,  30, 112, 145,  56,  54, 119,  61,  13, 125,  89, 146,  39,
         66,  25, 128, 141, 140, 122, 136],
       [145,  61, 146,  30, 125,  66,  39,  13, 141, 136,  89, 119,  42,
         25,  54, 140,  56, 128, 122, 112],
       [128, 125, 146, 112, 140, 145, 136,  25,  66,  13,  54,  42,  61,
         56, 122, 119,  39,  30,  89, 141],
       [125, 145, 146,  39, 128,  61, 140,  89, 122,  56, 136, 112, 119,
         30,  13,  25,  54, 141,  42,  66],
       [125, 122, 146,  39, 145,  30, 119,  54,  42,  61, 112,  13, 140,
        128,  56,  25, 136, 141,  89,  66],
       [ 42, 122,  13,  66,  30, 136,  54, 145, 112, 140, 125,  56,  89,
        128, 146, 119,  25,  61,  39, 141],
       [ 89, 140, 145,  13, 122,  30,  54,  39, 125,  56, 119,  42,  61,
         25,  66, 146, 136, 112, 128, 141],
       [122, 136,  30, 119,  56, 125,  42, 141, 112,  39,  66, 1

In [18]:
extract_movie_genres(119)[0]

['Comedy']