In [3]:
import pandas as pd
import numpy as np

# Size of the sub-sampled matrix built at the end of this block.
num_users = 200
num_movies = 100

# Load the raw ratings; the pivot below reads the user_id, movie_id and
# rating columns from this file.
file_path = 'train_ratings.csv'
ratings_df = pd.read_csv(file_path)

# One row per user, one column per movie; (user, movie) pairs without a
# rating are filled with 0, which the rest of the notebook treats as "unrated".
user_movie_matrix = (
    ratings_df
    .pivot(index='user_id', columns='movie_id', values='rating')
    .fillna(0)
)

# First num_users rows and num_movies columns of the full matrix.
# NOTE(review): this reduced matrix is not referenced by the later cells
# shown in this notebook, which all operate on the full user_movie_matrix.
user_movie_matrix_reduced = user_movie_matrix.iloc[:num_users, :num_movies]

def cosine_similarity(vector_a, vector_b):
    """Return the cosine similarity between two numeric vectors.

    Args:
        vector_a: First vector (any input accepted by ``np.dot`` and
            ``np.linalg.norm``).
        vector_b: Second vector, with the same length as ``vector_a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``, or the integer ``0`` when either
        vector has a zero Euclidean norm.
    """
    numerator = np.dot(vector_a, vector_b)
    length_a = np.linalg.norm(vector_a)
    length_b = np.linalg.norm(vector_b)
    # A zero vector has no direction: report "no similarity" instead of
    # dividing by zero.
    if not length_a or not length_b:
        return 0
    return numerator / (length_a * length_b)

# Cosine similarity between every pair of movies (the columns of the matrix).
# The whole table is computed in one vectorised pass: filling a DataFrame cell
# by cell through .loc is very slow and leaves the result with dtype=object,
# whereas this produces a float-typed frame with the same values.
movie_ids = user_movie_matrix.columns
rating_values = np.asarray(user_movie_matrix, dtype=float)

# Euclidean norm of each movie column. Columns with zero norm get a divisor
# of 1 so the division below is defined; their dot products are already 0,
# so their similarity to every other movie stays 0.
column_norms = np.linalg.norm(rating_values, axis=0)
column_norms[column_norms == 0] = 1.0

# Entry (i, j) of the Gram matrix is the dot product of movie columns i and j;
# dividing by the outer product of the norms turns it into cosine similarity.
similarity_values = (rating_values.T @ rating_values) / (
    column_norms[:, None] * column_norms[None, :]
)

# Every movie is defined as fully similar to itself (also for all-zero columns).
np.fill_diagonal(similarity_values, 1.0)

movie_similarity = pd.DataFrame(similarity_values, index=movie_ids, columns=movie_ids)


def predict_rating(user_id, movie_id, user_movie_matrix, movie_similarity, treshold):
    """Predict one user's rating of one movie from the movies they already rated.

    The prediction is the similarity-weighted mean of the user's existing
    ratings, using only rated movies whose similarity to ``movie_id`` is
    strictly greater than ``treshold``.

    Args:
        user_id: Row label of the user in ``user_movie_matrix``.
        movie_id: Label of the movie to predict, present in ``movie_similarity``.
        user_movie_matrix: Users x movies rating table where values that are
            not greater than 0 count as "not rated".
        movie_similarity: Movies x movies similarity table.
        treshold: Minimum (exclusive) similarity for a rated movie to
            contribute. The spelling is part of the existing signature.

    Returns:
        The weighted average rating, or ``0`` when the contributing
        similarities sum to zero (including when no movie qualifies).
    """
    ratings_row = user_movie_matrix.loc[user_id]
    seen_movies = ratings_row[ratings_row.gt(0)].index

    # Similarity of the target movie to each movie the user has rated,
    # keeping only the sufficiently similar ones.
    neighbour_weights = movie_similarity.loc[movie_id, seen_movies]
    neighbour_weights = neighbour_weights[neighbour_weights > treshold]

    numerator = 0
    denominator = 0
    for neighbour, weight in neighbour_weights.items():
        numerator += weight * ratings_row[neighbour]
        denominator += weight

    # Without any usable neighbour there is nothing to average.
    return numerator / denominator if denominator != 0 else 0


def predict_missing_ratings(user_movie_matrix, movie_similarity, treshold):
    """Predict a rating for every (user, movie) cell whose value is 0.

    Args:
        user_movie_matrix: Users x movies rating table; a value of 0 marks a
            movie the user has not rated.
        movie_similarity: Movies x movies similarity table, forwarded to
            ``predict_rating``.
        treshold: Similarity cut-off forwarded to ``predict_rating``.

    Returns:
        A dict ``{user_id: {movie_id: predicted_rating}}`` with one entry per
        user in the matrix index and, inside it, one entry per unrated movie.
    """
    return {
        user_id: {
            movie_id: predict_rating(
                user_id, movie_id, user_movie_matrix, movie_similarity, treshold
            )
            for movie_id in user_movie_matrix.columns
            # Only cells holding exactly 0 are treated as missing.
            if user_movie_matrix.loc[user_id, movie_id] == 0
        }
        for user_id in user_movie_matrix.index
    }

# Compute predicted ratings for every movie each user has not rated yet.
# Only rated movies with a similarity strictly above this cut-off contribute.
treshold = 0.1
predicted_ratings = predict_missing_ratings(
    user_movie_matrix,
    movie_similarity,
    treshold,
)


def gen_recommendations(predicted_ratings, top_n=5):
    """Select the highest-predicted movies for each user.

    Args:
        predicted_ratings: Dict ``{user_id: {movie_id: predicted_rating}}``.
        top_n: Number of movies to keep per user (default 5).

    Returns:
        A dict ``{user_id: [(movie_id, predicted_rating), ...]}`` where each
        list is sorted by predicted rating, highest first, and cut to the
        first ``top_n`` items. Movies with equal predictions keep the order
        they had in the input dict.
    """
    def by_score(entry):
        # entry is a (movie_id, predicted_rating) pair
        return entry[1]

    return {
        user_id: sorted(scores.items(), key=by_score, reverse=True)[:top_n]
        for user_id, scores in predicted_ratings.items()
    }


# Keep the top_n highest predicted ratings per user; the result maps each
# user_id to a list of (movie_id, predicted_rating) pairs.
top_n = 5
recommendations = gen_recommendations(predicted_ratings, top_n=top_n)


In [10]:
# Load the memory_profiler IPython extension. The memory-profiler package must
# already be installed; the pip cell of this notebook takes care of that and
# has to be run first on a fresh environment.
%load_ext memory_profiler

In [6]:
# Print the recommendations computed above for a single example user.
# recommendations[user_id] is a list of (movie_id, predicted_rating) pairs.
user_id = 45 
print(f"Recommandations pour l'utilisateur {user_id} :")
for movie_id, predicted_rating in recommendations[user_id]:
    # Predicted ratings are shown with two decimal places.
    print(f"Film {movie_id} avec une note prédite de {predicted_rating:.2f}")

Recommandations pour l'utilisateur 45 :
Film 3410 avec une note prédite de 5.00
Film 1313 avec une note prédite de 5.00
Film 1444 avec une note prédite de 5.00
Film 3905 avec une note prédite de 5.00
Film 409 avec une note prédite de 4.53


In [9]:
!pip install memory-profiler


Defaulting to user installation because normal site-packages is not writeable
Collecting memory-profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.61.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
