# Élaboration d’un modèle Content-Based Filtering

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
import random

In [3]:
# Load the data frames used by the recommender.
# NOTE(review): both frames are read from the same "clicks_sample.csv" file;
# confirm that this file really carries the embedding columns expected for
# `articles_embeddings`.
ROOT_PATH = "/code/dataset/"
articles_embeddings = pd.read_csv(f"{ROOT_PATH}clicks_sample.csv")
user_interactions = pd.read_csv(f"{ROOT_PATH}clicks_sample.csv")


In [4]:
# Pick what a user's recommendations are anchored on
def get_reference_article(user_id, strategy="last_clicked"):
    """Return the reference used to build recommendations for ``user_id``.

    Reads the module-level ``user_interactions`` and ``articles_embeddings``
    data frames.

    Args:
        user_id: Value matched against the ``user_id`` column of
            ``user_interactions``.
        strategy: One of
            ``"last_clicked"`` - article id of the user's row with the
            greatest ``timestamp``;
            ``"random"`` - article id drawn at random from the user's rows;
            ``"mean_embedding"`` - mean of the embedding rows of the
            articles the user clicked (one row per click, so repeated
            clicks weigh more).

    Returns:
        An article id for ``"last_clicked"`` / ``"random"``, a 1-D numpy
        array for ``"mean_embedding"``, or ``None`` when the user has no
        interaction rows or none of the clicked articles has an embedding.

    Raises:
        ValueError: If ``strategy`` is not one of the values above (only
            checked once the user is known to have interactions).
    """
    user_clicks = user_interactions[user_interactions['user_id'] == user_id]
    if user_clicks.empty:
        return None

    if strategy == "last_clicked":
        # Select the column before the row: extracting a whole row first
        # coerces it to a common dtype, which can turn an integer article id
        # into a float when other columns are floats.
        latest_first = user_clicks.sort_values(by='timestamp', ascending=False)
        return latest_first['article_id'].iloc[0]

    if strategy == "random":
        return random.choice(user_clicks['article_id'].values)

    if strategy == "mean_embedding":
        clicked_articles = user_clicks['article_id'].values
        indexed = articles_embeddings.set_index("article_id")
        # Ignore clicked articles without an embedding row instead of letting
        # .loc raise KeyError on the first missing label.
        known = clicked_articles[np.isin(clicked_articles, indexed.index.values)]
        if known.size == 0:
            return None
        return np.mean(indexed.loc[known].values, axis=0)

    raise ValueError("Stratégie inconnue")

In [5]:
# Recommendation based on cosine similarity
def recommend_articles(user_id, strategy="last_clicked", top_n=5, use_pca=False, n_components=50):
    """Recommend the articles most similar to a user's reference.

    The reference comes from ``get_reference_article(user_id, strategy)``.
    Every row of the module-level ``articles_embeddings`` frame (all columns
    except ``article_id``) is compared to it with cosine similarity.

    Args:
        user_id: User to recommend for.
        strategy: Forwarded to ``get_reference_article``.
        top_n: Maximum number of article ids to return.
        use_pca: When true, fit a PCA on all embeddings and compare in the
            reduced space.
        n_components: ``n_components`` passed to ``PCA`` when ``use_pca``
            is true.

    Returns:
        A list of at most ``top_n`` article ids ordered from most to least
        similar. The list is empty when no reference is available for the
        user or when the reference article has no embedding row. For
        article-based strategies the reference article itself is never
        returned.
    """
    reference = get_reference_article(user_id, strategy)
    if reference is None:
        return []

    article_ids = articles_embeddings["article_id"].values
    embeddings = articles_embeddings.drop(columns=["article_id"]).values

    if strategy == "mean_embedding":
        # The mean embedding is a 1-D vector; cosine_similarity needs 2-D.
        ref_embedding = np.asarray(reference).reshape(1, -1)
        # The reference is not an article, so nothing has to be excluded.
        is_reference = np.zeros(len(article_ids), dtype=bool)
    else:
        is_reference = article_ids == reference
        if not is_reference.any():
            # Reference article has no embedding row: nothing to compare to.
            return []
        # Keep a single row even if the article id appears more than once.
        ref_embedding = embeddings[is_reference][:1]

    if use_pca:
        pca = PCA(n_components=n_components)
        embeddings = pca.fit_transform(embeddings)
        ref_embedding = pca.transform(ref_embedding)

    # Cosine similarity between the reference and every article
    similarities = cosine_similarity(ref_embedding, embeddings)[0]
    ranked = np.argsort(similarities)[::-1]
    # Exclude the reference article by id rather than by assuming it is
    # ranked first (ties or duplicated ids can put another row on top).
    ranked = ranked[~is_reference[ranked]]

    return [article_ids[i] for i in ranked[:max(top_n, 0)]]

In [7]:
# Smoke test with a single user.
# recommend_articles returns an empty list when no reference article can be
# found for the user (e.g. the id has no rows in user_interactions).
test_user_id = 5283  # Replace with a real user_id
recommendations = recommend_articles(test_user_id, strategy="last_clicked")
print("Articles recommandés :", recommendations)


Articles recommandés : []
