#  Setup et Imports

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from notebook_helpers import load_all_data, logger

# Chargement des Données

In [None]:
# Load the three datasets returned by the project helper and print the
# (rows, columns) shape of each one, in the order they were returned.
df_anime, df_genres, df_studios = load_all_data()
for frame in (df_anime, df_genres, df_studios):
    print(frame.shape)

In [None]:
# Number of distinct values found in the 'genre' column of df_genres.
sum_genres = df_genres['genre'].nunique()
print(sum_genres)

# Matrice de contingence Anime x Genres

In [None]:
# Build the title x genre matrix: one row per title, one column per genre.
# Each cell counts the joined (title, genre) rows and is 0 when the pair never
# occurs. The join is an inner merge on 'anime_id', so entries present in only
# one of the two frames are left out.
# NOTE(review): rows are keyed by 'title', so distinct anime_id values sharing
# the same title collapse into a single row - confirm this is intended.
genre_pairs = df_genres[['anime_id', 'genre']]
df_merged = df_anime.merge(genre_pairs, on='anime_id')
anime_genre_matrix = df_merged.pivot_table(
    index='title',
    columns='genre',
    values='anime_id',
    aggfunc='count',
    fill_value=0,
)
print(anime_genre_matrix.shape)

# Cosine Similarity entre anime

In [None]:
# Pairwise cosine similarity between the rows of anime_genre_matrix; row and
# column i of the result both refer to the i-th entry of anime_genre_matrix.index.
similarity_matrix = cosine_similarity(anime_genre_matrix)
print(similarity_matrix.shape)


In [None]:
print (similarity_matrix[0])  # Similarity scores of the first row (first title) against every title

# Fonction de recommandation

In [None]:
# Number of leading characters compared when guessing that two titles belong
# to the same franchise (sequels, OVAs, movies, ...).
_FRANCHISE_PREFIX_LEN = 10


def _same_franchise(first, second, prefix_len=_FRANCHISE_PREFIX_LEN):
    """Heuristically tell whether two lowercase titles share a franchise.

    Two titles match when their first ``prefix_len`` characters are equal, or
    when one title is shorter than ``prefix_len`` and the other one starts
    with it followed by a non-alphanumeric character (so 'naruto' matches
    'naruto: shippuuden' but not 'narutaru').
    """
    if first[:prefix_len] == second[:prefix_len]:
        return True

    shorter, longer = sorted((first, second), key=len)
    if not shorter or len(shorter) >= prefix_len:
        return False

    # The prefixes differ, so if `longer` starts with `shorter` it is strictly
    # longer and indexing just past the shared part is safe.
    return longer.startswith(shorter) and not longer[len(shorter)].isalnum()


def get_recommendations(anime_title, similarity_matrix, top_k=5):
    """
    Recommend the K titles most similar to a reference title.

    The titles are taken from the module-level ``anime_genre_matrix.index``;
    ``similarity_matrix`` must have its rows and columns in that same order.
    Candidates that look like another entry of an already seen franchise
    (the reference title or an earlier recommendation) are skipped. This is
    a prefix-based heuristic, so unrelated titles sharing a long prefix are
    filtered out as well.

    Args:
        anime_title (str): Reference title, exactly as it appears in the index.
        similarity_matrix (np.ndarray): Square title-by-title similarity matrix.
        top_k (int): Maximum number of recommendations; values <= 0 yield none.

    Returns:
        pd.Series | None: Similarity scores indexed by recommended title, in
        decreasing score order; an empty float Series when nothing qualifies;
        ``None`` when ``anime_title`` is not in the index.
    """
    all_titles = anime_genre_matrix.index
    if anime_title not in all_titles:
        logger.warning(f"Anime '{anime_title}' not found in the dataset.")
        return None

    # Row of the similarity matrix that belongs to the reference title.
    position = all_titles.get_loc(anime_title)
    sim_scores = pd.Series(similarity_matrix[position], index=all_titles)

    # Best matches first, without the reference title itself.
    sim_scores = sim_scores.sort_values(ascending=False).drop(anime_title)

    # Lowercase titles whose franchise must not be recommended again.
    seen_titles = [anime_title.lower()]
    picked = []

    for title, score in sim_scores.items():
        # Checked before appending so that top_k <= 0 really returns nothing.
        if len(picked) >= top_k:
            break

        candidate = title.lower()
        if any(_same_franchise(candidate, seen) for seen in seen_titles):
            continue

        picked.append((title, score))
        seen_titles.append(candidate)

    if not picked:
        logger.warning(f"No recommendations found for '{anime_title}'.")
        return pd.Series(dtype=float)

    rec_titles, rec_scores = zip(*picked)
    logger.info(f"Extracted {len(rec_titles)} recommendations for '{anime_title}'.")
    return pd.Series(rec_scores, index=rec_titles)

In [None]:
# Smoke test of the recommender on two hard-coded reference titles: print a
# header, a separator line and the top-10 recommendations for each of them.
for test_anime in ('Shingeki no Kyojin', 'Naruto'):
    print(f"ðŸŽ¯ Recommandations pour '{test_anime}' :")
    print("-" * 60)
    recs = get_recommendations(test_anime, similarity_matrix, top_k=10)
    print(recs)
