In [12]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
import scipy.sparse as sp
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

In [13]:
# 1. Load and preprocess the data
anime_df = pd.read_csv("anime.csv")
anime_df = anime_df.dropna(subset=["name", "genre"])
anime_df["rating"] = anime_df["rating"].fillna(0)
anime_df["episodes"] = anime_df["episodes"].replace("Unknown", 0).fillna(0).astype(float)
anime_df["type"] = anime_df["type"].fillna("Unknown")
anime_df["members"] = anime_df["members"].fillna(0)

In [14]:
# 2. Feature extraction
anime_df["combined"] = anime_df["genre"] + " " + anime_df["type"]
tfidf = TfidfVectorizer(token_pattern=r"[^, ]+")
genre_type_matrix = tfidf.fit_transform(anime_df["combined"])

scaler = MinMaxScaler()
numerical_features = scaler.fit_transform(anime_df[["rating", "episodes", "members"]])
numerical_matrix = sp.csr_matrix(numerical_features)
feature_matrix = sp.hstack([genre_type_matrix, numerical_matrix])

In [15]:
# 3. Split data
train_df, test_df, train_matrix, test_matrix = train_test_split(anime_df, feature_matrix, test_size=0.2, random_state=42)

In [16]:
# 4. Fit Nearest Neighbors model
nn_model = NearestNeighbors(metric='cosine', algorithm='brute')
nn_model.fit(train_matrix)

In [17]:
# Map titles to indices
train_index_map = pd.Series(train_df.index, index=train_df["name"]).drop_duplicates()

In [18]:

# 5. Recommendation function
def recommend_anime(title, top_n=10):
    idx = train_index_map.get(title)
    if idx is None:
        return f"'{title}' not found in training data."
    anime_vec = train_matrix[idx]
    distances, indices = nn_model.kneighbors(anime_vec, n_neighbors=top_n + 1)
    result_indices = indices.flatten()[1:]  # exclude the anime itself
    return train_df.iloc[result_indices][["name", "genre", "rating"]].reset_index(drop=True)

In [19]:
# 6. Evaluation
def evaluate_recommender(test_df, test_matrix, k=10):
    def split_genres(s):
        return set([g.strip().lower() for g in s.split(",")])

    train_genres = [split_genres(g) for g in train_df["genre"]]
    precisions, recalls = [], []

    for i in range(test_df.shape[0]):
        query_vec = test_matrix[i]
        distances, indices = nn_model.kneighbors(query_vec, n_neighbors=k)
        predicted_genres = [train_genres[j] for j in indices.flatten()]

        true_genres = split_genres(test_df.iloc[i]["genre"])
        matches = sum(len(true_genres & pg) > 0 for pg in predicted_genres)

        precision = matches / k
        total_relevant = sum(len(true_genres & tg) > 0 for tg in train_genres)
        recall = matches / total_relevant if total_relevant else 0.0

        precisions.append(precision)
        recalls.append(recall)

    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)
    f1 = 2 * avg_precision * avg_recall / (avg_precision + avg_recall + 1e-9)

    return {"Precision@K": avg_precision, "Recall@K": avg_recall, "F1@K": f1}

In [21]:
# Run evaluation and display
metrics = evaluate_recommender(test_df, test_matrix, k=10)

# Replace ace_tools with standard print
print("\nEvaluation Metrics:")
print(pd.DataFrame([metrics]))

# Show recommendations for a known anime
print("\nRecommendations for Naruto:")
print(recommend_anime("Naruto", top_n=5))



Evaluation Metrics:
   Precision@K  Recall@K      F1@K
0     0.998406  0.005038  0.010025

Recommendations for Naruto:
                                                name  \
0                            Seitokai Yakuindomo OVA   
1            Hidamari Sketch: Sae Hiro Sotsugyou-hen   
2                               Ichigo Mashimaro OVA   
3  Little Busters!: Sekai no Saitou wa Ore ga Mam...   
4  Watashi ga Motenai no wa Dou Kangaetemo Omaera...   

                                    genre  rating  
0  Comedy, School, Shounen, Slice of Life    7.89  
1           Comedy, School, Slice of Life    8.19  
2           Comedy, School, Slice of Life    7.83  
3           Comedy, School, Slice of Life    7.48  
4           Comedy, School, Slice of Life    7.16  
