In [99]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import re

In [3]:
# 1. Load Data
# Both CSV files are read from the current working directory.
ANIME_CSV = 'anime.csv'    # one row per anime (anime_id, name, genre, type, ...)
RATING_CSV = 'rating.csv'  # one row per (user_id, anime_id, rating)

anime_df = pd.read_csv(ANIME_CSV)
rating_df = pd.read_csv(RATING_CSV)

# Preview the anime table.
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [5]:
# Preview the first five rows of the ratings table.
rating_df.head(5)

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [7]:
# 2. Data Preprocessing
# Drop anime without a genre, force the genre column to str, and reset the
# index so that row labels equal row positions. The TF-IDF and cosine
# similarity matrices built later are positional, so a gapped index (left
# behind by dropna) would make label-based lookups point at the wrong rows.
# The chain is non-mutating, so re-running this cell gives the same result.
anime_df = (
    anime_df
    .dropna(subset=['genre'])
    .assign(genre=lambda frame: frame['genre'].astype(str))
    .reset_index(drop=True)
)

# A rating of -1 is excluded so that only real scores enter the model.
rating_df = rating_df[rating_df['rating'] != -1]

# Both frames have a 'rating' column, so the merge yields 'rating_x' (the
# user's rating) and 'rating_y' (the anime's rating column from anime_df).
merged_df = pd.merge(rating_df, anime_df, on='anime_id')

In [9]:
# 3. Handle NaN in 'genre' and ensure string type
# Idempotent, non-mutating version of the genre cleaning: rows without a
# genre are removed, the column is cast to str, and the index is reset so
# that row labels match the row positions of the TF-IDF / cosine-similarity
# matrices derived from this frame.
anime_df = (
    anime_df
    .dropna(subset=['genre'])
    .assign(genre=lambda frame: frame['genre'].astype(str))
    .reset_index(drop=True)
)

In [11]:
# 3. Feature Extraction (Genre)
# Each anime's comma-separated genre string becomes one TF-IDF row.
# NOTE(review): the vectorizer's default tokenisation works on words, so a
# genre such as "Sci-Fi" is presumably split into separate tokens rather than
# kept as one genre label - confirm this is intended.
genre_corpus = anime_df['genre']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(genre_corpus)

In [13]:
# 4. Compute Cosine Similarity
# Pairwise similarity between every pair of anime rows; when the second
# argument is omitted the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

In [15]:
# 5. Recommendation Function (Cosine Similarity of Anime Features)
def recommend_anime_by_features(anime_name, cosine_sim=cosine_sim, df=anime_df, num_recommendations=10):
    """Return the names of the anime most similar to ``anime_name`` by genre.

    Parameters
    ----------
    anime_name : str
        Exact value to look up in ``df['name']``.
    cosine_sim : array-like of shape (len(df), len(df))
        Pairwise similarity matrix whose row/column order follows the row
        order of ``df``.
    df : pandas.DataFrame
        Frame with a ``'name'`` column, aligned row-by-row with ``cosine_sim``.
    num_recommendations : int, default 10
        Maximum number of names to return.

    Returns
    -------
    list of str, or the string ``"Anime not found."`` when no row matches.
    """
    # Look the title up by *position*, not by index label: cosine_sim is a
    # plain positional matrix, and df's labels stop matching positions as soon
    # as rows have been dropped without resetting the index.
    matches = np.flatnonzero((df['name'] == anime_name).to_numpy())
    if matches.size == 0:
        return "Anime not found."
    query_pos = int(matches[0])

    scores = np.asarray(cosine_sim[query_pos]).ravel()

    # Stable descending sort keeps the original row order among tied scores.
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the query itself explicitly instead of assuming it is ranked
    # first; other anime with identical genres tie with it at the top.
    ranked = ranked[ranked != query_pos][:max(num_recommendations, 0)]
    return df['name'].iloc[ranked].tolist()

In [17]:
# 6. User-Item Matrix for Collaborative Filtering
# Rows are users, columns are anime names, cells hold the user's rating
# ('rating_x' is the rating column that came from rating_df in the merge).
# Missing user/anime combinations are filled with 0.
user_item_matrix = pd.pivot_table(
    merged_df,
    values='rating_x',
    index='user_id',
    columns='name',
).fillna(0)

# Sparse copy of the same matrix for fitting the neighbour model.
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

In [18]:
# 7. Train KNN Model (Collaborative Filtering)
# Brute-force neighbour search over the user rating vectors, compared with
# cosine distance.
knn_params = {'metric': 'cosine', 'algorithm': 'brute'}
model_knn = NearestNeighbors(**knn_params)
model_knn.fit(user_item_matrix_sparse)


In [22]:
# 8. Recommendation Function (Collaborative Filtering)
def recommend_anime_cf(user_id, num_recommendations=10, n_neighbors=100):
    """Recommend anime to a user from the ratings of their nearest neighbours.

    Uses the module-level ``user_item_matrix`` (users x anime names, 0 = not
    rated) and the ``model_knn`` fitted on it.

    Parameters
    ----------
    user_id
        Label of the user in ``user_item_matrix.index``.
    num_recommendations : int, default 10
        Maximum number of anime names to return.
    n_neighbors : int, default 100
        Number of neighbours requested from the model, including the user
        themself. Capped at the number of users in ``user_item_matrix``.

    Returns
    -------
    list of str
        Names of anime the user has not rated, ordered by the neighbours'
        mean rating (highest first). Empty when there are no other users.
        The string ``"User not found."`` is returned for an unknown user.
    """
    try:
        user_index = user_item_matrix.index.get_loc(user_id)
    except KeyError:
        return "User not found."

    user_ratings = user_item_matrix.iloc[user_index]

    # kneighbors raises if asked for more neighbours than fitted samples, so
    # cap the request at the number of users (and keep it at least 1).
    k = max(1, min(n_neighbors, len(user_item_matrix)))
    _, indices = model_knn.kneighbors(
        user_ratings.to_numpy().reshape(1, -1), n_neighbors=k
    )

    # Remove the query user by position rather than assuming they are the
    # first hit; a user with an identical rating vector can be ranked ahead.
    neighbor_positions = indices.ravel()
    neighbor_positions = neighbor_positions[neighbor_positions != user_index][:k - 1]
    if neighbor_positions.size == 0:
        return []

    # Mean rating per anime across the neighbours (unrated cells count as 0).
    avg_ratings = user_item_matrix.iloc[neighbor_positions].mean()

    # Only recommend anime the user has not rated themself.
    unrated_anime = user_item_matrix.columns[user_ratings == 0]
    ranked = avg_ratings[unrated_anime].sort_values(ascending=False)
    return ranked.index[:num_recommendations].tolist()

In [24]:
# 9. Example Recommendations
# Content-based: anime whose genres resemble the chosen title.
anime_to_recommend = "Fullmetal Alchemist: Brotherhood"
recommendations_features = recommend_anime_by_features(anime_to_recommend)
print(
    f"Recommendations for '{anime_to_recommend}' (Features):",
    recommendations_features,
    sep="\n",
)

# Collaborative filtering: use the first user in the user-item matrix.
user_to_recommend = user_item_matrix.index[0]
recommendations_cf = recommend_anime_cf(user_to_recommend)
print(
    f"\nRecommendations for User {user_to_recommend} (Collaborative Filtering):",
    recommendations_cf,
    sep="\n",
)

Recommendations for 'Fullmetal Alchemist: Brotherhood' (Features):
['Fullmetal Alchemist', 'Fullmetal Alchemist: The Sacred Star of Milos', 'Fullmetal Alchemist: Brotherhood Specials', 'Tales of Vesperia: The First Strike', 'Tide-Line Blue', 'Fullmetal Alchemist: Reflections', 'Magi: The Kingdom of Magic', 'Magi: The Labyrinth of Magic', 'Magi: Sinbad no Bouken (TV)', 'Magi: Sinbad no Bouken']

Recommendations for User 1 (Collaborative Filtering):
['Death Note', 'High School DxD BorN', 'Shingeki no Kyojin', 'Angel Beats!', 'Naruto', 'Sword Art Online II', 'High School DxD Specials', 'Ao no Exorcist', 'Mirai Nikki (TV)', 'Date A Live']


In [115]:
# 10. Evaluation (Collaborative Filtering)
def evaluate_precision_recall_optimized(test_df, user_item_matrix, recommend_anime_cf, top_n=3):
    """Compute mean precision@top_n and recall over the users in ``test_df``.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Held-out interactions with at least ``'user_id'`` and ``'name'``
        columns.
    user_item_matrix : pandas.DataFrame
        Matrix whose index lists the users the recommender knows about; users
        absent from it get an empty recommendation list.
    recommend_anime_cf : callable
        Called as ``recommend_anime_cf(user_id, num_recommendations=top_n)``
        and expected to return an iterable of anime names.
    top_n : int, default 3
        Number of recommendations requested per user; also the denominator of
        the precision.

    Returns
    -------
    (float, float)
        Mean precision and mean recall across users; ``(0.0, 0.0)`` when
        ``test_df`` contains no users.
    """
    precisions = []
    recalls = []
    known_users = user_item_matrix.index

    # Group once instead of re-filtering test_df for every user. sort=False
    # keeps users in order of first appearance.
    for user_id, names in test_df.groupby('user_id', sort=False)['name']:
        actual_rated_anime = set(names.tolist())

        if user_id in known_users:
            recommendations = recommend_anime_cf(user_id, num_recommendations=top_n)
        else:
            recommendations = []

        # A recommender may answer with a message string (e.g. for an unknown
        # user); set() would split that into single characters, so treat it
        # as "no recommendations".
        if recommendations is None or isinstance(recommendations, str):
            recommendations = []
        predicted_rated_anime = set(recommendations)

        hits = len(actual_rated_anime & predicted_rated_anime)
        precisions.append(hits / top_n if predicted_rated_anime and top_n > 0 else 0)
        recalls.append(hits / len(actual_rated_anime) if actual_rated_anime else 0)

    # np.mean of an empty list is NaN (with a warning); report zeros instead.
    if not precisions:
        return 0.0, 0.0
    return float(np.mean(precisions)), float(np.mean(recalls))

# Evaluation function
def evaluate_cf(merged_df, user_item_matrix, recommend_anime_cf, test_size=0.2, random_state=42):
    """Split ``merged_df``, score the recommender on the test part, and print the result.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Interaction table to split; the test part is passed on to
        ``evaluate_precision_recall_optimized``.
    user_item_matrix : pandas.DataFrame
        Forwarded unchanged to the metric function.
    recommend_anime_cf : callable
        Recommender forwarded unchanged to the metric function.
    test_size : float, default 0.2
        Fraction of rows held out for testing.
    random_state : int, default 42
        Seed for the split, for reproducibility.

    Returns
    -------
    (precision, recall)
        The values returned by ``evaluate_precision_recall_optimized``.

    Notes
    -----
    Only the test split is used here. ``user_item_matrix`` and
    ``recommend_anime_cf`` are used as supplied and are not refit on the
    training split, so if they were built from the full ``merged_df`` the
    held-out ratings are not unseen by the recommender.
    """
    # The training split is not needed by this function.
    _, test_df = train_test_split(merged_df, test_size=test_size, random_state=random_state)
    precision, recall = evaluate_precision_recall_optimized(test_df, user_item_matrix, recommend_anime_cf)
    print(f"Precision (Collaborative Filtering): {precision}")
    print(f"Recall (Collaborative Filtering): {recall}")
    return precision, recall

# Run the evaluation on the merged ratings with the collaborative-filtering recommender.
evaluate_cf(
    merged_df=merged_df,
    user_item_matrix=user_item_matrix,
    recommend_anime_cf=recommend_anime_cf,
)

Precision (Collaborative Filtering): 0.3333333333333333
Recall (Collaborative Filtering): 1.0


(0.3333333333333333, 1.0)

1. User-Based vs. Item-Based Collaborative Filtering:

- User-Based: Recommends items based on what similar users liked.
- Item-Based: Recommends items similar to what the user liked.

2. What is Collaborative Filtering?

Collaborative filtering recommends items based on user preferences and similarities. It predicts what a user might like by analyzing what similar users liked (user-based) or which items are similar to those the user already liked (item-based).