<a href="https://colab.research.google.com/github/dykeeIS590DV/dykeeIS590DV.github.io/blob/master/animeRecomender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [94]:
import pandas as pd
from scipy.sparse import csr_matrix

In [95]:
# Read both source tables from CSV files in the working directory.
# Point the paths elsewhere (e.g. a mounted Google Drive folder) if needed.
# Both frames carry an 'anime_id' column, which is the join key used below.
anime, ratings = (pd.read_csv(csv_path) for csv_path in ('anime.csv', 'rating.csv'))

In [67]:
# Join every user rating with its anime row via the shared 'anime_id' key.
# Both inputs have a 'rating' column, so pandas suffixes them '_x' (from
# ratings) and '_y' (from anime); give those two columns descriptive names.
merged_data = pd.merge(ratings, anime, on='anime_id').rename(
    columns={'rating_x': 'user_rating', 'rating_y': 'anime_rating'}
)

In [68]:
# Keep only rows with a real user rating; the dataset appears to use -1
# as the placeholder for "no rating given".
merged_data = merged_data.loc[merged_data['user_rating'].ne(-1)]

In [69]:
# Shrink the eventual user x anime matrix by dropping low-activity users.
# Count ratings per user, then keep the ids of users with 50 or more.
user_counts = merged_data['user_id'].value_counts()
filtered_users = user_counts.loc[lambda counts: counts >= 50].index

In [70]:
# Same idea on the item side: count ratings per anime and keep the ids of
# titles that received 100 or more.
anime_counts = merged_data['anime_id'].value_counts()
filtered_anime = anime_counts.loc[lambda counts: counts >= 100].index

In [71]:
# Retain a rating row only when BOTH its user and its anime passed the
# activity thresholds computed above.
filtered_data = merged_data.loc[
    lambda rows: rows['user_id'].isin(filtered_users)
    & rows['anime_id'].isin(filtered_anime)
]

In [72]:
# Reshape the long ratings table into a wide grid: one row per user_id, one
# column per anime_id, cell = that user's rating. pivot_table's default
# aggregation (mean) is applied if a (user, anime) pair occurs more than once.
# Pairs with no rating become 0.
user_anime_matrix = pd.pivot_table(
    filtered_data,
    values='user_rating',
    index='user_id',
    columns='anime_id',
).fillna(0)

# Store the grid in compressed-sparse-row form for the k-NN steps below.
# Note: the dense pivot above is still materialised first.
sparse_matrix = csr_matrix(user_anime_matrix.to_numpy())

# Sanity check: (number of users, number of anime)
print("Sparse matrix created with shape:", sparse_matrix.shape)

Sparse matrix created with shape: (32967, 4143)


In [73]:
from sklearn.neighbors import NearestNeighbors
import numpy as np

In [74]:
# Step 1: user-based k-NN. Each row of sparse_matrix is one user's rating
# vector; neighbours are found by brute-force search under cosine distance.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(sparse_matrix)

In [75]:
# Step 2: Pick a sample user (you can change this!)
# NOTE: this is a row *position* in user_anime_matrix / sparse_matrix, not a
# user_id; the actual user_id is looked up from the matrix index in Step 4.
user_index = 10000  # row position 10000 of user_anime_matrix

In [76]:
# Step 3: query the fitted model with the target user's own rating row.
# Six neighbours are requested because the closest match is expected to be
# the user itself, leaving five genuinely different users.
distances, indices = model_knn.kneighbors(
    X=sparse_matrix[user_index],
    n_neighbors=6,
    return_distance=True,
)

In [77]:
# Step 4: Get actual user ID from the index
# The pivot's row index holds the user_id for each matrix row, so it maps a
# row position (as used by the k-NN model) back to the real user_id.
user_ids = user_anime_matrix.index
target_user_id = user_ids[user_index]

In [78]:
# Header lines for the neighbour listing printed by the next cell.
print(f"Recommendations for User ID: {target_user_id}")
print("Top similar users (excluding the target):")

Recommendations for User ID: 22380
Top similar users (excluding the target):


In [79]:
# List each neighbour's user_id with its cosine distance. The first entry of
# the k-NN result is skipped on the assumption that it is the target user.
for neighbor_row, neighbor_distance in zip(indices[0][1:], distances[0][1:]):
    print(f"User {user_ids[neighbor_row]} (Distance: {neighbor_distance:.4f})")

User 41995 (Distance: 0.6481)
User 22491 (Distance: 0.6701)
User 17742 (Distance: 0.6755)
User 44363 (Distance: 0.6777)
User 3521 (Distance: 0.6788)


In [80]:
# Step 5: collect the anime_ids the target user has already rated so they
# can be excluded from the recommendations.
user_seen = set(
    filtered_data.loc[filtered_data['user_id'] == target_user_id, 'anime_id']
)


In [81]:
# Step 6: Get recommendations based on similar users.
# For every neighbour (the first k-NN hit is skipped because it is expected to
# be the target user), collect the anime that the neighbour rated 8 or higher
# and that the target user has not rated yet. Each hit is stored as an
# (anime_id, name) tuple; the same title may appear once per neighbour, which
# is what the counting step afterwards relies on.
recommendations = []

for neighbor_row in indices[0][1:]:
    neighbor_id = user_ids[neighbor_row]

    # Vectorised filter instead of a Python-level iterrows() loop: same rows,
    # same order, but evaluated in bulk by pandas.
    liked_unseen = filtered_data[
        (filtered_data['user_id'] == neighbor_id)
        & (filtered_data['user_rating'] >= 8)  # High ratings only
        & ~filtered_data['anime_id'].isin(user_seen)
    ]
    recommendations.extend(zip(liked_unseen['anime_id'], liked_unseen['name']))


In [82]:
# Step 7: Count and rank recommended anime.
# Each entry in `recommendations` is an (anime_id, name) tuple; a title's count
# is the number of neighbours that rated it highly.
from collections import Counter
top_recommendations = Counter(recommendations).most_common(10)

print("\nTop Anime Recommendations:")
# The loop variables must NOT be called `anime`: a for-loop target is a normal
# assignment in the notebook's global scope, so that name would overwrite the
# `anime` DataFrame and break every later cell that uses it.
for (rec_anime_id, rec_title), count in top_recommendations:
    print(f"{rec_title} (Recommended {count} times)")


Top Anime Recommendations:
Death Note (Recommended 4 times)
Soul Eater (Recommended 4 times)
Kiseijuu: Sei no Kakuritsu (Recommended 4 times)
Mahou Shoujo Madoka★Magica (Recommended 3 times)
Kaichou wa Maid-sama! (Recommended 3 times)
Ao no Exorcist (Recommended 3 times)
Hunter x Hunter (2011) (Recommended 3 times)
Magi: The Labyrinth of Magic (Recommended 3 times)
Hataraku Maou-sama! (Recommended 3 times)
Noragami (Recommended 3 times)


In [96]:
from sklearn.neighbors import NearestNeighbors

# Item-based view of the same data: after transposing, each row is one anime
# and each column is one user.
sparse_matrix_T = sparse_matrix.T

# k-NN over anime rows, again brute-force with cosine distance.
item_knn = NearestNeighbors(algorithm='brute', metric='cosine')
item_knn.fit(sparse_matrix_T)

# anime_id -> title lookup, used to print readable names.
anime_id_to_name = dict(zip(anime['anime_id'], anime['name']))

# The pivot's column labels are the anime_ids, in the same order as the rows
# of sparse_matrix_T; this maps a row position back to an anime_id.
anime_ids_ordered = user_anime_matrix.columns

In [89]:
def get_similar_anime(anime_title, n_recommendations=5):
    """Print the anime whose user-rating vectors are closest to ``anime_title``.

    Relies on notebook globals: ``anime`` (catalogue DataFrame),
    ``anime_ids_ordered`` (pivot column Index), ``sparse_matrix_T`` (anime x
    user matrix), ``item_knn`` (fitted on that matrix) and
    ``anime_id_to_name``.

    Parameters
    ----------
    anime_title : str
        Title to look up. Must match the catalogue's 'name' column exactly
        (case-sensitive). If several rows share the title, the first is used.
    n_recommendations : int, default 5
        Maximum number of similar anime to print.

    Returns
    -------
    None
        Results (or a "not found" message) are printed, not returned.
    """
    if n_recommendations < 1:
        print("n_recommendations must be at least 1.")
        return

    # Find the anime ID from the title
    matching_ids = anime.loc[anime['name'] == anime_title, 'anime_id']
    if matching_ids.empty:
        print(f"Anime '{anime_title}' not found in dataset.")
        return

    anime_id = matching_ids.values[0]

    # Row position of this anime in sparse_matrix_T. The title can exist in the
    # catalogue yet be absent here if it was dropped by the popularity filter.
    try:
        anime_idx = anime_ids_ordered.get_loc(anime_id)
    except KeyError:
        print(f"Anime ID {anime_id} not found in filtered matrix.")
        return

    # Ask for one extra neighbour (the anime itself is normally returned too),
    # but never more than there are rows, which would make scikit-learn raise.
    n_neighbors = min(n_recommendations + 1, sparse_matrix_T.shape[0])
    distances, indices = item_knn.kneighbors(
        sparse_matrix_T[anime_idx], n_neighbors=n_neighbors
    )

    # Drop the query anime by row position rather than assuming it is the
    # first hit: with tied distances another row may be returned ahead of it.
    neighbours = [
        (row_pos, distance)
        for row_pos, distance in zip(indices[0], distances[0])
        if row_pos != anime_idx
    ][:n_recommendations]

    print(f"\nTop {n_recommendations} anime similar to '{anime_title}':\n")
    for row_pos, distance in neighbours:
        similar_id = anime_ids_ordered[row_pos]
        # Cosine similarity = 1 - cosine distance.
        print(f"{anime_id_to_name[similar_id]} (Similarity Score: {1 - distance:.4f})")

In [99]:
# Example: find anime similar to 'Death Note'
# (the title must match the catalogue's 'name' column exactly)
get_similar_anime("Death Note", n_recommendations=5)


Top 5 anime similar to 'Death Note':

Code Geass: Hangyaku no Lelouch (Similarity Score: 0.7127)
Code Geass: Hangyaku no Lelouch R2 (Similarity Score: 0.6865)
Elfen Lied (Similarity Score: 0.6729)
Shingeki no Kyojin (Similarity Score: 0.6649)
Fullmetal Alchemist: Brotherhood (Similarity Score: 0.6585)
