https://www.kaggle.com/code/abdallahwagih/collaborative-filtering-recommendation-system/notebook
Collaborative Filtering Recommendation System
Python · Anime Recommendations Database

In [1]:
# Import needed modules
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator

In [3]:
# Load the anime dataset
anime_df = pd.read_csv('anime.csv')
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,99933,Akame ga kill,"Drama, Romance, School, Supernatural",Movie,1,9.97,200630
2,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
3,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
4,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572


In [4]:
# Load the rating dataset
rating_df = pd.read_csv('rating.csv')
rating_df.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [5]:
# For computing reasons I'm limiting the dataframe length to 10,000 users
rating_df = rating_df[rating_df.user_id <= 10000]

In [6]:
# Preprocess the rating dataset (handle -1 ratings as NaN)
rating_df['rating'] = rating_df['rating'].replace(-1, pd.NA)

In [7]:
 #Delete duplicated items
rating_df = rating_df.drop_duplicates(['user_id', 'anime_id'])

In [8]:
# Create a user-item interaction matrix
user_item_matrix = rating_df.pivot(index='user_id', columns='anime_id', values='rating')

In [24]:
# Calculate user similarity using cosine similarity
user_similarity = cosine_similarity(user_item_matrix.fillna(0))

In [25]:
def user_based_recommendations(user_id, user_item_matrix, user_similarity, n=5):
    # Get the similarity scores for the target user
    user_scores = user_similarity[user_id - 1]

    # Sort users by similarity in descending order
    similar_users = sorted(enumerate(user_scores), key=lambda x: x[1], reverse=True)

    # Initialize a list to store recommended anime
    recommended_anime = []

    for user, score in similar_users[1:]:  # Exclude the target user
        # Get the anime the similar user has rated positively
        rated_anime = user_item_matrix.loc[user_item_matrix.index[user - 1]]
        positively_rated_anime = rated_anime[rated_anime >= 6].index

        # Exclude anime the target user has already rated
        target_user_rated_anime = user_item_matrix.loc[user_id].dropna().index
        recommended_anime.extend(set(positively_rated_anime) - set(target_user_rated_anime))

        # Limit the number of recommendations
        if len(recommended_anime) >= n:
            break

    return recommended_anime[:n]

In [26]:
# Calculate item similarity using cosine similarity
item_similarity = cosine_similarity(user_item_matrix.fillna(0).T)

In [27]:
def item_based_recommendations(user_id, user_item_matrix, item_similarity, n=5):
    # Get the anime the user has rated positively
    positively_rated_anime = user_item_matrix.loc[user_id][user_item_matrix.loc[user_id] >= 6].index

    # Initialize a list to store recommended anime
    recommended_anime = []

    for anime_id in positively_rated_anime:
        # Get anime similar to the liked anime
        similar_anime = item_similarity[anime_id - 1]

        # Sort similar anime by similarity in descending order
        similar_anime_sorted = sorted(enumerate(similar_anime), key=lambda x: x[1], reverse=True)

        for anime, score in similar_anime_sorted[1:]:  # Exclude the liked anime
            # Exclude anime the user has already rated
            user_rated_anime = user_item_matrix.loc[user_id].dropna().index
            if anime not in user_rated_anime:
                recommended_anime.append(anime)

            # Limit the number of recommendations
            if len(recommended_anime) >= n:
                break

    return recommended_anime[:n]

In [28]:
# Choose a user ID for testing
user_id = 9  # Replace with the user ID you want to test

# Test user-based recommendations
user_recommendations = user_based_recommendations(user_id, user_item_matrix, user_similarity, n=5)

# Test item-based recommendations
item_recommendations = item_based_recommendations(user_id, user_item_matrix, item_similarity, n=5)

In [29]:
# Display the recommendations
print(f"User-Based Recommendations for User {user_id}:")
for anime_id in user_recommendations:
    anime_name = anime_df.loc[anime_df['anime_id'] == anime_id, 'name'].values[0]
    print(f"- {anime_name}")

print(f"\nItem-Based Recommendations for User {user_id}:")
for anime_id in item_recommendations:
    anime_name = anime_df.loc[anime_df['anime_id'] == anime_id, 'name'].values[0]
    print(f"- {anime_name}")

User-Based Recommendations for User 9:
- Campione!: Matsurowanu Kamigami to Kamigoroshi no Maou
- Yamada-kun to 7-nin no Majo (TV)
- Re:Zero kara Hajimeru Isekai Seikatsu
- Haiyore! Nyaruko-san
- Ore ga Ojousama Gakkou ni &quot;Shomin Sample&quot; Toshite Gets♥Sareta Ken

Item-Based Recommendations for User 9:
- Toshokan Sensou
- Black Jack (TV)
- Hi no Tori: Houou-hen
- Yume Miru, Anime: on-chan
- Kakyuusei 2: Sketchbook
