# Anime Recommender - Collaborative filtering

This project aims to build an anime recommender using collaborative filtering, 
a method that predicts user preferences by anticipating what someone with similar tastes would also enjoy. 

Collaborative filtering comes in two forms:

- **User-based:** Recommends items by finding similar users and suggesting items that they have liked or interacted with.
- **Item-based:** Recommends items by finding similar items to those that the user has shown interest in. Note that this is different from content-based filtering as the 'similarity' is based on its relationship with users, not the content.

By leveraging user interactions and item similarities, this recommender provides personalized anime recommendations based on user preferences and behaviors.


## Import required libraries

In [56]:
import pandas as pd
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse, mae

## Import cleaned dataset

In [26]:

anime_reviews = pd.read_csv("datasets/anime_review_cleaned.csv")
anime_data = pd.read_csv("datasets/anime_2020_clean.csv")



## Model training

### Preparation: Merging the dataset
We would filter create the user-item matrix as both user-based and item-based uses the same matrix

In [60]:

## REMOVE LATER MAYBE-------
# anime_data = anime_data.drop('score', axis=1)
##THIS LINE -------

# Merge data
merged_data = pd.merge(anime_data, reviews_data, left_on='uid', right_on='anime_uid')
# merged_data.head()

# Define rating scale
reader = Reader(rating_scale=(1, 10))

# Load data into Surprise dataset format
data = Dataset.load_from_df(merged_data[['profile', 'uid', 'score']], reader)


## Attempt 1: User-based collaborative filtering

### Create and fit model

In [35]:


# Split data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build user-based collaborative filtering model
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)



Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x16c9e9c90>

### Checking accuracy

In [50]:
# from surprise import accuracy

# Make predictions
predictions = model.test(testset)

# Evaluate model
accuracy = rmse(predictions)
print("RMSE:", accuracy)


# # Calculate RMSE and MAE
# rmse = accuracy.rmse(predictions)
# mae = accuracy.mae(predictions)

# print("RMSE:", rmse)
# print("MAE:", mae)


RMSE: 1.9756
RMSE: 1.9756028737805376


### Model testing

In [30]:
### Test for a user in the dataset

# Generate recommendations for a specific user
user_id = 'skrn'
anime_ids = [uid for uid in anime_data['uid'].values if uid not in merged_data[merged_data['profile'] == user_id]['uid'].values]

# Predict ratings for items not rated by the user
predicted_ratings = {}
for anime_id in anime_ids:
    predicted_rating = model.predict(user_id, anime_id).est
    predicted_ratings[anime_id] = predicted_rating

# Sort predicted ratings and recommend top N items
top_n = 10
recommended_anime_ids = sorted(predicted_ratings, key=predicted_ratings.get, reverse=True)[:top_n]

# Print recommended anime titles
recommended_anime_titles = anime_data[anime_data['uid'].isin(recommended_anime_ids)]['title'].values
print("Recommended Anime Titles:")
for title in recommended_anime_titles:
    print(title)

In [31]:
#test
anime_data[anime_data['uid']==10165]

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,link
4211,10165,Nichijou,Nichijou primarily focuses on the daily antic...,"['Slice of Life', 'Comedy', 'School', 'Shounen']","Apr 3, 2011 to Sep 25, 2011",26.0,497276,137,https://myanimelist.net/anime/10165/Nichijou


## Attempt 2: Item-based collaborative filtering

### Create and fit model

In [61]:
#REPEATED, maybe should remove
# # Define rating scale
# reader = Reader(rating_scale=(1, 10))

# # Load data into Surprise dataset format
# data = Dataset.load_from_df(merged_data[['profile', 'uid', 'score']], reader)

# # Split data into train and test sets
# trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build item-based collaborative filtering model
sim_options = {'name': 'cosine', 'user_based': False}  # Set user_based to False for item-based
model_2 = KNNBasic(sim_options=sim_options)
model_2.fit(trainset)


Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x15e46bbd0>

### Evaluating the model

In [52]:

# Make predictions on the test set
predictions = model_2.test(testset)

# Calculate RMSE and MAE
rmse = rmse(predictions)
mae = mae(predictions)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')


RMSE: 1.9756
MAE:  1.4661
RMSE: 1.9756028737805376
MAE: 1.466091411640768


### Testing by inputing your liked animes

In [62]:
# hunter x hunter, tokyo ghoul, Ansatsu_Kyoushitsu, Kono_Subarashii_Sekai_ni_Shukufuku_wo, Kaguya-sama, Nichijou
liked_anime_ids = [11061, 22319, 40748, 24833, 30831, 37999, 10165]

def recommend_anime(anime_uid, model, k=10):
    # Get the k nearest neighbors (items) for the given anime UID
    anime_neighbors = model.get_neighbors(anime_uid, k=k)
    return anime_neighbors

recommended = []
for anime_id in liked_anime_ids:
    recommended.extend(recommend_anime(anime_id, model_2))

# Count the occurrences of each recommended anime ID
anime_counts = {}
for anime_id in recommended:
    anime_counts[anime_id] = anime_counts.get(anime_id, 0) + 1

# Get the top 10 most frequent anime IDs
top_10_anime = sorted(anime_counts.items(), key=lambda x: x[1], reverse=True)[:10]

print("Top 10 Most Recommended Anime:")
print(top_10_anime)



IndexError: index 11061 is out of bounds for axis 0 with size 577

In [32]:
###WASTE
# hunter x hunter, tokyo ghoul, Ansatsu_Kyoushitsu, Kono_Subarashii_Sekai_ni_Shukufuku_wo
#Kaguya-sama, Nichijou

anime_ids = anime_data.uid.unique()
liked_anime_ids = [11061, 22319, 40748, 24833, 30831, 37999, 10165]

# Predict ratings for animes i have not watched
predicted_ratings = {}
for anime_id in anime_ids:
    if anime_id not in liked_anime_ids:
        predicted_rating = model.predict(uid=None, iid=anime_id).est
        predicted_ratings[anime_id] = predicted_rating

# Sort predicted ratings and recommend top N items
top_n = 10
recommended_anime_ids = sorted(predicted_ratings, key=predicted_ratings.get, reverse=True)[:top_n]

# Print recommended anime titles
recommended_anime_titles = anime_data[anime_data['uid'].isin(recommended_anime_ids)]['title'].values
print("Recommended Anime Titles:")
for title in recommended_anime_titles:
    print(title)



Recommended Anime Titles:
Cowboy Bebop
Cowboy Bebop: Tengoku no Tobira
Trigun
Witch Hunter Robin
Bouken Ou Beet
Eyeshield 21
Hachimitsu to Clover
Hungry Heart: Wild Striker
Initial D Fourth Stage
Monster


### SVD Approach

In [33]:
from surprise import SVD
from surprise.model_selection import cross_validate

# Define rating scale
reader = Reader(rating_scale=(1, 10))

# Load data into Surprise dataset format
data = Dataset.load_from_df(merged_data[['profile', 'uid', 'score']], reader)

# Split data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build SVD collaborative filtering model
model = SVD()

# Train the model
model.fit(trainset)

# Make predictions
predictions = model.test(testset)

# Evaluate model
accuracy = rmse(predictions)
print("RMSE:", accuracy)


RMSE: 1.8287
RMSE: 1.828723672662592


In [34]:
import seaborn as sns

# Convert predictions to DataFrame
df_predictions = pd.DataFrame(predictions, columns=['uid', 'iid', 'actual', 'predicted', 'details'])

# Pivot predictions DataFrame to create a user-item matrix
predicted_matrix = df_predictions.pivot(index='uid', columns='iid', values='predicted')

# Create heatmap of predicted ratings
plt.figure(figsize=(10, 8))
sns.heatmap(predicted_matrix, cmap='viridis', cbar=True, linewidths=0.5)
plt.xlabel('Item ID')
plt.ylabel('User ID')
plt.title('Predicted Ratings Heatmap')
plt.show()



NameError: name 'plt' is not defined