## Import required libraries

In [2]:
import pandas as pd
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

## Import cleaned dataset

In [19]:

anime_reviews = pd.read_csv("datasets/anime_review_cleaned.csv")
anime_data = pd.read_csv("datasets/anime_2020_clean.csv")



## Creating user-item matrix

In [20]:

## REMOVE LATER MAYBE-------
reviews_data = reviews_data.drop(['text', 'uid', 'link'], axis=1)
anime_data = anime_data.drop('score', axis=1)
##THIS LINE -------

# Merge data
merged_data = pd.merge(anime_data, reviews_data, left_on='uid', right_on='anime_uid')
merged_data.head()


Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,link,profile,anime_uid,score,scores
0,6,Trigun,"Vash the Stampede is the man with a $$60,000,0...","['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Apr 1, 1998 to Sep 30, 1998",26.0,460146,158,https://myanimelist.net/anime/6/Trigun,AxiomOpus,6,9,"{'Overall': '9', 'Story': '9', 'Animation': '9..."
1,6,Trigun,"Vash the Stampede is the man with a $$60,000,0...","['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Apr 1, 1998 to Sep 30, 1998",26.0,460146,158,https://myanimelist.net/anime/6/Trigun,DanteMustDie8907,6,8,"{'Overall': '8', 'Story': '8', 'Animation': '5..."
2,6,Trigun,"Vash the Stampede is the man with a $$60,000,0...","['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Apr 1, 1998 to Sep 30, 1998",26.0,460146,158,https://myanimelist.net/anime/6/Trigun,OkazukiIchigo,6,10,"{'Overall': '10', 'Story': '10', 'Animation': ..."
3,6,Trigun,"Vash the Stampede is the man with a $$60,000,0...","['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Apr 1, 1998 to Sep 30, 1998",26.0,460146,158,https://myanimelist.net/anime/6/Trigun,cobrascope,6,9,"{'Overall': '9', 'Story': '0', 'Animation': '0..."
4,6,Trigun,"Vash the Stampede is the man with a $$60,000,0...","['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Apr 1, 1998 to Sep 30, 1998",26.0,460146,158,https://myanimelist.net/anime/6/Trigun,MrStealYourNanay,6,9,"{'Overall': '9', 'Story': '10', 'Animation': '..."


## Model Training: User-based Collaborative training

In [5]:

# Define rating scale
reader = Reader(rating_scale=(1, 10))

# Load data into Surprise dataset format
data = Dataset.load_from_df(merged_data[['profile', 'uid', 'score']], reader)

# Split data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build user-based collaborative filtering model
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)

# Make predictions
predictions = model.test(testset)

# Evaluate model
accuracy = rmse(predictions)
print("RMSE:", accuracy)



# Generate recommendations for a specific user
user_id = 'skrn'
anime_ids = [uid for uid in anime_data['uid'].values if uid not in merged_data[merged_data['profile'] == user_id]['uid'].values]

# Predict ratings for items not rated by the user
predicted_ratings = {}
for anime_id in anime_ids:
    predicted_rating = model.predict(user_id, anime_id).est
    predicted_ratings[anime_id] = predicted_rating

# Sort predicted ratings and recommend top N items
top_n = 10
recommended_anime_ids = sorted(predicted_ratings, key=predicted_ratings.get, reverse=True)[:top_n]

# Print recommended anime titles
recommended_anime_titles = anime_data[anime_data['uid'].isin(recommended_anime_ids)]['title'].values
print("Recommended Anime Titles:")
for title in recommended_anime_titles:
    print(title)


Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.8969
RMSE: 1.896932641007293
Recommended Anime Titles:
Gintama Movie 2: Kanketsu-hen - Yorozuya yo Eien Nare
Clannad: After Story
Saraiya Goyou
Kuroko no Basket 2nd Season NG-shuu
Bungou Stray Dogs: Dead Apple
Les Misérables: Shoujo Cosette
Kuroko no Basket: Tip Off
Flanders no Inu (Movie)
Terra e... (TV)
High☆Speed!: Free! Starting Days
Gintama Movie 2: Kanketsu-hen - Yorozuya yo Eien Nare
Clannad: After Story
Saraiya Goyou
Kuroko no Basket 2nd Season NG-shuu
Bungou Stray Dogs: Dead Apple
Les Misérables: Shoujo Cosette
Kuroko no Basket: Tip Off
Flanders no Inu (Movie)
Terra e... (TV)
High☆Speed!: Free! Starting Days


## Checking accuracy

In [6]:
# from surprise import accuracy

# # Calculate RMSE and MAE
# rmse = accuracy.rmse(predictions)
# mae = accuracy.mae(predictions)

# print("RMSE:", rmse)
# print("MAE:", mae)


## Model testing

In [7]:
### Test for a user in the dataset

In [8]:
#test
anime_data[anime_data['uid']==10165]

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,img_url,link
696,10165,Nichijou,Nichijou primarily focuses on the daily antic...,"['Slice of Life', 'Comedy', 'School', 'Shounen']","Apr 3, 2011 to Sep 25, 2011",26.0,497276,137,97.0,https://cdn.myanimelist.net/images/anime/3/756...,https://myanimelist.net/anime/10165/Nichijou
16741,10165,Nichijou,Nichijou primarily focuses on the daily antic...,"['Slice of Life', 'Comedy', 'School', 'Shounen']","Apr 3, 2011 to Sep 25, 2011",26.0,497276,137,97.0,https://cdn.myanimelist.net/images/anime/3/756...,https://myanimelist.net/anime/10165/Nichijou


### Testing by inputing your liked animes

In [9]:

# hunter x hunter, tokyo ghoul, Ansatsu_Kyoushitsu, Kono_Subarashii_Sekai_ni_Shukufuku_wo
#Kaguya-sama, Nichijou

anime_ids = anime_data.uid.unique()
liked_anime_ids = [11061, 22319, 40748, 24833, 30831, 37999, 10165]

# Predict ratings for animes i have not watched
predicted_ratings = {}
for anime_id in anime_ids:
    if anime_id not in liked_anime_ids:
        predicted_rating = model.predict(uid=None, iid=anime_id).est
        predicted_ratings[anime_id] = predicted_rating

# Sort predicted ratings and recommend top N items
top_n = 10
recommended_anime_ids = sorted(predicted_ratings, key=predicted_ratings.get, reverse=True)[:top_n]

# Print recommended anime titles
recommended_anime_titles = anime_data[anime_data['uid'].isin(recommended_anime_ids)]['title'].values
print("Recommended Anime Titles:")
for title in recommended_anime_titles:
    print(title)



Recommended Anime Titles:
Haikyuu!! Second Season
Shigatsu wa Kimi no Uso
Made in Abyss
Fullmetal Alchemist: Brotherhood
Kizumonogatari III: Reiketsu-hen
Mob Psycho 100 II
Sen to Chihiro no Kamikakushi
Kimetsu no Yaiba
Owarimonogatari 2nd Season
Code Geass: Hangyaku no Lelouch R2
Haikyuu!! Second Season
Shigatsu wa Kimi no Uso
Made in Abyss
Fullmetal Alchemist: Brotherhood
Kizumonogatari III: Reiketsu-hen
Mob Psycho 100 II
Sen to Chihiro no Kamikakushi
Kimetsu no Yaiba
Owarimonogatari 2nd Season
Code Geass: Hangyaku no Lelouch R2


### SVD Approach

In [10]:
from surprise import SVD
from surprise.model_selection import cross_validate

# Define rating scale
reader = Reader(rating_scale=(1, 10))

# Load data into Surprise dataset format
data = Dataset.load_from_df(merged_data[['profile', 'uid', 'score']], reader)

# Split data into train and test sets
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build SVD collaborative filtering model
model = SVD()

# Train the model
model.fit(trainset)

# Make predictions
predictions = model.test(testset)

# Evaluate model
accuracy = rmse(predictions)
print("RMSE:", accuracy)


RMSE: 1.0389
RMSE: 1.0389390573968855


In [11]:
import seaborn as sns

# Convert predictions to DataFrame
df_predictions = pd.DataFrame(predictions, columns=['uid', 'iid', 'actual', 'predicted', 'details'])

# Pivot predictions DataFrame to create a user-item matrix
predicted_matrix = df_predictions.pivot(index='uid', columns='iid', values='predicted')

# Create heatmap of predicted ratings
plt.figure(figsize=(10, 8))
sns.heatmap(predicted_matrix, cmap='viridis', cbar=True, linewidths=0.5)
plt.xlabel('Item ID')
plt.ylabel('User ID')
plt.title('Predicted Ratings Heatmap')
plt.show()



ValueError: Index contains duplicate entries, cannot reshape