In [8]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
#loading rating dataset
ratings = pd.read_csv("rating.csv")
print(ratings.head())

   user_id  anime_id  rating
0        1        20      -1
1        1        24      -1
2        1        79      -1
3        1       226      -1
4        1       241      -1


In [9]:
# loading movie dataset
animes = pd.read_csv("anime.csv")
print(animes.head())

   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  


In [10]:
# Statistical Analysis of Ratings
n_ratings = len(ratings)
n_animes = len(ratings['anime_id'].unique())
n_users = len(ratings['user_id'].unique())
 
print(f"Number of ratings: {n_ratings}")
print(f"Number of unique anime_id's: {n_animes}")
print(f"Number of unique users: {n_users}")
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per anime: {round(n_ratings/n_animes, 2)}")

Number of ratings: 7813737
Number of unique anime_id's: 11200
Number of unique users: 73515
Average ratings per user: 106.29
Average ratings per anime: 697.66


In [12]:
# User Rating Frequency
user_freq = ratings[['user_id', 'anime_id']].groupby(
    'user_id').count().reset_index()
user_freq.columns = ['user_id', 'n_ratings']
print(user_freq.head())

   user_id  n_ratings
0        1        153
1        2          3
2        3         94
3        4         52
4        5        467


In [13]:
# Movie Rating Analysis
# Find Lowest and Highest rated movies:
mean_rating = ratings.groupby('anime_id')[['rating']].mean()
# Lowest rated animes
lowest_rated = mean_rating['rating'].idxmin()
movies.loc[movies['anime_id'] == lowest_rated]
# Highest rated animes
highest_rated = mean_rating['rating'].idxmax()
animes.loc[animes['anime_id'] == highest_rated]
# show number of people who rated animes rated anime highest
ratings[ratings['anime_id']==highest_rated]
# show number of people who rated animes rated anime lowest
ratings[ratings['anime_id']==lowest_rated]
 
## the above animes has very low dataset. We will use bayesian average
anime_stats = ratings.groupby('anime_id')[['rating']].agg(['count', 'mean'])
anime_stats.columns = anime_stats.columns.droplevel()

In [14]:
# User-Item Matrix Creation
# Now, we create user-item matrix using scipy csr matrix
from scipy.sparse import csr_matrix
 
def create_matrix(df):
     
    N = len(df['user_id'].unique())
    M = len(df['anime_id'].unique())
     
    # Map Ids to indices
    user_mapper = dict(zip(np.unique(df["user_id"]), list(range(N))))
    anime_mapper = dict(zip(np.unique(df["anime_id"]), list(range(M))))
     
    # Map indices to IDs
    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["user_id"])))
    anime_inv_mapper = dict(zip(list(range(M)), np.unique(df["anime_id"])))
     
    user_index = [user_mapper[i] for i in df['user_id']]
    anime_index = [anime_mapper[i] for i in df['anime_id']]
 
    X = csr_matrix((df["rating"], (anime_index, user_index)), shape=(M, N))
     
    return X, user_mapper, anime_mapper, user_inv_mapper, anime_inv_mapper
     
X, user_mapper, anime_mapper, user_inv_mapper, anime_inv_mapper = create_matrix(ratings)

In [17]:
# Movie Similarity Analysis
"""
Find similar animes using KNN
"""
def find_similar_animes(anime_id, X, k, metric='cosine', show_distance=False):
     
    neighbour_ids = []
     
    anime_ind = anime_mapper[anime_id]
    anime_vec = X[anime_ind]
    k+=1
    kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
    kNN.fit(X)
    anime_vec = anime_vec.reshape(1,-1)
    neighbour = kNN.kneighbors(anime_vec, return_distance=show_distance)
    for i in range(0,k):
        n = neighbour.item(i)
        neighbour_ids.append(anime_inv_mapper[n])
    neighbour_ids.pop(0)
    return neighbour_ids
 
 
anime_titles = dict(zip(animes['anime_id'], animes['name']))
 
anime_id = 3
 
similar_ids = find_similar_animes(anime_id, X, k=10)
anime_title = anime_titles[anime_id]
 
print(f"Since you watched {anime_title}")
for i in similar_ids:
    print(anime_titles[i])

KeyError: 3

In [23]:
# Movie Recommendation with respect to Users Preference
def recommend_animes_for_user(user_id, X, user_mapper, anime_mapper, anime_inv_mapper, k=10):
    df1 = ratings[ratings['user_id'] == user_id]
     
    if df1.empty:
        print(f"User with ID {user_id} does not exist.")
        return
 
    anime_id = df1[df1['rating'] == max(df1['rating'])]['anime_id'].iloc[0]
 
    anime_titles = dict(zip(animes['anime_id'], animes['name']))
 
    similar_ids = find_similar_movies(anime_id, X, k)
    anime_title = anime_titles.get(anime_id, "Movie not found")
 
    if anime_title == "Movie not found":
        print(f"Movie with ID {anime_id} not found.")
        return
 
    print(f"Since you watched {anime_title}, you might also like:")
    for i in similar_ids:
        print(anime_titles.get(i, "Anime not found"))

In [24]:
# Reccomment the movies
user_id = 20  # Replace with the desired user ID
recommend_animes_for_user(user_id, X, user_mapper, anime_mapper, anime_inv_mapper, k=10)

Since you watched InuYasha, you might also like:
InuYasha: Kanketsu-hen
InuYasha: Kagami no Naka no Mugenjo
InuYasha: Guren no Houraijima
InuYasha: Toki wo Koeru Omoi
InuYasha: Tenka Hadou no Ken
Fullmetal Alchemist
Yuu☆Yuu☆Hakusho
Rurouni Kenshin: Meiji Kenkaku Romantan
Dragon Ball Z
Naruto


In [25]:
user_id = 79  # Replace with the desired user ID
recommend_animes_for_user(user_id, X, user_mapper, anime_mapper, anime_inv_mapper, k=10)

Since you watched Mononoke Hime, you might also like:
Sen to Chihiro no Kamikakushi
Howl no Ugoku Shiro
Tonari no Totoro
Kaze no Tani no Nausicaä
Tenkuu no Shiro Laputa
Majo no Takkyuubin
Hotaru no Haka
Toki wo Kakeru Shoujo
Gake no Ue no Ponyo
Akira


In [26]:
user_id = 1  # Replace with the desired user ID
recommend_animes_for_user(user_id, X, user_mapper, anime_mapper, anime_inv_mapper, k=10)

Since you watched Highschool of the Dead, you might also like:
Sword Art Online
Angel Beats!
High School DxD
Highschool of the Dead: Drifters of the Dead
Mirai Nikki (TV)
Deadman Wonderland
Elfen Lied
Shingeki no Kyojin
Death Note
Kore wa Zombie Desu ka?


In [27]:
user_id = 2  # Replace with the desired user ID
recommend_animes_for_user(user_id, X, user_mapper, anime_mapper, anime_inv_mapper, k=10)

Since you watched Kuroko no Basket, you might also like:
Kuroko no Basket 2nd Season
Kuroko no Basket 3rd Season
Haikyuu!!
Shingeki no Kyojin
Ao no Exorcist
Sword Art Online
Free!
Noragami
Tokyo Ghoul
Magi: The Labyrinth of Magic


In [30]:
user_id = 6376242367777856372  # Replace with the desired user ID
recommend_animes_for_user(user_id, X, user_mapper, anime_mapper, anime_inv_mapper, k=10)

User with ID 6376242367777856372 does not exist.
