Importing required libraries

In [190]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import pickle

Reading the dataset

In [191]:
anime = pd.read_csv("/content/anime-dataset.csv")
ratings = pd.read_csv("/content/users-score.csv")

Analysing the dataset

In [192]:
anime.head()

Unnamed: 0,anime_id,Name,English name,Other name,Score,Genres,Synopsis,Type,Episodes,Aired,...,Studios,Source,Duration,Rating,Rank,Popularity,Favorites,Scored By,Members,Image URL
0,1,Cowboy Bebop,Cowboy Bebop,カウボーイビバップ,8.75,"Action, Award Winning, Sci-Fi","Crime is timeless. By the year 2071, humanity ...",TV,26.0,"Apr 3, 1998 to Apr 24, 1999",...,Sunrise,Original,24 min per ep,R - 17+ (violence & profanity),41.0,43,78525,914193.0,1771505,https://cdn.myanimelist.net/images/anime/4/196...
1,5,Cowboy Bebop: Tengoku no Tobira,Cowboy Bebop: The Movie,カウボーイビバップ 天国の扉,8.38,"Action, Sci-Fi","Another day, another bounty—such is the life o...",Movie,1.0,"Sep 1, 2001",...,Bones,Original,1 hr 55 min,R - 17+ (violence & profanity),189.0,602,1448,206248.0,360978,https://cdn.myanimelist.net/images/anime/1439/...
2,6,Trigun,Trigun,トライガン,8.22,"Action, Adventure, Sci-Fi","Vash the Stampede is the man with a $$60,000,0...",TV,26.0,"Apr 1, 1998 to Sep 30, 1998",...,Madhouse,Manga,24 min per ep,PG-13 - Teens 13 or older,328.0,246,15035,356739.0,727252,https://cdn.myanimelist.net/images/anime/7/203...
3,7,Witch Hunter Robin,Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),7.25,"Action, Drama, Mystery, Supernatural",Robin Sena is a powerful craft user drafted in...,TV,26.0,"Jul 3, 2002 to Dec 25, 2002",...,Sunrise,Original,25 min per ep,PG-13 - Teens 13 or older,2764.0,1795,613,42829.0,111931,https://cdn.myanimelist.net/images/anime/10/19...
4,8,Bouken Ou Beet,Beet the Vandel Buster,冒険王ビィト,6.94,"Adventure, Fantasy, Supernatural",It is the dark century and the people are suff...,TV,52.0,"Sep 30, 2004 to Sep 29, 2005",...,Toei Animation,Manga,23 min per ep,PG - Children,4240.0,5126,14,6413.0,15001,https://cdn.myanimelist.net/images/anime/7/215...


In [193]:
ratings.head()

Unnamed: 0,user_id,Username,anime_id,Anime Title,rating
0,1,Xinil,21,One Piece,9.0
1,1,Xinil,48,.hack//Sign,7.0
2,1,Xinil,320,A Kite,5.0
3,1,Xinil,49,Aa! Megami-sama!,8.0
4,1,Xinil,304,Aa! Megami-sama! Movie,8.0


In [194]:
print(anime.shape)
print(ratings.shape)

(24905, 24)
(423175, 5)


# Data Cleaning
Removing redundant columns

In [195]:
anime.drop(['English name','Other name','Score','Synopsis','Type','Episodes','Aired','Premiered','Status','Producers','Licensors','Studios','Source','Duration','Rating','Rank','Popularity','Favorites','Scored By','Members'], axis=1, inplace=True)
anime.drop_duplicates('Name')

Unnamed: 0,anime_id,Name,Genres,Image URL
0,1,Cowboy Bebop,"Action, Award Winning, Sci-Fi",https://cdn.myanimelist.net/images/anime/4/196...
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Sci-Fi",https://cdn.myanimelist.net/images/anime/1439/...
2,6,Trigun,"Action, Adventure, Sci-Fi",https://cdn.myanimelist.net/images/anime/7/203...
3,7,Witch Hunter Robin,"Action, Drama, Mystery, Supernatural",https://cdn.myanimelist.net/images/anime/10/19...
4,8,Bouken Ou Beet,"Adventure, Fantasy, Supernatural",https://cdn.myanimelist.net/images/anime/7/215...
...,...,...,...,...
24900,55731,Wu Nao Monu,"Comedy, Fantasy, Slice of Life",https://cdn.myanimelist.net/images/anime/1386/...
24901,55732,Bu Xing Si: Yuan Qi,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/1383/...
24902,55733,Di Yi Xulie,"Action, Adventure, Fantasy, Sci-Fi",https://cdn.myanimelist.net/images/anime/1130/...
24903,55734,Bokura no Saishuu Sensou,UNKNOWN,https://cdn.myanimelist.net/images/anime/1931/...


In [196]:
ratings.drop(['Username','Anime Title'], axis=1, inplace=True)
ratings

Unnamed: 0,user_id,anime_id,rating
0,1,21,9.0
1,1,48,7.0
2,1,320,5.0
3,1,49,8.0
4,1,304,8.0
...,...,...,...
423170,5063,1691,8.0
423171,5063,572,8.0
423172,5063,16662,9.0
423173,5063,24439,8.0


Removing data with no entries

In [197]:
anime.isnull().sum()

anime_id     0
Name         0
Genres       0
Image URL    0
dtype: int64

In [198]:
ratings.isnull().sum()

user_id     0
anime_id    0
rating      1
dtype: int64

In [199]:
ratings = ratings.dropna()

Combining the dataframe

In [200]:
ratings_with_name = ratings.merge(anime,on="anime_id")
ratings_with_name

Unnamed: 0,user_id,anime_id,rating,Name,Genres,Image URL
0,1,21,9.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
1,20,21,9.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
2,23,21,7.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
3,67,21,8.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
4,70,21,8.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
...,...,...,...,...,...,...
423169,5037,34538,4.0,"Soushin Shoujo Matoi: Yuma-chin, Matomemashita...",Supernatural,https://cdn.myanimelist.net/images/anime/10/83...
423170,5037,35231,6.0,Vatican Kiseki Chousakan: Majo no Soup,"Drama, Mystery, Supernatural",https://cdn.myanimelist.net/images/anime/1272/...
423171,5037,6868,4.0,Wansa-kun,Comedy,https://cdn.myanimelist.net/images/anime/9/567...
423172,5037,34961,6.0,Yume Oukoku to Nemureru 100-nin no Ouji-sama: ...,UNKNOWN,https://cdn.myanimelist.net/images/anime/12/84...


# Popularity Based Recommender System
Displaying top 100 anime with highest average ratings. Only anime with more than 500 votes are chosen.

In [201]:
num_rating_df = ratings_with_name.groupby('Name').count()['rating'].reset_index()
num_rating_df.rename(columns = {'rating' :'num_ratings'},inplace = True)
num_rating_df

Unnamed: 0,Name,num_ratings
0,"""0""",3
1,"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",40
2,"""Bungaku Shoujo"" Memoire",32
3,"""Bungaku Shoujo"" Movie",59
4,"""Calpis"" Hakkou Monogatari",1
...,...,...
10013,xxxHOLiC Rou,37
10014,xxxHOLiC Shunmuki,73
10015,xxxHOLiC◆Kei,163
10016,ēlDLIVE,10


In [202]:
avg_rating_df = ratings_with_name.groupby('Name').mean()['rating'].reset_index()
avg_rating_df.rename(columns = {'rating' :'avg_ratings'},inplace = True)
avg_rating_df

  avg_rating_df = ratings_with_name.groupby('Name').mean()['rating'].reset_index()


Unnamed: 0,Name,avg_ratings
0,"""0""",4.333333
1,"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",7.000000
2,"""Bungaku Shoujo"" Memoire",7.062500
3,"""Bungaku Shoujo"" Movie",7.271186
4,"""Calpis"" Hakkou Monogatari",5.000000
...,...,...
10013,xxxHOLiC Rou,7.945946
10014,xxxHOLiC Shunmuki,8.013699
10015,xxxHOLiC◆Kei,8.208589
10016,ēlDLIVE,6.800000


In [203]:
popular_df = num_rating_df.merge(avg_rating_df,on='Name')
popular_df['avg_ratings'] = round(popular_df['avg_ratings'],2)
popular_df

Unnamed: 0,Name,num_ratings,avg_ratings
0,"""0""",3,4.33
1,"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",40,7.00
2,"""Bungaku Shoujo"" Memoire",32,7.06
3,"""Bungaku Shoujo"" Movie",59,7.27
4,"""Calpis"" Hakkou Monogatari",1,5.00
...,...,...,...
10013,xxxHOLiC Rou,37,7.95
10014,xxxHOLiC Shunmuki,73,8.01
10015,xxxHOLiC◆Kei,163,8.21
10016,ēlDLIVE,10,6.80


In [204]:
popular_df = popular_df[popular_df['num_ratings']>=500].sort_values('avg_ratings',ascending = False).head(100)
popular_df

Unnamed: 0,Name,num_ratings,avg_ratings
2622,Fullmetal Alchemist: Brotherhood,711,9.02
7442,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,636,8.99
8975,Toki wo Kakeru Shoujo,598,8.79
8753,Tengen Toppa Gurren Lagann,782,8.77
763,Baccano!,629,8.72
...,...,...,...
1145,Bokusatsu Tenshi Dokuro-chan,558,7.23
1065,Blood: The Last Vampire,558,7.14
3969,InuYasha,739,7.11
8764,Tenjou Tenge,536,7.11


In [205]:
popular_df = popular_df.merge(anime,on='Name').drop_duplicates('Name')[['Name','Genres','Image URL','num_ratings','avg_ratings']]
popular_df

Unnamed: 0,Name,Genres,Image URL,num_ratings,avg_ratings
0,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy",https://cdn.myanimelist.net/images/anime/1208/...,711,9.02
1,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,"Action, Drama, Romance",https://cdn.myanimelist.net/images/anime/1391/...,636,8.99
2,Toki wo Kakeru Shoujo,"Award Winning, Drama, Romance, Sci-Fi",https://cdn.myanimelist.net/images/anime/1/243...,598,8.79
3,Tengen Toppa Gurren Lagann,"Action, Adventure, Award Winning, Sci-Fi",https://cdn.myanimelist.net/images/anime/4/512...,782,8.77
4,Baccano!,"Action, Comedy, Mystery, Supernatural",https://cdn.myanimelist.net/images/anime/3/145...,629,8.72
...,...,...,...,...,...
95,Bokusatsu Tenshi Dokuro-chan,"Comedy, Ecchi",https://cdn.myanimelist.net/images/anime/10/13...,558,7.23
96,Blood: The Last Vampire,"Award Winning, Horror, Supernatural",https://cdn.myanimelist.net/images/anime/9/189...,558,7.14
97,InuYasha,"Action, Adventure, Fantasy, Romance",https://cdn.myanimelist.net/images/anime/1589/...,739,7.11
98,Tenjou Tenge,"Action, Comedy, Ecchi",https://cdn.myanimelist.net/images/anime/1507/...,536,7.11


# Collaborative Filtering Based Recommender System
Considering only those users who have given ratings on more than 150 anime and only those anime who have recieved more than 200 ratings.

In [206]:
x = ratings_with_name.groupby('user_id').count()['rating'] > 150
valid_users = x[x].index
print(valid_users)

Int64Index([   1,    4,   23,   48,   66,   70,   82,   88,  119,  133,
            ...
            5034, 5036, 5037, 5038, 5040, 5042, 5043, 5044, 5059, 5063],
           dtype='int64', name='user_id', length=994)


In [207]:
filtered_rating = ratings_with_name[ratings_with_name['user_id'].isin(valid_users)]
filtered_rating

Unnamed: 0,user_id,anime_id,rating,Name,Genres,Image URL
0,1,21,9.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
2,23,21,7.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
4,70,21,8.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
7,88,21,10.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
14,119,21,8.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
...,...,...,...,...,...,...
423169,5037,34538,4.0,"Soushin Shoujo Matoi: Yuma-chin, Matomemashita...",Supernatural,https://cdn.myanimelist.net/images/anime/10/83...
423170,5037,35231,6.0,Vatican Kiseki Chousakan: Majo no Soup,"Drama, Mystery, Supernatural",https://cdn.myanimelist.net/images/anime/1272/...
423171,5037,6868,4.0,Wansa-kun,Comedy,https://cdn.myanimelist.net/images/anime/9/567...
423172,5037,34961,6.0,Yume Oukoku to Nemureru 100-nin no Ouji-sama: ...,UNKNOWN,https://cdn.myanimelist.net/images/anime/12/84...


In [208]:
y = filtered_rating.groupby('Name').count()['rating']>=200
valid_anime = y[y].index

In [209]:
final_ratings = filtered_rating[filtered_rating['Name'].isin(valid_anime)]
final_ratings

Unnamed: 0,user_id,anime_id,rating,Name,Genres,Image URL
0,1,21,9.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
2,23,21,7.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
4,70,21,8.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
7,88,21,10.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
14,119,21,8.0,One Piece,"Action, Adventure, Fantasy",https://cdn.myanimelist.net/images/anime/6/732...
...,...,...,...,...,...,...
319714,4927,894,7.0,Dragon Ball Z Movie 01: Ora no Gohan wo Kaese!!,"Action, Adventure, Comedy, Fantasy, Sci-Fi",https://cdn.myanimelist.net/images/anime/1258/...
319718,4964,894,6.0,Dragon Ball Z Movie 01: Ora no Gohan wo Kaese!!,"Action, Adventure, Comedy, Fantasy, Sci-Fi",https://cdn.myanimelist.net/images/anime/1258/...
319719,4998,894,7.0,Dragon Ball Z Movie 01: Ora no Gohan wo Kaese!!,"Action, Adventure, Comedy, Fantasy, Sci-Fi",https://cdn.myanimelist.net/images/anime/1258/...
319720,5017,894,5.0,Dragon Ball Z Movie 01: Ora no Gohan wo Kaese!!,"Action, Adventure, Comedy, Fantasy, Sci-Fi",https://cdn.myanimelist.net/images/anime/1258/...


In [210]:
pt = final_ratings.pivot_table(index='Name',columns='user_id',values='rating')
pt.fillna(0,inplace=True)
pt

user_id,1,4,23,48,66,70,82,88,119,133,...,5034,5036,5037,5038,5040,5042,5043,5044,5059,5063
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
.hack//Roots,0.0,6.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0
.hack//Sign,7.0,0.0,0.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,9.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0
.hack//Tasogare no Udewa Densetsu,0.0,5.0,0.0,7.0,8.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,7.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0
Aa! Megami-sama!,8.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aa! Megami-sama! (TV),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zero no Tsukaima: Futatsuki no Kishi,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,7.0,0.0,9.0,6.0,0.0
Zombie-Loan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,6.0,...,0.0,0.0,8.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
ef: A Tale of Memories.,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,...,8.0,0.0,0.0,9.0,9.0,8.0,0.0,10.0,8.0,9.0
s.CRY.ed,7.0,0.0,0.0,7.0,0.0,9.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Finding similar anime using cosine similarity

In [211]:
similarity_scores = cosine_similarity(pt)
similarity_scores.shape

(337, 337)

In [212]:
print(sorted(list(enumerate(similarity_scores[1])),key=lambda x:x[1],reverse=True)[1:5])
pt.index[23]

[(2, 0.7096651555268104), (0, 0.6339954363813513), (121, 0.6117290326532828), (113, 0.5851133607635807)]


'Arakawa Under the Bridge'

Defining functions for finding similar anime

In [213]:
def indx_anime(anime):
    i=0
    while (i<len(pt.index)):
        if anime.lower() in pt.index[i].lower():
            return i
        else:
            i+=1

In [214]:
def recommend(anime_name):
    # index fetch

    index = indx_anime(anime_name)
    similar_items = sorted(list(enumerate(similarity_scores[index])),key=lambda x:x[1],reverse=True)[1:5]


    data = []

    matched = []
    df_matched = anime[anime['Name'] == pt.index[index]]
    matched.extend(list(df_matched.drop_duplicates('Name')['Name'].values))
    matched.extend(list(df_matched.drop_duplicates('Name')['Genres'].values))
    matched.extend(list(df_matched.drop_duplicates('Name')['Image URL'].values))

    data.append(matched)

    for i in similar_items:
        item = []
        temp_df = anime[anime['Name'] == pt.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Name')['Name'].values))
        item.extend(list(temp_df.drop_duplicates('Name')['Genres'].values))
        item.extend(list(temp_df.drop_duplicates('Name')['Image URL'].values))

        data.append(item)

    if (len(data)!=0):
        return data
    else:
        return "anime not available"

Testing the recommender system

In [215]:
recommend('One Piece')

[['One Piece',
  'Action, Adventure, Fantasy',
  'https://cdn.myanimelist.net/images/anime/6/73245.jpg'],
 ['Naruto',
  'Action, Adventure, Fantasy',
  'https://cdn.myanimelist.net/images/anime/13/17405.jpg'],
 ['Bleach',
  'Action, Adventure, Fantasy',
  'https://cdn.myanimelist.net/images/anime/3/40451.jpg'],
 ['Fullmetal Alchemist',
  'Action, Adventure, Award Winning, Drama, Fantasy',
  'https://cdn.myanimelist.net/images/anime/10/75815.jpg'],
 ['Naruto: Shippuuden',
  'Action, Adventure, Fantasy',
  'https://cdn.myanimelist.net/images/anime/1565/111305.jpg']]

Exporting pickle file

In [216]:
pickle.dump(popular_df,open('popular.pkl','wb'))
pickle.dump(pt,open('pt.pkl','wb'))
pickle.dump(anime,open('anime.pkl','wb'))
pickle.dump(similarity_scores,open('similarity_scores.pkl','wb'))