# Content Based Recommender System

Configure the project. Indeed you create a dataset in csv format.

In [1]:
! rm -rf *.csv
! unzip ./dataset/archive.zip

Archive:  ./dataset/archive.zip
  inflating: anime.csv               
  inflating: rating_complete.csv     




Import needed libraries

In [2]:
import pandas as pd

%matplotlib inline

Read data from csv files using pandas and store in data frame structure. Also shuffle data to have uniform distribution. 

In [3]:
anime_df = pd.read_csv("anime.csv")
anime_df = anime_df.sample(frac=1.0, random_state=42).reset_index(drop=True)
anime_df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,40176,Miru Tights: Cosplay Satsuei Tights,6.53,"Ecchi, School",Unknown,みるタイツ コスプレ撮影 タイツ,Special,1,"Aug 23, 2019",Unknown,...,875.0,350.0,762.0,1526.0,1542.0,924.0,384.0,245.0,162.0,148.0
1,13969,Thermae Romae x Yoyogi Animation Gakuin Collab...,6.29,"Comedy, Historical, Seinen",Unknown,テルマエ・ロマエｘ代々木アニメーション学院企業コラボレーション,Special,1,"Jul 9, 2012",Unknown,...,35.0,47.0,114.0,253.0,240.0,162.0,63.0,29.0,10.0,10.0
2,13459,Ribbon-chan,Unknown,Comedy,Unknown,リボンちゃん,TV,24,"Apr 4, 2012 to Mar 27, 2013",Spring 2012,...,7.0,Unknown,Unknown,2.0,2.0,4.0,1.0,Unknown,2.0,7.0
3,15617,Jinrui wa Suitai Shimashita Specials,7.23,"Comedy, Fantasy, Seinen",Humanity Has Declined Specials,人類は衰退しました,Special,6,"Sep 19, 2012 to Feb 20, 2013",Unknown,...,451.0,885.0,2432.0,3038.0,1388.0,588.0,130.0,38.0,22.0,19.0
4,19157,Youkai Watch,6.54,"Comedy, Demons, Kids, Supernatural",Yo-kai Watch,妖怪ウォッチ,TV,214,"Jan 8, 2014 to Mar 30, 2018",Winter 2014,...,517.0,532.0,1141.0,1912.0,1636.0,1196.0,500.0,228.0,138.0,125.0


In [4]:
rates_df = pd.read_csv("rating_complete.csv")
rates_df = rates_df.sample(frac=1.0, random_state=42).reset_index(drop=True)
rates_df.head()

Unnamed: 0,user_id,anime_id,rating
0,126602,18199,3
1,162615,39036,10
2,25497,34124,2
3,360,39607,5
4,1032,2608,10


In [5]:
print(rates_df['user_id'].value_counts())
print("\n" + "#" * 80 + "\n")
print(rates_df['anime_id'].value_counts())

user_id
68042     4734
10255     4509
162615    4474
189037    4260
38143     3544
          ... 
125529       1
310701       1
287547       1
182161       1
44941        1
Name: count, Length: 27341, dtype: int64

################################################################################

anime_id
32219    30
1914     30
1720     30
36672    30
3162     30
         ..
40959     1
40674     1
39685     1
40594     1
42144     1
Name: count, Length: 16872, dtype: int64


In [6]:
# summarize data
rates_df.describe() 

Unnamed: 0,user_id,anime_id,rating
count,447990.0,447990.0,447990.0
mean,48705.908194,19412.419686,6.258184
std,78817.293788,14646.009559,2.143759
min,0.0,1.0,1.0
25%,1578.0,4654.0,5.0
50%,10457.0,17731.0,6.0
75%,61131.25,34122.0,8.0
max,353328.0,48456.0,10.0


## Data Cleaning

In [7]:
anime_df.columns

Index(['MAL_ID', 'Name', 'Score', 'Genres', 'English name', 'Japanese name',
       'Type', 'Episodes', 'Aired', 'Premiered', 'Producers', 'Licensors',
       'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity',
       'Members', 'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped',
       'Plan to Watch', 'Score-10', 'Score-9', 'Score-8', 'Score-7', 'Score-6',
       'Score-5', 'Score-4', 'Score-3', 'Score-2', 'Score-1'],
      dtype='object')

In [8]:
anime_df['Genres'] = anime_df.Genres.str.split(',')
anime_df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,40176,Miru Tights: Cosplay Satsuei Tights,6.53,"[Ecchi, School]",Unknown,みるタイツ コスプレ撮影 タイツ,Special,1,"Aug 23, 2019",Unknown,...,875.0,350.0,762.0,1526.0,1542.0,924.0,384.0,245.0,162.0,148.0
1,13969,Thermae Romae x Yoyogi Animation Gakuin Collab...,6.29,"[Comedy, Historical, Seinen]",Unknown,テルマエ・ロマエｘ代々木アニメーション学院企業コラボレーション,Special,1,"Jul 9, 2012",Unknown,...,35.0,47.0,114.0,253.0,240.0,162.0,63.0,29.0,10.0,10.0
2,13459,Ribbon-chan,Unknown,[Comedy],Unknown,リボンちゃん,TV,24,"Apr 4, 2012 to Mar 27, 2013",Spring 2012,...,7.0,Unknown,Unknown,2.0,2.0,4.0,1.0,Unknown,2.0,7.0
3,15617,Jinrui wa Suitai Shimashita Specials,7.23,"[Comedy, Fantasy, Seinen]",Humanity Has Declined Specials,人類は衰退しました,Special,6,"Sep 19, 2012 to Feb 20, 2013",Unknown,...,451.0,885.0,2432.0,3038.0,1388.0,588.0,130.0,38.0,22.0,19.0
4,19157,Youkai Watch,6.54,"[Comedy, Demons, Kids, Supernatural]",Yo-kai Watch,妖怪ウォッチ,TV,214,"Jan 8, 2014 to Mar 30, 2018",Winter 2014,...,517.0,532.0,1141.0,1912.0,1636.0,1196.0,500.0,228.0,138.0,125.0


In [9]:
animeWithGenres_df = anime_df.drop(columns=['Score-10', 'Score-9', 'Score-8', 'Score-7', 'Score-6', 'Score-5', 'Score-4', 'Score-3', 'Score-2', 'Score-1'])

for index, row in anime_df.iterrows():
    for genre in row['Genres']:
        animeWithGenres_df.at[index, genre] = 1

#Filling in the NaN values with 0 
animeWithGenres_df = animeWithGenres_df.fillna(0)


animeWithGenres_df

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Super Power,Psychological,Yuri,Samurai,Martial Arts,Josei,Shoujo,Seinen,Yaoi,Shounen Ai
0,40176,Miru Tights: Cosplay Satsuei Tights,6.53,"[Ecchi, School]",Unknown,みるタイツ コスプレ撮影 タイツ,Special,1,"Aug 23, 2019",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,13969,Thermae Romae x Yoyogi Animation Gakuin Collab...,6.29,"[Comedy, Historical, Seinen]",Unknown,テルマエ・ロマエｘ代々木アニメーション学院企業コラボレーション,Special,1,"Jul 9, 2012",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,13459,Ribbon-chan,Unknown,[Comedy],Unknown,リボンちゃん,TV,24,"Apr 4, 2012 to Mar 27, 2013",Spring 2012,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,15617,Jinrui wa Suitai Shimashita Specials,7.23,"[Comedy, Fantasy, Seinen]",Humanity Has Declined Specials,人類は衰退しました,Special,6,"Sep 19, 2012 to Feb 20, 2013",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,19157,Youkai Watch,6.54,"[Comedy, Demons, Kids, Supernatural]",Yo-kai Watch,妖怪ウォッチ,TV,214,"Jan 8, 2014 to Mar 30, 2018",Winter 2014,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17557,32238,"Watashi wa, Kairaku Izonshou",6.2,[Hentai],Unknown,私は、快楽依存症,OVA,2,"Feb 26, 2016 to May 20, 2016",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17558,33552,Mameshiba Bangai-hen,5.75,"[Music, Comedy]",Unknown,豆しば番外編,Special,5,"2008 to Jun 20, 2019",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17559,8476,Otome Youkai Zakuro,7.47,"[Demons, Historical, Military, Romance, Se...",Zakuro,おとめ妖怪 ざくろ,TV,13,"Oct 5, 2010 to Dec 28, 2010",Fall 2010,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17560,953,Jyu Oh Sei,7.26,"[Action, Sci-Fi, Adventure, Mystery, Drama...",Jyu-Oh-Sei:Planet of the Beast King,獣王星,TV,11,"Apr 14, 2006 to Jun 23, 2006",Spring 2006,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Get the input from user

In [10]:
userInput = [
            {'Title':'ERASED', 'Rating':10},
            {'Title':'Violet Evergarden', 'Rating':9.5},
            {'Title':'Goblin Slayer', 'Rating':6},
            {'Title':"Berserk", 'Rating':8},
            {'Title':'Attack on Titan', 'Rating':7},
            {'Title':"Tokyo Ghoul", 'Rating':6.5},
            {'Title':"Orange", 'Rating':6},
            {'Title':"Death Parade", 'Rating':8},
            {'Title':"Death Note", 'Rating':7.5},
            {'Title':"Bungou Stray Dogs", 'Rating':7.5},
            {'Title':"Weathering With You", 'Rating':8},
            {'Title':"Your Name", 'Rating':8},
            {'Title':"I want to eat your pancreas", 'Rating':8.5},
            {'Title':"Princess Mononoke", 'Rating':7.5},
            {'Title':"Spirited Away", 'Rating':7.5},
            {'Title':"A Silent Voice", 'Rating':8.5},
            {'Title':"Ao Haru Ride", 'Rating':5.5},
            {'Title':"The Girl Who Leapt Through Time", 'Rating':7},
            {'Title':"Another", 'Rating':7.5},
            {'Title':"Demon Slayer", 'Rating':7},
            {'Title':"Your Lie in April", 'Rating':8},
            {'Title':"5 Centimeters per Second", 'Rating':6},
            {'Title':"The Anthem of the Heart", 'Rating':7.5},
            {'Title':"Evangelion", 'Rating':5}
         ] 
inputAnime = pd.DataFrame(userInput)
inputAnime

Unnamed: 0,Title,Rating
0,ERASED,10.0
1,Violet Evergarden,9.5
2,Goblin Slayer,6.0
3,Berserk,8.0
4,Attack on Titan,7.0
5,Tokyo Ghoul,6.5
6,Orange,6.0
7,Death Parade,8.0
8,Death Note,7.5
9,Bungou Stray Dogs,7.5


In [11]:
animeWithGenres_df.columns[:25]

Index(['MAL_ID', 'Name', 'Score', 'Genres', 'English name', 'Japanese name',
       'Type', 'Episodes', 'Aired', 'Premiered', 'Producers', 'Licensors',
       'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity',
       'Members', 'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped',
       'Plan to Watch'],
      dtype='object')

In [12]:
anime_df['Name_lower'] = anime_df['Name'].str.lower()
anime_df['English_lower'] = anime_df['English name'].str.lower()
inputAnime['Title_lower'] = inputAnime['Title'].str.lower()

def find_best_match(Title, anime_df):
    if Title in anime_df['Name_lower'].values:
        return anime_df[anime_df['Name_lower'] == Title]['Name'].values[0]
    elif Title in anime_df['English_lower'].values:
        return anime_df[anime_df['English_lower'] == Title]['Name'].values[0]
    else:
        for idx, row in anime_df.iterrows():
            if Title in row['Name_lower'] or Title in row['English_lower']:
                return row['Name']
    return None

inputAnime['best_match'] = inputAnime['Title_lower'].apply(find_best_match, anime_df=anime_df)
inputAnime = inputAnime.dropna(subset=['best_match'])
inputAnime['Title'] = inputAnime['best_match']
anime_df.drop(['Name_lower', 'English_lower'], axis=1, inplace=True)
inputAnime.drop(['Title_lower', 'best_match'], axis=1, inplace=True)

inputAnime

Unnamed: 0,Title,Rating
0,Boku dake ga Inai Machi,10.0
1,Violet Evergarden,9.5
2,Goblin Slayer,6.0
3,Berserk,8.0
4,Shingeki no Kyojin,7.0
5,Tokyo Ghoul,6.5
6,Orange,6.0
7,Death Parade,8.0
8,Death Note,7.5
9,Bungou Stray Dogs,7.5


In [13]:
inputAnime = inputAnime.merge(anime_df[['MAL_ID', 'Name']], left_on='Title', right_on='Name', how='left')
inputAnime = inputAnime[['MAL_ID', 'Title', 'Rating']]

inputAnime = inputAnime.sort_values(by='MAL_ID')
inputAnime = inputAnime.reset_index(drop=True)

inputAnime

Unnamed: 0,MAL_ID,Title,Rating
0,164,Mononoke Hime,7.5
1,199,Sen to Chihiro no Kamikakushi,7.5
2,1535,Death Note,7.5
3,1689,Byousoku 5 Centimeter,6.0
4,2236,Toki wo Kakeru Shoujo,7.0
5,11111,Another,7.5
6,16498,Shingeki no Kyojin,7.0
7,21995,Ao Haru Ride,5.5
8,22319,Tokyo Ghoul,6.5
9,23273,Shigatsu wa Kimi no Uso,8.0


In [14]:
userAnimes = animeWithGenres_df[animeWithGenres_df['MAL_ID'].isin(inputAnime['MAL_ID'].tolist())]
userAnimes = userAnimes.sort_values(by='MAL_ID')
userAnimes

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Super Power,Psychological,Yuri,Samurai,Martial Arts,Josei,Shoujo,Seinen,Yaoi,Shounen Ai
12056,164,Mononoke Hime,8.72,"[Action, Adventure, Fantasy]",Princess Mononoke,もののけ姫,Movie,1,"Jul 12, 1997",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2741,199,Sen to Chihiro no Kamikakushi,8.83,"[Adventure, Supernatural, Drama]",Spirited Away,千と千尋の神隠し,Movie,1,"Jul 20, 2001",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2052,1535,Death Note,8.63,"[Mystery, Police, Psychological, Supernatur...",Death Note,デスノート,TV,37,"Oct 4, 2006 to Jun 27, 2007",Fall 2006,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2140,1689,Byousoku 5 Centimeter,7.73,"[Drama, Romance, Slice of Life]",5 Centimeters Per Second,秒速５センチメートル,Movie,3,"Mar 3, 2007",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17371,2236,Toki wo Kakeru Shoujo,8.2,"[Adventure, Drama, Romance, Sci-Fi]",The Girl Who Leapt Through Time,時をかける少女,Movie,1,"Jul 15, 2006",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5164,11111,Another,7.55,"[Mystery, Horror, Supernatural, Thriller, ...",Another,アナザー,TV,12,"Jan 10, 2012 to Mar 27, 2012",Winter 2012,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3936,16498,Shingeki no Kyojin,8.48,"[Action, Military, Mystery, Super Power, D...",Attack on Titan,進撃の巨人,TV,25,"Apr 7, 2013 to Sep 29, 2013",Spring 2013,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16704,21995,Ao Haru Ride,7.67,"[Comedy, Drama, Romance, School, Shoujo, ...",Blue Spring Ride,アオハライド,TV,12,"Jul 8, 2014 to Sep 23, 2014",Summer 2014,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16584,22319,Tokyo Ghoul,7.81,"[Action, Mystery, Horror, Psychological, S...",Tokyo Ghoul,東京喰種-トーキョーグール-,TV,12,"Jul 4, 2014 to Sep 19, 2014",Summer 2014,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4752,23273,Shigatsu wa Kimi no Uso,8.74,"[Drama, Music, Romance, School, Shounen]",Your Lie in April,四月は君の嘘,TV,22,"Oct 10, 2014 to Mar 20, 2015",Fall 2014,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Remove the anime's that the user has seen from the whole list.

In [15]:
animeWithGenres_df = animeWithGenres_df[~animeWithGenres_df.isin(userAnimes).all(axis=1)]
animeWithGenres_df

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Super Power,Psychological,Yuri,Samurai,Martial Arts,Josei,Shoujo,Seinen,Yaoi,Shounen Ai
0,40176,Miru Tights: Cosplay Satsuei Tights,6.53,"[Ecchi, School]",Unknown,みるタイツ コスプレ撮影 タイツ,Special,1,"Aug 23, 2019",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,13969,Thermae Romae x Yoyogi Animation Gakuin Collab...,6.29,"[Comedy, Historical, Seinen]",Unknown,テルマエ・ロマエｘ代々木アニメーション学院企業コラボレーション,Special,1,"Jul 9, 2012",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,13459,Ribbon-chan,Unknown,[Comedy],Unknown,リボンちゃん,TV,24,"Apr 4, 2012 to Mar 27, 2013",Spring 2012,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,15617,Jinrui wa Suitai Shimashita Specials,7.23,"[Comedy, Fantasy, Seinen]",Humanity Has Declined Specials,人類は衰退しました,Special,6,"Sep 19, 2012 to Feb 20, 2013",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,19157,Youkai Watch,6.54,"[Comedy, Demons, Kids, Supernatural]",Yo-kai Watch,妖怪ウォッチ,TV,214,"Jan 8, 2014 to Mar 30, 2018",Winter 2014,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17557,32238,"Watashi wa, Kairaku Izonshou",6.2,[Hentai],Unknown,私は、快楽依存症,OVA,2,"Feb 26, 2016 to May 20, 2016",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17558,33552,Mameshiba Bangai-hen,5.75,"[Music, Comedy]",Unknown,豆しば番外編,Special,5,"2008 to Jun 20, 2019",Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17559,8476,Otome Youkai Zakuro,7.47,"[Demons, Historical, Military, Romance, Se...",Zakuro,おとめ妖怪 ざくろ,TV,13,"Oct 5, 2010 to Dec 28, 2010",Fall 2010,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17560,953,Jyu Oh Sei,7.26,"[Action, Sci-Fi, Adventure, Mystery, Drama...",Jyu-Oh-Sei:Planet of the Beast King,獣王星,TV,11,"Apr 14, 2006 to Jun 23, 2006",Spring 2006,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
userAnimes = userAnimes.reset_index(drop=True)

userGenreTable = userAnimes.iloc[:, 25:]
userGenreTable

Unnamed: 0,Ecchi,School,Comedy,Historical,Seinen,Fantasy,Demons,Kids,Supernatural,Slice of Life,...,Super Power,Psychological,Yuri,Samurai,Martial Arts,Josei,Shoujo,Seinen.1,Yaoi,Shounen Ai
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
inputAnime.Rating

0      7.5
1      7.5
2      7.5
3      6.0
4      7.0
5      7.5
6      7.0
7      5.5
8      6.5
9      8.0
10     8.0
11     7.5
12     8.5
13    10.0
14     5.0
15     7.5
16     8.0
17     8.0
18     6.0
19     9.5
20     8.5
21     6.0
22     7.0
23     8.0
Name: Rating, dtype: float64

In [18]:
userProfile = userGenreTable.transpose().dot(inputAnime['Rating'])
userProfile

Ecchi           0.0
 School        51.0
Comedy         10.5
 Historical     7.0
 Seinen        32.0
               ... 
Josei           0.0
Shoujo          0.0
Seinen          0.0
Yaoi            0.0
Shounen Ai      0.0
Length: 84, dtype: float64

In [19]:
genreTable = animeWithGenres_df.set_index(animeWithGenres_df['MAL_ID'])
genreTable = genreTable.iloc[:, 25:]
genreTable.head()

Unnamed: 0_level_0,Ecchi,School,Comedy,Historical,Seinen,Fantasy,Demons,Kids,Supernatural,Slice of Life,...,Super Power,Psychological,Yuri,Samurai,Martial Arts,Josei,Shoujo,Seinen,Yaoi,Shounen Ai
MAL_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
40176,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13969,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13459,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15617,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19157,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
genreTable.shape

(17538, 84)

In [21]:
recommendationTable_df = ((genreTable*userProfile).sum(axis=1))/(userProfile.sum())
recommendationTable_df.head()

MAL_ID
40176    0.064721
13969    0.062817
13459    0.013325
15617    0.112310
19157    0.120558
dtype: float64

In [22]:
recommendationTable_df = recommendationTable_df.sort_values(ascending=False)
recommendationTable_df.head()

MAL_ID
35009    0.517132
33       0.517132
449      0.496193
450      0.496193
451      0.496193
dtype: float64

In [23]:
top_mal_ids = recommendationTable_df.head(10).keys()
anime_df.set_index('MAL_ID').loc[top_mal_ids].reset_index()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,35009,Berserk Recap,6.02,"[Action, Adventure, Demons, Drama, Fantasy...",Unknown,ベルセルク 第1期ダイジェスト映像,Special,1,"Mar 3, 2017",Unknown,...,373.0,212.0,433.0,797.0,1019.0,663.0,293.0,183.0,140.0,231.0
1,33,Kenpuu Denki Berserk,8.49,"[Action, Adventure, Demons, Drama, Fantasy...",Berserk,剣風伝奇ベルセルク,TV,25,"Oct 8, 1997 to Apr 1, 1998",Fall 1997,...,58627.0,65906.0,60815.0,29055.0,9477.0,3899.0,1748.0,671.0,456.0,842.0
2,449,InuYasha Movie 4: Guren no Houraijima,7.54,"[Action, Adventure, Comedy, Historical, De...",InuYasha the Movie 4:Fire on the Mystic Island,犬夜叉 紅蓮の蓬莱島,Movie,1,"Dec 23, 2004",Unknown,...,5230.0,6127.0,9865.0,11837.0,5135.0,2190.0,671.0,225.0,92.0,73.0
3,450,InuYasha Movie 2: Kagami no Naka no Mugenjo,7.66,"[Action, Adventure, Comedy, Historical, De...",InuYasha the Movie 2:The Castle Beyond the Loo...,犬夜叉 鏡の中の夢幻城,Movie,1,"Dec 21, 2002",Unknown,...,6722.0,7566.0,11990.0,12862.0,5409.0,2184.0,607.0,206.0,96.0,71.0
4,451,InuYasha Movie 3: Tenka Hadou no Ken,7.8,"[Action, Adventure, Comedy, Historical, De...",InuYasha the Movie 3:Swords of an Honorable Ruler,犬夜叉 天下覇道の剣,Movie,1,"Dec 20, 2003",Unknown,...,6718.0,7647.0,11985.0,11322.0,4397.0,1687.0,395.0,160.0,60.0,66.0
5,452,InuYasha Movie 1: Toki wo Koeru Omoi,7.56,"[Action, Adventure, Comedy, Historical, De...",InuYasha the Movie:Affections Touching Across ...,犬夜叉 時代を越える想い,Movie,1,"Dec 22, 2001",Unknown,...,6033.0,6802.0,11048.0,13002.0,5767.0,2369.0,620.0,255.0,104.0,93.0
6,969,Tsubasa Chronicle 2nd Season,7.6,"[Action, Adventure, Fantasy, Romance, Supe...",Tsubasa RESERVoir CHRoNiCLE Season Two,ツバサ・クロニクル 第2シリーズ,TV,26,"Apr 29, 2006 to Nov 4, 2006",Spring 2006,...,6346.0,8909.0,14525.0,13920.0,6024.0,2757.0,1101.0,401.0,215.0,139.0
7,4938,Tsubasa: Shunraiki,8.13,"[Action, Adventure, Mystery, Supernatural, ...",Tsubasa RESERVoir CHRoNiCLE:Spring Thunder Chr...,ツバサ 春雷記,OVA,2,"Mar 17, 2009 to May 15, 2009",Unknown,...,4738.0,6561.0,8069.0,5065.0,1749.0,691.0,186.0,74.0,32.0,62.0
8,34055,Berserk 2nd Season,6.69,"[Action, Adventure, Demons, Drama, Fantasy...",Berserk:Season II,ベルセルク,TV,12,"Apr 7, 2017 to Jun 23, 2017",Spring 2017,...,5577.0,6876.0,12904.0,15677.0,9697.0,5290.0,4305.0,2697.0,2122.0,2798.0
9,2983,Digital Devil Story: Megami Tensei,5.21,"[Adventure, Mystery, Horror, Demons, Psych...",Unknown,デジタル・デビル物語〈ストーリ〉 女神転生,OVA,1,"Mar 25, 1987",Unknown,...,60.0,62.0,170.0,378.0,609.0,634.0,452.0,271.0,165.0,83.0
