In [1]:
import pandas as pd
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import spacy
from tqdm.notebook import tqdm

In [2]:
nlp = spacy.load("en_core_web_lg")
tqdm.pandas()

In [3]:
os.listdir('./data')

['animelist.csv',
 'anime.csv',
 '.DS_Store',
 'html folder',
 'watching_status.csv',
 'rating_complete.csv',
 '.ipynb_checkpoints',
 'anime_with_synopsis.csv']

In [4]:
for i in os.listdir('./data'):
    if '.csv' not in i:
        continue
    print(i)
    display(next(pd.read_csv(f'./data/{i}',chunksize=5)))

animelist.csv


Unnamed: 0,user_id,anime_id,rating,watching_status,watched_episodes
0,0,67,9,1,1
1,0,6702,7,1,4
2,0,242,10,1,4
3,0,4898,0,1,1
4,0,21,10,1,0


anime.csv


Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


watching_status.csv


Unnamed: 0,status,description
0,1,Currently Watching
1,2,Completed
2,3,On Hold
3,4,Dropped
4,6,Plan to Watch


rating_complete.csv


Unnamed: 0,user_id,anime_id,rating
0,0,430,9
1,0,1004,5
2,0,3010,7
3,0,570,7
4,0,2762,9


anime_with_synopsis.csv


Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [5]:
df = pd.read_csv('./data/anime_with_synopsis.csv').dropna(subset=['sypnopsis'])

In [6]:
df

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...
...,...,...,...,...,...
16209,48481,Daomu Biji Zhi Qinling Shen Shu,Unknown,"Adventure, Mystery, Supernatural",No synopsis information has been added to this...
16210,48483,Mieruko-chan,Unknown,"Comedy, Horror, Supernatural",ko is a typical high school student whose life...
16211,48488,Higurashi no Naku Koro ni Sotsu,Unknown,"Mystery, Dementia, Horror, Psychological, Supe...",Sequel to Higurashi no Naku Koro ni Gou .
16212,48491,Yama no Susume: Next Summit,Unknown,"Adventure, Slice of Life, Comedy",New Yama no Susume anime.


In [7]:
df['sim_vocab'] = df['sypnopsis'].progress_apply(nlp)

  0%|          | 0/16206 [00:00<?, ?it/s]

In [8]:
df['sim_vocab']

0        (In, the, year, 2071, ,, humanity, has, coloni...
1        (other, day, ,, another, bounty, —, such, is, ...
2        (Vash, the, Stampede, is, the, man, with, a, $...
3        (ches, are, individuals, with, special, powers...
4        (It, is, the, dark, century, and, the, people,...
                               ...                        
16209    (No, synopsis, information, has, been, added, ...
16210    (ko, is, a, typical, high, school, student, wh...
16211    (Sequel, to, Higurashi, no, Naku, Koro, ni, Go...
16212                    (New, Yama, no, Susume, anime, .)
16213    (Solar, calendar, year, 2020, :, grotesque, or...
Name: sim_vocab, Length: 16206, dtype: object

In [9]:
df['sim_vocab'] = df['sim_vocab'].progress_apply(lambda w: ' '.join([x.lemma_.lower() for x in w if not x.is_stop and not x.is_punct and not x.like_num]))

  0%|          | 0/16206 [00:00<?, ?it/s]

In [10]:
count = CountVectorizer()
count_matrix = count.fit_transform(df['sim_vocab'])

In [11]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)
cosine_sim

array([[1.        , 0.22965577, 0.07401698, ..., 0.        , 0.04602873,
        0.08626176],
       [0.22965577, 1.        , 0.09811049, ..., 0.        , 0.        ,
        0.03387885],
       [0.07401698, 0.09811049, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.04602873, 0.        , 0.        , ..., 0.        , 1.        ,
        0.07808688],
       [0.08626176, 0.03387885, 0.        , ..., 0.        , 0.07808688,
        1.        ]])

In [68]:
# function that takes in movie title as input and returns the top 10 recommended movies
def recommendations(title, cosine_sim = cosine_sim):
    
    recommended_movies = []
    
    # gettin the index of the movie that matches the title
    idx = df[df['Name'] == title].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx], name = 'Similarity').sort_values(ascending = False)

    # getting the indexes of the 10 most similar movies
    top_10_indexes = list(score_series.iloc[1:11].index)
    
    # populating the list with the titles of the best 10 matching movies
    for i in top_10_indexes:
        recommended_movies.append(list(df.index)[i])

    return recommended_movies, score_series.iloc[1:11]

In [78]:
recs, scores = recommendations('Naruto')
df.loc[recs].merge(scores,left_index=True, right_index=True)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,sim_vocab,Similarity
11346,34566,Boruto: Naruto Next Generations,5.81,"Action, Adventure, Super Power, Martial Arts, ...",Following the successful end of the Fourth Shi...,follow successful end shinobi world war konoha...,0.386094
1508,1735,Naruto: Shippuuden,8.16,"Action, Adventure, Comedy, Super Power, Martia...",It has been two and a half years since Naruto ...,half year naruto uzumaki leave konohagakure hi...,0.33888
6158,13667,Naruto: Shippuuden Movie 6 - Road to Ninja,7.67,"Action, Adventure, Super Power, Martial Arts, ...","urning home to Konohagakure, the young ninja c...",urne home konohagakure young ninja celebrate d...,0.303344
8831,28755,Boruto: Naruto the Movie,7.5,"Action, Comedy, Martial Arts, Shounen, Super P...","The spirited Boruto Uzumaki, son of Seventh Ho...",spirited boruto uzumaki son hokage naruto skil...,0.291417
10274,32438,Mayoiga,5.53,"Mystery, Comedy, Horror, Psychological, Drama",us full of eccentric individuals is headed tow...,eccentric individual head urban legend know na...,0.267414
546,594,Naruto: Takigakure no Shitou - Ore ga Eiyuu Da...,6.76,"Action, Adventure, Comedy, Shounen, Super Power",outine rank-C mission turned into a full-blown...,outine rank c mission turn blow battle hidden ...,0.262152
3103,4134,"Naruto: Shippuuden - Shippuu! ""Konoha Gakuen"" Den",7.15,Comedy,Naruto school special. Naruto is a new cool st...,naruto school special naruto new cool student ...,0.260242
5487,10589,Naruto: Shippuuden Movie 5 - Blood Prison,7.46,"Action, Adventure, Martial Arts, Super Power, ...",fter being captured for attempting to assassin...,fter capture attempt assassinate leader kumoga...,0.250651
1952,2248,Naruto: Dai Katsugeki!! Yuki Hime Shinobu Houj...,6.87,"Action, Comedy, Sports, Martial Arts, Fantasy,...",The genin of Konoha are having a sports day fi...,genin konoha have sport day fill race obstacle...,0.250027
6026,12979,Naruto SD: Rock Lee no Seishun Full-Power Ninden,7.14,"Action, Comedy, Parody",lcome to the Hidden Leaf Village. The village ...,lcome hidden leaf village village uzumaki naru...,0.248513


In [80]:
recs, scores = recommendations('Bleach')
df.loc[recs].merge(scores,left_index=True, right_index=True)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,sim_vocab,Similarity
1468,1686,Bleach Movie 1: Memories of Nobody,7.45,"Action, Adventure, Comedy, Super Power, Supern...","hen a life ends, its soul departs to its final...",hen life end soul depart final resting place k...,0.464581
3342,4835,Bleach Movie 3: Fade to Black - Kimi no Na wo ...,7.52,"Action, Adventure, Comedy, Super Power, Supern...",fter a mysterious pair attack Rukia Kuchiki an...,fter mysterious pair attack rukia kuchiki eras...,0.435297
5020,9441,Yume-iro Pâtissière SP Professional,7.6,"Slice of Life, Shoujo",Upon her return to Japan after a two year stud...,return japan year study break paris ichigo ama...,0.29429
4599,8247,Bleach Movie 4: Jigoku-hen,7.61,"Action, Adventure, Comedy, Super Power, Supern...","""Hell"" is the place where a person is sent to ...",hell place person send commit violent crime al...,0.289304
731,834,Bleach: The Sealed Sword Frenzy,6.98,"Action, Adventure, Comedy, Super Power, Supern...",Shinigami named Baishin who was sealed by Soul...,shinigami name baishin seal soul society long ...,0.281489
627,687,Tokyo Mew Mew,6.99,"Sci-Fi, Comedy, Magic, Romance, Fantasy, Shoujo",Ichigo Momomiya expected her date with her cru...,ichigo momomiya expect date crush masaya aoyam...,0.256453
7803,22819,Aikatsu! Movie,7.41,"Slice of Life, Music, School, Shoujo","The story of the anime revolves around Ichigo,...",story anime revolve ichigo normal middle schoo...,0.213276
7395,20889,Kuro no Su: Chronus,6.84,Psychological,"For as long as he can remember, Makoto Nakazon...",long remember makoto nakazono power grim reape...,0.206504


In [81]:
recs, scores = recommendations('One Piece')
df.loc[recs].merge(scores,left_index=True, right_index=True)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,sim_vocab,Similarity
5997,12859,One Piece Film: Z,8.18,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",The Straw Hat Pirates enter the rough seas of ...,straw hat pirates enter rough sea new world se...,0.301316
419,459,One Piece Movie 1,7.1,"Action, Adventure, Comedy, Super Power, Fantas...","oonan is the legendary Great Gold Pirate, earn...",oonan legendary great gold pirate earn nicknam...,0.281901
1102,1237,One Piece: Oounabara ni Hirake! Dekkai Dekkai ...,7.29,"Action, Adventure, Comedy, Fantasy, Shounen, S...","The story opens on Pirate Zap's ship, where tw...",story open pirate zap ship crew bonnie max tir...,0.275783
4789,8740,One Piece Film: Strong World Episode 0,8.0,"Action, Adventure, Comedy, Fantasy, Shounen, S...",Set over 20 years prior to the main One Piece ...,set year prior main piece story limited releas...,0.273483
1425,1638,Peter Pan no Bouken,6.84,"Adventure, Fantasy",dy and her two little brothers are brought to ...,dy little brother bring land adventure neverla...,0.257396
3507,5252,One Piece: Romance Dawn Story,7.39,"Action, Fantasy, Comedy, Super Power, Shounen","The Straw Hat Pirates, searching for the great...",straw hat pirates search great passage grand l...,0.256169
3111,4155,One Piece Film: Strong World,8.17,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",Upon hearing news that islands in East Blue ar...,hear news island east blue destroy monkey d. l...,0.249371
424,464,One Piece Movie 6: Omatsuri Danshaku to Himits...,7.7,"Adventure, Comedy, Fantasy, Shounen","""If you are a pirate among pirates among pirat...",pirate pirate pirate pirate gather steadfast c...,0.236025


In [85]:
recs, scores = recommendations('Gintama')
df.loc[recs].merge(scores,left_index=True, right_index=True)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,sim_vocab,Similarity
4333,7472,Gintama Movie 1: Shinyaku Benizakura-hen,8.52,"Action, Sci-Fi, Comedy, Historical, Parody, Sa...","Gintoki and his Yorozuya friends (or rather, e...",gintoki yorozuya friend employee suffer labor ...,0.220116
9563,30705,Makura no Danshi,4.51,Slice of Life,hispering sweet lullabies into the ears of the...,hispere sweet lullaby ear viewer makura danshi...,0.210851
8879,28977,Gintama°,9.1,"Action, Comedy, Historical, Parody, Samurai, S...","Gintoki, Shinpachi, and Kagura return as the f...",gintoki shinpachi kagura return fun love break...,0.200381
250,282,Angel Heart,7.3,"Action, Mystery, Drama, Romance, Seinen","oung Taiwanese assassin codenamed ""Glass Heart...",oung taiwanese assassin codename glass heart c...,0.189432
5427,10456,Kyoukaisenjou no Horizon,7.1,"Action, Sci-Fi, Fantasy","In the far future, humans abandon a devastated...",far future human abandon devastated earth trav...,0.182892
3326,4790,Zeno: Kagirinaki Ai ni,6.52,"Historical, Drama",Zeno: Kagirinaki Ai ni is a movie about life a...,zeno kagirinaki ai ni movie life work brother ...,0.181458
1390,1592,Hataraki Man,7.03,"Comedy, Drama, Romance, Seinen, Slice of Life",Hiroko Matsukata is a woman who works for a ma...,hiroko matsukata woman work magazine company p...,0.17108
2339,2746,Vexille: 2077 Nihon Sakoku,6.94,"Action, Military, Sci-Fi","In an alternate 21st century, the robotics ind...",alternate century robotic industry undergo per...,0.162822
4560,8182,Bouken Shounen Shadar,Unknown,"Adventure, Horror",hen Earth is threatened by the invading Ghosta...,hen earth threaten invade ghostar young boy ne...,0.161296
