In [24]:
# import libraries

import pandas as pd
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer

In [25]:
# reading data from file

anime_description = pd.read_csv('Anime_data.csv', encoding='latin-1')
anime_description.head()

Unnamed: 0,Anime_id,Title,Genre,Synopsis,Type,Producer,Studio,Rating,ScoredBy,Popularity,Members,Episodes,Source,Aired,Link
0,1,Cowboy Bebop,"['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...","In the year 2071, humanity has colonized sever...",TV,['Bandai Visual'],['Sunrise'],8.81,363889.0,39.0,704490.0,26.0,Original,"Apr 3, 1998 to Apr 24, 1999",https://myanimelist.net/anime/1/Cowboy_Bebop
1,5,Cowboy Bebop: Tengoku no Tobira,"['Action', 'Space', 'Drama', 'Mystery', 'Sci-Fi']","Another day, another bountyâsuch is the life...",Movie,"['Sunrise', 'Bandai Visual']",['Bones'],8.41,111187.0,475.0,179899.0,1.0,Original,"Sep 1, 2001",https://myanimelist.net/anime/5/Cowboy_Bebop__...
2,6,Trigun,"['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Vash the Stampede is the man with a $$60,000,0...",TV,['Victor Entertainment'],['Madhouse'],8.31,197451.0,158.0,372709.0,26.0,Manga,"Apr 1, 1998 to Sep 30, 1998",https://myanimelist.net/anime/6/Trigun
3,7,Witch Hunter Robin,"['Action', 'Magic', 'Police', 'Supernatural', ...",Witches are individuals with special powers li...,TV,['Bandai Visual'],['Sunrise'],7.34,31875.0,1278.0,74889.0,26.0,Original,"Jul 2, 2002 to Dec 24, 2002",https://myanimelist.net/anime/7/Witch_Hunter_R...
4,8,Bouken Ou Beet,"['Adventure', 'Fantasy', 'Shounen', 'Supernatu...",It is the dark century and the people are suff...,TV,,['Toei Animation'],7.04,4757.0,3968.0,11247.0,52.0,Manga,"Sep 30, 2004 to Sep 29, 2005",https://myanimelist.net/anime/8/Bouken_Ou_Beet


In [26]:
anime_tfidf = TfidfVectorizer(stop_words='english') # removing the stop words

anime_description['Synopsis'] = anime_description['Synopsis'].fillna('') # filling the missing values
anime_description_matrix = anime_tfidf.fit_transform(anime_description['Synopsis']) # computing TF-IDF matrix required for calculating cosine similarity
anime_description_matrix.shape # shape of computed matrix

(17002, 45350)

In [27]:
# computing cosine similarity matrix using linear_kernal of sklearn

cosine_similarity = linear_kernel(anime_description_matrix, anime_description_matrix)

In [28]:
indices = pd.Series(anime_description['Title'].index)

In [29]:
# Function to get the most similar animes
def recommend(index, cosine_sim=cosine_similarity):
  id = indices[index]

  # Get scores of all animes compared to that anime,
  # sorting them and getting top 5
  similarity_scores = list(enumerate(cosine_sim[id]))
  similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
  similarity_scores = similarity_scores[1:6]

  # Get the animes index
  anime_index = [i[0] for i in similarity_scores]

  # Return the top 5 most similar animes using integer-location based indexing
  return anime_description['Title'].iloc[anime_index]

In [30]:
# getting recommendation for anime at index 3
recommend(3)

16925              Motto! Ojamajo Doremi
13624                   Luminous Witches
11109    Zero kara Hajimeru Mahou no Sho
324              Ojamajo Doremi Dokkaan!
1549                  Strike Witches OVA
Name: Title, dtype: object

In [31]:
anime_description['Synopsis'].iloc[3]

"Witches are individuals with special powers like ESP, telekinesis, mind control, etc. Robin, a 15-year-old craft user, arrives from Italy to Japan to work for an organization named STN Japan Division (STN-J) as a replacement for one of STN-J's witch hunters who was recently killed. Unlike other divisions of STN, STN-J tries to capture the witches alive in order to learn why and how they became witches in the first place. \r\n \r\n(Source: ANN)"

In [32]:
anime_description['Synopsis'].iloc[16925]

'After the losing of magical ability to become witches, they once again become them. But, now they have to go through tests from the Witches of the Witch World. They also meet a new member, from New York, U.S. comes Momoko (the yellow Ojamajo).  \r\n \r\n(Source: ANN)'