# Content Based Recommender system

In [1]:
# In this recommender system the content of an anime 
# (description, tags, studio, voice actors, staff, etc.) is used to find its similarity with other animes. 
# Then the animes that are most likely to be similar are recommended.

In [4]:
# let's import our data
import os

import pandas as pd

# The CSV location can be overridden with the ANIME_CSV_PATH environment variable
# so the notebook is not tied to one machine; the fallback is the original local path.
DATA_PATH = os.environ.get('ANIME_CSV_PATH', r'C:\Users\kinwa\Downloads\Anime.csv\Anime.csv')
df = pd.read_csv(DATA_PATH)

In [5]:
# Preview the first five rows of the dataset
df.head(n=5)

Unnamed: 0,Rank,Name,Japanese_name,Type,Episodes,Studio,Release_season,Tags,Rating,Release_year,End_year,Description,Content_Warning,Related_Mange,Related_anime,Voice_actors,staff
0,1,Demon Slayer: Kimetsu no Yaiba - Entertainment...,Kimetsu no Yaiba: Yuukaku-hen,TV,,ufotable,Fall,"Action, Adventure, Fantasy, Shounen, Demons, H...",4.6,2021.0,,'Tanjiro and his friends accompany the Hashira...,Explicit Violence,Demon Slayer: Kimetsu no Yaiba,"Demon Slayer: Kimetsu no Yaiba, Demon Slayer: ...","Inosuke Hashibira : Yoshitsugu Matsuoka, Nezuk...","Koyoharu Gotouge : Original Creator, Haruo Sot..."
1,2,Fruits Basket the Final Season,Fruits Basket the Final,TV,13.0,TMS Entertainment,Spring,"Drama, Fantasy, Romance, Shoujo, Animal Transf...",4.6,2021.0,,'The final arc of Fruits Basket.',"Emotional Abuse,, Mature Themes,, Physical Abu...","Fruits Basket, Fruits Basket Another","Fruits Basket 1st Season, Fruits Basket 2nd Se...","Akito Sohma : Maaya Sakamoto, Kyo Sohma : Yuum...","Natsuki Takaya : Original Creator, Yoshihide I..."
2,3,Mo Dao Zu Shi 3,The Founder of Diabolism 3,Web,12.0,B.C MAY PICTURES,,"Fantasy, Ancient China, Chinese Animation, Cul...",4.58,2021.0,,'The third season of Mo Dao Zu Shi.',,Grandmaster of Demonic Cultivation: Mo Dao Zu ...,"Mo Dao Zu Shi 2, Mo Dao Zu Shi Q","Lan Wangji, Wei Wuxian, Jiang Cheng, Jin Guang...","Mo Xiang Tong Xiu : Original Creator, Xiong Ke..."
3,4,Fullmetal Alchemist: Brotherhood,Hagane no Renkinjutsushi: Full Metal Alchemist,TV,64.0,Bones,Spring,"Action, Adventure, Drama, Fantasy, Mystery, Sh...",4.58,2009.0,2010.0,"""The foundation of alchemy is based on the law...","Animal Abuse,, Mature Themes,, Violence,, Dome...","Fullmetal Alchemist, Fullmetal Alchemist (Ligh...","Fullmetal Alchemist: Brotherhood Specials, Ful...","Alphonse Elric : Rie Kugimiya, Edward Elric : ...","Hiromu Arakawa : Original Creator, Yasuhiro Ir..."
4,5,Attack on Titan 3rd Season: Part II,Shingeki no Kyojin Season 3: Part II,TV,10.0,WIT Studio,Spring,"Action, Fantasy, Horror, Shounen, Dark Fantasy...",4.57,2019.0,,'The battle to retake Wall Maria begins now! W...,"Cannibalism,, Explicit Violence","Attack on Titan, Attack on Titan: End of the W...","Attack on Titan, Attack on Titan 2nd Season, A...","Armin Arlelt : Marina Inoue, Eren Jaeger : Yuu...","Hajime Isayama : Original Creator, Tetsurou Ar..."


### Plot description based Recommender

In [6]:
# We will compute pairwise similarity scores for all animes based on their plot descriptions and 
# recommend animes based on that similarity score. 
# The plot description is given in the Description column of our dataset. Let's take a look at the data.

In [9]:
# Peek at the first few plot descriptions
df['Description'].head()

0    'Tanjiro and his friends accompany the Hashira...
1                    'The final arc of Fruits Basket.'
2                 'The third season of Mo Dao Zu Shi.'
3    "The foundation of alchemy is based on the law...
4    'The battle to retake Wall Maria begins now! W...
Name: Description, dtype: object

In [10]:
# We need to convert each description into a word vector, so we will use the TF-IDF vectorizer.

In [11]:
# Bring in scikit-learn's TF-IDF vectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Missing descriptions become empty strings so the vectorizer only ever sees text
df['Description'] = df['Description'].fillna('')

# Vectorizer configured to drop common English stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and build the TF-IDF matrix for all descriptions in one pass
tfidf_matrix = tfidf.fit_transform(df['Description'])

# Show the dimensions of the resulting matrix
tfidf_matrix.shape

(18495, 34609)

In [12]:
# TfidfVectorizer L2-normalises each row by default, so the dot product of two rows 
# is already their cosine similarity score. 
# Therefore, we will use sklearn's linear_kernel() instead of cosine_similarity() since it is faster.

In [13]:
# Bring in the dot-product kernel from scikit-learn
from sklearn.metrics.pairwise import linear_kernel

# Score every description against every other one; with the second argument
# omitted, linear_kernel compares the matrix with itself
cosine_sim = linear_kernel(tfidf_matrix)

In [14]:
# We are going to define a function that takes an anime title as input and outputs 
# a list of the 10 most similar animes. First, for this, 
# we need a reverse mapping from anime titles to DataFrame indices. In other words, 
# we need a mechanism to identify the index of an anime in our DataFrame, given its title.

In [16]:
# Construct a reverse map from anime title to DataFrame row index.
# Series.drop_duplicates() compares the *values* (the row indices), not the title
# labels, so it never removed repeated titles; a repeated title then made
# indices[name] return a Series instead of a single row index.
# De-duplicate on the title labels instead, keeping the first occurrence of each.
indices = pd.Series(df.index, index=df['Name'])
indices = indices[~indices.index.duplicated(keep='first')]

In [24]:
# Function that takes in anime title as input and outputs most similar animes
def get_recommendations(name, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the animes most similar to ``name``.

    Parameters
    ----------
    name : str
        Exact title to look up in the ``indices`` title -> row-index map.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix; row ``i`` holds the scores of anime ``i``
        against every anime. Defaults to the notebook-level ``cosine_sim``.
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Entries of ``df['Name']`` for the ``top_n`` best matches, best first.

    Raises
    ------
    KeyError
        If ``name`` is not a title present in ``indices``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Get the index of the anime that matches the title
    idx = indices[name]
    # A title that occurs more than once yields several rows; use the first one
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Get the pairwise similarity scores of all other animes with that anime.
    # The queried anime is excluded by index rather than by assuming it sorts
    # first: with an empty description (all scores 0) or an identical
    # description elsewhere (tie at 1.0) it is not guaranteed to rank first.
    sim_scores = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]

    # Sort the animes based on the similarity scores (stable, so ties keep row order)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Keep the scores of the top_n most similar animes
    sim_scores = sim_scores[:top_n]

    # Get the anime indices
    anime_indices = [i[0] for i in sim_scores]

    # Return the titles of the most similar animes
    return df['Name'].iloc[anime_indices]

In [25]:
# Example lookup: animes closest to this title by plot description
get_recommendations(name='Attack on Titan 3rd Season: Part II')

77                      Attack on Titan 2nd Season
42                      Attack on Titan 3rd Season
12772                                   Back Arrow
3895                  Attack on Titan: Junior High
6        Attack on Titan The Final Season: Part II
7977                                 Boundary line
4912                            Record of Ragnarok
4106               Robotech: The Shadow Chronicles
4477                        Happy Lucky Bikkuriman
748                    Attack on Titan: Lost Girls
Name: Name, dtype: object