# Anime Recommender System

In [None]:
%pip install matplotlib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

: 

In [None]:
# Load the anime dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("anime.csv")
# Bare expression so the notebook renders the loaded frame.
df

In [None]:
# Look at the last rows of the dataset.
df.tail()

In [None]:
# df["Related_anime"][0]

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# List every column name as a plain Python list.
df.columns.values.tolist() 

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
df.info()

## Data Cleaning

### General Observations
1. Rank - Rank is quite important for recommending an anime, but since it is determined using the rating, we will not be taking this column.
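2. Name - Not used as a feature for the recommendation, but kept as the identifier used to look up an anime and to return the recommended titles.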
3. Japanese_name - Not relevant to the recommendation.
4. Type - Anime type (TV or Web) is not a determining factor for recommendation purposes.
5. Episodes - The number of episodes is not considered influential for the recommendation system.
6. Studio - It could be somewhat influential, but for simplicity, we will not include this feature in this model (will be included in the next version).
7. Release_season - Release season is also not that influential.
8. Tags - This feature is quite important as one might like another anime having the same tags or the same genre.
9. Rating - It is quite an important feature to measure the quality of an anime.
10. Release_year - Not relevant to the recommendation.
11. End_year - It is also not that relevant.
12. Description - It is quite important as it tells us about the anime and can be useful for comparison.
13. Content_Warning - This could be a potential influencer in the recommender, but just for simplicity, we will not consider it (will be included in the next version).
14. Related_Mange - Not relevant to the recommendation.
15. Related_anime - This is also a good feature to recommend related anime, but since we included the description, we need not include this feature (might be included in the next version).
16. Voice_actors - Not relevant as it is not a determining factor to watch an anime.
17. staff - The director might be an important feature, but just for simplicity, we will not consider it (will be included in the next version).

- It can be seen that most of the `NaN` values are at the bottom of the dataset.

Based on our observations and preferences, here is the list of features that we are including in our anime recommendation model:

1. Tags
2. Rating
3. Description


In [None]:
# Drop every column that is not used further on; the columns not listed here
# (Name, Tags, Rating, Description) remain in new_df.
new_df = df.drop(
    columns=[
        'Rank',
        'Japanese_name',
        'Type',
        'Episodes',
        'Studio',
        'Release_season',
        'Release_year',
        'End_year',
        'Content_Warning',
        'Related_Mange',
        'Related_anime',
        'Voice_actors',
        'staff',
    ]
)
new_df

In [None]:
# Count the missing values left in the retained columns.
new_df.isna().sum()

Since Description, Rating and Tags are all important features, we drop every row that has a missing value (`NaN`) in any of them, so that the model is built on complete data.<br>
It can be seen that most of the `NaN` values are at the bottom of the dataset.

In [None]:
# Drop rows with any missing value, then renumber the rows 0..n-1.
# dropna() keeps the original index labels, which leaves gaps; the similarity
# matrix built later is addressed by row position, so labels and positions
# must agree for label-based lookups (and for the dict pickled at the end,
# which is keyed by these labels) to point at the right matrix row.
# reset_index() also returns an independent frame, so the column assignments
# made on `final` below do not write into a slice of new_df.
final = new_df.dropna().reset_index(drop=True)

In [None]:
# Summarize the cleaned frame to confirm that no nulls remain.
final.info()

### We will make a combined feature using Tags and Description

### Tags

In [None]:
# Peek at the tags of the row labelled 2 to confirm the delimiter.
final["Tags"][2].split(", ")
# We can separate all the tags using the ", " separator.

In [None]:
def convert_tags(s):
    """Split a tag string on the ", " separator and return the list of tags."""
    tag_list = s.split(", ")
    return tag_list

In [None]:
# Replace each tag string with its list of individual tags.
final["Tags"] = final["Tags"].apply(convert_tags)
final

### Description

In [None]:
# Tokenize each description: x[1:-1] drops the first and last character
# (presumably wrapping quote characters -- TODO confirm against the CSV),
# then split() breaks the remaining text on whitespace.
final["Description"] = final["Description"].apply(lambda x: x[1:-1].split())

In [None]:
# Display the frame to check the tokenized Description column.
final

### Removing spaces in all the tags.

In [None]:
# Remove the spaces inside every tag so that a multi-word tag becomes one token.
final["Tags"] = final["Tags"].apply(
    lambda tags: [tag.replace(" ", "") for tag in tags]
)

### Removing spaces in all the descriptions.

In [None]:
# Remove " " from every description token. The tokens were produced by
# str.split() with no argument, which splits on whitespace, so they contain
# no spaces and this step leaves them unchanged.
final["Description"] = final["Description"].apply(lambda x:[i.replace(" ","") for i in x])

In [None]:
# Display the frame after the space-removal steps.
final

In [None]:
# Both columns hold Python lists, so "+" concatenates them row by row:
# each Combined value is the tag tokens followed by the description tokens.
final["Combined"] = final["Tags"]+final["Description"]
final

In [None]:
# Keep only what the recommender needs: the title, the combined token list
# and the rating. The explicit copy() makes `o` independent of `final`, so the
# later assignments to o["Combined"] do not write into a slice of `final`
# (which raises pandas' SettingWithCopyWarning).
o = final[["Name","Combined","Rating"]].copy()
o.info()

In [None]:
# Turn each token list back into one space-separated string.
o["Combined"] = o["Combined"].apply(" ".join)

In [None]:
# Lower-case the combined text so that matching is case-insensitive.
o["Combined"] = o["Combined"].apply(str.lower)
o

In [None]:
import nltk
from nltk.stem.porter import PorterStemmer

In [None]:
ps = PorterStemmer()


def helper(s):
    """Stem every whitespace-separated word of *s* and re-join with spaces."""
    return " ".join(ps.stem(word) for word in s.split())


In [None]:
# Stem every word of the combined text.
o["Combined"] = o["Combined"].apply(helper)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer: vocabulary capped at 5000 features, with
# scikit-learn's built-in English stop-word list removed.
cv = CountVectorizer(max_features=5000,stop_words="english")

In [None]:
# Fit the vocabulary on the combined text and build the count matrix, one row
# per row of `o`. toarray() converts the result to a dense NumPy array.
vectors = cv.fit_transform(o["Combined"]).toarray()

In [None]:
# Print the learned vocabulary, one feature name per line, for inspection.
k = cv.get_feature_names_out()
for i in k:
    print(i)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Pairwise cosine similarity between all rows of `vectors`;
# similarity[i][j] compares row i with row j.
similarity = cosine_similarity(vectors)

In [None]:
def recommend5animes(s):
    """Return the names of up to five anime most similar to the one named *s*.

    Reads the notebook-level ``o`` frame (columns ``Name`` and ``Rating``) and
    the ``similarity`` matrix, whose rows and columns follow the row order of
    ``o``. Candidates are ranked by similarity, then rating, then row
    position, all in descending order.

    Args:
        s: Exact value of the ``Name`` column to look up. If several rows
            share the name, the first one is used.

    Returns:
        A list of at most five names, best match first. The queried row
        itself is never part of the result.

    Raises:
        IndexError: If no row of ``o`` has ``Name == s``.
    """
    # `similarity` is addressed by row position, so look up the position of
    # the match rather than its index label; the two differ whenever the
    # frame's index has gaps (e.g. after dropna()).
    matches = np.flatnonzero((o["Name"] == s).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no anime named {s!r}")
    idx = int(matches[0])

    scores = similarity[idx]
    ratings = o["Rating"].to_numpy()
    # Skip the queried row explicitly instead of assuming it sorts first: a
    # row with an equal score and a higher rating, or an all-zero vector,
    # would otherwise push the queried anime into its own recommendations.
    ranked = sorted(
        (
            (scores[i], ratings[i], i)
            for i in range(len(similarity))
            if i != idx
        ),
        reverse=True,
    )
    names = o["Name"]
    return [names.iloc[position] for _, _, position in ranked[:5]]

# Try the recommender on one title from the dataset.
recommend5animes("Deltora Quest")



In [None]:
import pickle

In [None]:
# Persist the similarity matrix. The with-block closes the file handle, which
# guarantees the data is flushed to disk.
# NOTE(review): despite its name, anime_list.pkl holds the similarity matrix.
with open('anime_list.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)

In [None]:
# Persist the Name / Combined / Rating table as a plain dict of columns.
# The with-block closes the file handle, which guarantees the data is
# flushed to disk.
# NOTE(review): despite its name, movie_list.pkl holds the anime table.
with open('movie_list.pkl', 'wb') as anime_file:
    pickle.dump(o.to_dict(), anime_file)