# Import libraries

In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset

In [41]:
df=pd.read_csv("../data/processed/anime_processed.csv")

In [42]:
df.head()

Unnamed: 0,Name,Image URL,Tags
0,Cowboy Bebop,https://cdn.myanimelist.net/images/anime/4/196...,"Crime is timeless. By the year 2071, humanity ..."
1,Cowboy Bebop: Tengoku no Tobira,https://cdn.myanimelist.net/images/anime/1439/...,"Another day, another bounty—such is the life o..."
2,Trigun,https://cdn.myanimelist.net/images/anime/7/203...,"Vash the Stampede is the man with a $$60,000,0..."
3,Witch Hunter Robin,https://cdn.myanimelist.net/images/anime/10/19...,Robin Sena is a powerful craft user drafted in...
4,Bouken Ou Beet,https://cdn.myanimelist.net/images/anime/7/215...,It is the dark century and the people are suff...


# Convert 'Tags' to lower

In [43]:
df["Tags"]=df["Tags"].apply(lambda x:x.lower())

In [44]:
df.head()

Unnamed: 0,Name,Image URL,Tags
0,Cowboy Bebop,https://cdn.myanimelist.net/images/anime/4/196...,"crime is timeless. by the year 2071, humanity ..."
1,Cowboy Bebop: Tengoku no Tobira,https://cdn.myanimelist.net/images/anime/1439/...,"another day, another bounty—such is the life o..."
2,Trigun,https://cdn.myanimelist.net/images/anime/7/203...,"vash the stampede is the man with a $$60,000,0..."
3,Witch Hunter Robin,https://cdn.myanimelist.net/images/anime/10/19...,robin sena is a powerful craft user drafted in...
4,Bouken Ou Beet,https://cdn.myanimelist.net/images/anime/7/215...,it is the dark century and the people are suff...


# Vectorization

## Using TF-IDF Vectorizer

In [45]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [46]:
tfidf= TfidfVectorizer(stop_words="english",ngram_range=(1,1),max_features=5000)

## Stemming

In [47]:
import nltk
from nltk.stem.porter import PorterStemmer
ps=PorterStemmer()

In [48]:
def stem(text):
    y=[]
    for i in text.split():
        y.append(ps.stem(i))
    return " ".join(y)

In [49]:
df["Tags"]= df["Tags"].apply(stem)

## Transform 'Tags' into 5000 features

In [50]:
vectors= tfidf.fit_transform(df["Tags"]).toarray()

In [51]:
vectors

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.36725923, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]], shape=(24905, 5000))

In [52]:
tfidf.get_feature_names_out()

array(['000', '10', '100', ..., 'zoo', 'zorori', 'zutomayo'],
      shape=(5000,), dtype=object)

# Applying cosine-similarity

In [53]:
from sklearn.metrics.pairwise import cosine_similarity

In [54]:
similarity= cosine_similarity(vectors)

In [55]:
similarity

array([[1.        , 0.15912997, 0.10487874, ..., 0.0285905 , 0.010375  ,
        0.010375  ],
       [0.15912997, 1.        , 0.05273632, ..., 0.05923106, 0.01264763,
        0.01264763],
       [0.10487874, 0.05273632, 1.        , ..., 0.04254759, 0.        ,
        0.        ],
       ...,
       [0.0285905 , 0.05923106, 0.04254759, ..., 1.        , 0.        ,
        0.        ],
       [0.010375  , 0.01264763, 0.        , ..., 0.        , 1.        ,
        1.        ],
       [0.010375  , 0.01264763, 0.        , ..., 0.        , 1.        ,
        1.        ]], shape=(24905, 24905))

In [56]:
similarity.shape

(24905, 24905)

# Finding closest anime

In [59]:
def recommend(anime):
    anime_index=df[df["Name"]==anime].index[0]
    distances=similarity[anime_index]
    movies_list=sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:11]
    
    for i in movies_list:
        print(df.iloc[i[0]].Name)

In [62]:
recommend("Naruto")

Naruto (2023)
Naruto: Shippuuden - Shippuu! "Konoha Gakuen" Den
Naruto: Shippuuden
Boruto: Naruto the Movie
Boruto: Naruto Next Generations
Naruto: Shippuuden Movie 4 - The Lost Tower
Naruto: Shippuuden Movie 6 - Road to Ninja
Naruto: Shippuuden Movie 5 - Blood Prison
Naruto: Shippuuden Movie 2 - Kizuna
Naruto: Honoo no Chuunin Shiken! Naruto vs. Konohamaru!!


# Save final dataset

In [63]:
import pickle

In [None]:
pickle.dump(df,open("../final_anime.pkl","wb"))

In [67]:
df.head()

Unnamed: 0,Name,Image URL,Tags
0,Cowboy Bebop,https://cdn.myanimelist.net/images/anime/4/196...,"crime is timeless. by the year 2071, human ha ..."
1,Cowboy Bebop: Tengoku no Tobira,https://cdn.myanimelist.net/images/anime/1439/...,"anoth day, anoth bounty—such is the life of th..."
2,Trigun,https://cdn.myanimelist.net/images/anime/7/203...,"vash the stamped is the man with a $$60,000,00..."
3,Witch Hunter Robin,https://cdn.myanimelist.net/images/anime/10/19...,robin sena is a power craft user draft into th...
4,Bouken Ou Beet,https://cdn.myanimelist.net/images/anime/7/215...,it is the dark centuri and the peopl are suffe...
