This notebook includes the official implementation of the paper [**Anime Recommendation System Using Bert and Cosine Similarity**](https://doi.org/10.1109/AiDAS60501.2023.10284693) (AiDAS 2023).

In [1]:
# Import Libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity
import re
import string
from tabulate import tabulate

In [2]:
# Import Data
# Source column name -> name used throughout the rest of the notebook.
column_names = {
    'MAL_ID': 'ID',
    'Name': 'Title',
    'Genres': 'Genre',
    'Type': 'Type',
    'Episodes': 'Episodes',
}
data = pd.read_csv('/kaggle/input/anime-recommendation-database-2020/anime.csv')
# Keep only the columns of interest (in mapping order) and rename them.
data = data[list(column_names)].rename(columns=column_names)
len(data)

17562

In [3]:
# Data Cleaning
# Remove Missing Value Data
print(data.isnull().sum())
# Reset the index after dropping rows so that index labels equal row
# positions; later cells use these labels to address rows of the similarity
# matrix, which is ordered by position.
data = data.dropna().reset_index(drop=True)
print(data.isnull().sum())
print(len(data))

ID          0
Title       0
Genre       0
Type        0
Episodes    0
dtype: int64
ID          0
Title       0
Genre       0
Type        0
Episodes    0
dtype: int64
17562


In [4]:
# Data Preprocessing
def clean(text):
    """Normalise a string into lowercase text without markup or punctuation.

    The input is converted with ``str()`` and then, in order: lowercased,
    stripped of ``[...]`` spans, URLs (``http(s)://...`` / ``www....``),
    ``<...>`` tags, ASCII punctuation and newlines, and finally of every
    word that contains a digit. Whitespace left behind by removed pieces is
    not collapsed.

    Parameters
    ----------
    text : object
        Value to clean; anything accepted by ``str()``. Note that this
        parameter name shadows the ``sklearn.feature_extraction.text`` module
        inside the function body only.

    Returns
    -------
    str
        The cleaned string.
    """
    text = str(text).lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) from being parsed
    # as (invalid) Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\n', '', text)
    # Drop any token containing a digit (e.g. years, episode numbers).
    text = re.sub(r'\w*\d\w*', '', text)
    return text

# Cleaned copies of the title and genre columns.
data["cleaned_Title"] = data["Title"].map(clean)
data["cleaned_Genre"] = data["Genre"].map(clean)
# Single text feature: cleaned title and cleaned genre joined by one space.
data["cleaned_TitleGenre"] = data["cleaned_Title"].str.cat(data["cleaned_Genre"], sep=" ")

In [5]:
# Lookup table: cleaned "title genre" text -> row position in `data`.
# Positions (not index labels) are stored because the similarity matrix is
# addressed by position.
indices = pd.Series(np.arange(len(data)), index=data['cleaned_TitleGenre'])
# Series.drop_duplicates() compares values, which are all unique here, so
# duplicate keys must be removed on the index instead; keep the first row.
indices = indices[~indices.index.duplicated(keep='first')]

In [6]:
# Generate Similarity
def get_similarity(title):
    """Return cosine similarities between one text and every catalogue text.

    The catalogue is ``data["cleaned_TitleGenre"]``. If ``title`` is not in
    the catalogue it is appended as an extra document before vectorising, so
    the returned vector then has one extra trailing entry (the query compared
    with itself).

    Parameters
    ----------
    title : str
        Cleaned "title genre" text to compare against the catalogue.

    Returns
    -------
    numpy.ndarray
        1-D array with one score per document, in catalogue row order.
    """
    feature = data["cleaned_TitleGenre"].tolist()
    try:
        # Position of the first matching row; positions (not index labels)
        # are what the rows of the count matrix correspond to.
        row = feature.index(title)
    except ValueError:
        feature.append(title)
        row = len(feature) - 1
    # Bag-of-words term counts.
    vectorizer = text.CountVectorizer()
    count_matrix = vectorizer.fit_transform(feature)
    # Compare only the query row with all rows instead of materialising the
    # full N x N similarity matrix just to read a single row from it.
    return cosine_similarity(count_matrix[row], count_matrix)[0]

In [7]:
# Generate Recommendation
def Anime_Recommendation(ID, title, genre, top_n=10):
    """Print the catalogue titles most similar to the given title and genre.

    The title and genre are cleaned with ``clean``, joined by a space and
    scored against every catalogue row with ``get_similarity``. Rows with a
    score of zero and the row whose ``ID`` equals ``ID`` are excluded.

    Parameters
    ----------
    ID : int
        ID of the query anime; rows with this ID are left out of the result.
    title : str
        Title of the query anime.
    genre : str
        Genre string of the query anime.
    top_n : int, default 10
        Maximum number of recommendations to print.

    Returns
    -------
    None
        The ranked table is printed, not returned.
    """
    cleaned_TitleGenre = clean(title) + " " + clean(genre)
    # A query that is not in the catalogue adds one extra trailing score;
    # keep only the scores that correspond to rows of `data`.
    similarity = np.asarray(get_similarity(cleaned_TitleGenre))[:len(data)]
    # Descending by score; the stable sort keeps catalogue order among ties.
    order = np.argsort(-similarity, kind="stable")
    order = order[similarity[order] > 0]
    result = data.iloc[order].copy()
    result["Similarity"] = similarity[order]
    result = result[result["ID"] != ID]
    result = result[["Title", "Similarity"]].head(top_n)
    # Rank labels 1..k, sized to the rows actually available so that fewer
    # than `top_n` matches do not raise a length mismatch.
    result.index = np.arange(1, len(result) + 1)
    print("Title:", title)
    print("Genre:", genre)
    print("Anime Recommendation:")
    print(tabulate(result, headers='keys', tablefmt = 'psql'))

In [8]:
def Anime(ID):
    """Print recommendations for the catalogue anime with the given ID.

    Parameters
    ----------
    ID : int
        Value to look up in the ``ID`` column of ``data``.

    Raises
    ------
    ValueError
        If no row of ``data`` has this ID.

    Notes
    -----
    If several rows share the ID, the first one is used.
    """
    matches = data.loc[data["ID"] == ID]
    if matches.empty:
        raise ValueError("No anime with ID {!r} found in the dataset".format(ID))
    anime = matches.iloc[0]
    Anime_Recommendation(ID, anime["Title"], anime["Genre"])

# Example: print recommendations for the anime whose ID is 20.
Anime(20)

Title: Naruto
Genre: Action, Adventure, Comedy, Super Power, Martial Arts, Shounen
Anime Recommendation:
+----+----------------------------------------+--------------+
|    | Title                                  |   Similarity |
|----+----------------------------------------+--------------|
|  1 | Naruto: Shippuuden                     |     0.948683 |
|  2 | Naruto x UT                            |     0.888889 |
|  3 | Dragon Ball Z                          |     0.80403  |
|  4 | Boruto: Naruto the Movie               |     0.80403  |
|  5 | Boruto: Naruto Next Generations        |     0.80403  |
|  6 | Dragon Ball Super                      |     0.801784 |
|  7 | Gatchaman                              |     0.777778 |
|  8 | Kyutai Panic Adventure!                |     0.777778 |
|  9 | Dragon Ball Kai                        |     0.7698   |
| 10 | Naruto: Shippuuden - Sunny Side Battle |     0.7698   |
+----+----------------------------------------+--------------+


In [9]:
# Example: print recommendations for the anime whose ID is 34566.
Anime(34566)

Title: Boruto: Naruto Next Generations
Genre: Action, Adventure, Super Power, Martial Arts, Shounen
Anime Recommendation:
+----+--------------------------------------------+--------------+
|    | Title                                      |   Similarity |
|----+--------------------------------------------+--------------|
|  1 | Naruto                                     |     0.80403  |
|  2 | Naruto: Shippuuden                         |     0.76277  |
|  3 | Boruto: Naruto the Movie                   |     0.727273 |
|  4 | Gatchaman                                  |     0.703526 |
|  5 | Naruto x UT                                |     0.703526 |
|  6 | Kyutai Panic Adventure!                    |     0.703526 |
|  7 | Boruto: Jump Festa 2016 Special            |     0.696311 |
|  8 | Naruto: Shippuuden Movie 5 - Blood Prison  |     0.668994 |
|  9 | Naruto: Shippuuden Movie 6 - Road to Ninja |     0.668994 |
| 10 | Rekka no Honoo                             |     0.667424 |
+----+--------------------------------------------+--------------+

In [10]:
# Example: print recommendations for the anime whose ID is 30694.
Anime(30694)

Title: Dragon Ball Super
Genre: Action, Adventure, Comedy, Super Power, Martial Arts, Fantasy, Shounen
Anime Recommendation:
+----+-----------------------------------------+--------------+
|    | Title                                   |   Similarity |
|----+-----------------------------------------+--------------|
|  1 | Dragon Ball Z                           |     0.966988 |
|  2 | Dragon Ball                             |     0.92967  |
|  3 | Super Dragon Ball Heroes                |     0.928571 |
|  4 | Dragon Ball Kai                         |     0.92582  |
|  5 | Dragon Ball Kai (2014)                  |     0.92582  |
|  6 | Dragon Ball Super Movie: Broly          |     0.897085 |
|  7 | Dragon Ball Z: Atsumare! Gokuu World    |     0.857143 |
|  8 | Dragon Ball Z: Summer Vacation Special  |     0.857143 |
|  9 | Dragon Ball Z Movie 15: Fukkatsu no "F" |     0.857143 |
| 10 | Dragon Ball: Episode of Bardock         |     0.828079 |
+----+-----------------------------------------+--------------+

If you found our implementation useful, please consider citing our paper:

C. G. Reswara, J. Nicolas, M. Ananta and F. I. Kurniadi, "Anime Recommendation System Using Bert and Cosine Similarity," 2023 4th International Conference on Artificial Intelligence and Data Sciences (AiDAS), IPOH, Malaysia, 2023, pp. 109-113, doi: [10.1109/AiDAS60501.2023.10284693](https://doi.org/10.1109/AiDAS60501.2023.10284693).