This notebook includes the official implementation of the paper [**Anime Recommendation System Using Bert and Cosine Similarity**](https://doi.org/10.1109/AiDAS60501.2023.10284693) (AiDAS 2023).

In [1]:
# Import Libraries
import numpy as np
import pandas as pd
import re
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from sklearn.metrics.pairwise import cosine_similarity
import string
from tabulate import tabulate

In [2]:
# Load the BERT preprocessing layer and the BERT encoder layer from TF Hub
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/4"

# Turns raw strings into the input dict consumed by the encoder.
preprocessor = hub.KerasLayer(BERT_PREPROCESS_URL)
# NOTE(review): no cell visible here trains the encoder, so trainable=True
# looks unnecessary for inference-only use - confirm before changing it.
encoder = hub.KerasLayer(BERT_ENCODER_URL, trainable=True)

In [3]:
# Load the anime table, keep only the columns used below, and give them short names
ANIME_CSV = '/kaggle/input/anime-recommendation-database-2020/anime.csv'
COLUMN_MAP = {
    'MAL_ID': 'ID',
    'Name': 'Title',
    'Genres': 'Genre',
    'Type': 'Type',
    'Episodes': 'Episodes',
}

anime_raw = pd.read_csv(ANIME_CSV)
# Selecting with the map's keys keeps the column order of the original selection.
data = anime_raw[list(COLUMN_MAP)].rename(columns=COLUMN_MAP)
len(data)

17562

In [4]:
# Data Cleaning
# Show per-column null counts, drop every row containing a null, then show
# the counts and the remaining row count again.
# NOTE(review): the recorded output reports zero nulls before the drop and an
# unchanged row count after it, so this step removed nothing on that run -
# presumably missing values are stored as a placeholder string; verify.
missing_before = data.isna().sum()
print(missing_before)
data = data.dropna(axis='index', how='any')
missing_after = data.isna().sum()
print(missing_after)
print(data.shape[0])

ID          0
Title       0
Genre       0
Type        0
Episodes    0
dtype: int64
ID          0
Title       0
Genre       0
Type        0
Episodes    0
dtype: int64
17562


In [5]:
# Data Preprocessing
# Removal steps applied by clean(), in order. Raw strings keep backslash
# sequences such as \[ or \w from being parsed as (invalid) string escapes,
# and compiling once avoids rebuilding the punctuation class on every call.
_CLEAN_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'\[.*?\]',                              # bracketed segments
        r'https?://\S+|www\.\S+',                # URLs
        r'<.*?>+',                               # angle-bracket tags
        '[%s]' % re.escape(string.punctuation),  # ASCII punctuation
        r'\n',                                   # newlines
        r'\w*\d\w*',                             # any word containing a digit
    )
)


def clean(text):
    """Normalise a title or genre string before embedding.

    The value is converted with ``str()``, lower-cased, and then every match
    of each pattern in ``_CLEAN_PATTERNS`` is deleted, in order. Matches are
    removed rather than replaced by a space, so surrounding whitespace is
    left as-is (removing a word between two spaces leaves both spaces).

    Args:
        text: Value to clean; non-string values are stringified first.

    Returns:
        The cleaned, lower-cased string.
    """
    text = str(text).lower()
    for pattern in _CLEAN_PATTERNS:
        text = pattern.sub('', text)
    return text

# Clean the title and genre columns, then join them with a single space to
# form the text that gets embedded.
for source_column in ("Title", "Genre"):
    data["cleaned_" + source_column] = data[source_column].map(clean)
data["cleaned_TitleGenre"] = data["cleaned_Title"].str.cat(data["cleaned_Genre"], sep=" ")

In [6]:
# Data Embedding
def get_bert_embeddings(text, preprocessor, encoder):
    """Embed text with BERT and return the encoder's pooled output.

    Args:
        text: A single string, or a sequence of strings to embed as one batch.
        preprocessor: Callable that maps a string tensor to the encoder's
            inputs (the TF Hub preprocessing layer in this notebook).
        encoder: Callable that maps those inputs to a dict containing the key
            'pooled_output' (the TF Hub BERT encoder in this notebook).

    Returns:
        The 'pooled_output' tensor for the batch. A single string is embedded
        as a batch of one, exactly as before.
    """
    # A lone string becomes a batch of one; any other iterable is the batch.
    batch = [text] if isinstance(text, str) else list(text)
    sentences = tf.constant(batch, dtype=tf.string)
    # Call the layers directly. Wrapping them in a new tf.keras.Model on every
    # call (this function runs once per dataset row) only added model
    # construction overhead and produced the same tensor.
    encoder_inputs = preprocessor(sentences)
    # Embeddings are used for inference only, so run the encoder in
    # non-training mode explicitly.
    outputs = encoder(encoder_inputs, training=False)
    return outputs['pooled_output']

# Embed every row's cleaned title+genre text. Use the first GPU when one is
# visible and fall back to the CPU otherwise, so the cell does not depend on a
# hard-coded GPU device string being resolvable.
embedding_device = '/device:GPU:0' if tf.config.list_physical_devices('GPU') else '/device:CPU:0'
with tf.device(embedding_device):
    data['encodings'] = data['cleaned_TitleGenre'].apply(lambda x: get_bert_embeddings(x, preprocessor, encoder))

In [7]:
# Generate Recommendation
def Anime_Recommendation(ID, title, genre, top_n=10):
    """Print the titles whose embeddings are closest to the query anime.

    The query text is built the same way as the dataset rows (cleaned title,
    a space, cleaned genre), embedded with ``get_bert_embeddings``, and scored
    against the ``encodings`` column of the global ``data`` frame using cosine
    similarity.

    Args:
        ID: Value of the ``ID`` column for the query anime; rows with this ID
            are excluded from the results.
        title: Raw title of the query anime.
        genre: Raw genre string of the query anime.
        top_n: Maximum number of recommendations to print. Defaults to 10.

    Returns:
        None. The ranked table is printed to stdout.
    """
    query_text = clean(title) + " " + clean(genre)
    query_encoding = np.asarray(
        get_bert_embeddings(query_text, preprocessor, encoder)
    ).reshape(1, -1)

    # Drop the query anime first so it can never take one of the result slots,
    # and select only the two columns needed instead of copying the whole frame.
    candidates = data.loc[data["ID"] != ID, ["Title", "encodings"]]

    if candidates.empty:
        scores = np.empty(0, dtype=query_encoding.dtype)
    else:
        # Stack the stored embeddings into one matrix (one row per candidate)
        # and score them in a single cosine_similarity call rather than one
        # call per row.
        candidate_matrix = np.vstack(
            [np.asarray(encoding).reshape(1, -1) for encoding in candidates["encodings"]]
        )
        scores = cosine_similarity(candidate_matrix, query_encoding)[:, 0]

    ranked = (
        pd.DataFrame({"Title": candidates["Title"].to_numpy(), "Similarity": scores})
        .sort_values(by="Similarity", ascending=False)
        .head(top_n)
    )
    # Rank labels start at 1 and follow the actual number of results; a fixed
    # 1..10 index raises when fewer than ten candidates are available.
    ranked.index = np.arange(1, len(ranked) + 1)

    print("Title:", title)
    print("Genre:", genre)
    print("Anime Recommendation:")
    print(tabulate(ranked, headers='keys', tablefmt='psql'))

In [8]:
def Anime(ID):
    """Look up an anime by ID in the global ``data`` frame and print its recommendations.

    Args:
        ID: Value to match against the ``ID`` column of ``data``.

    Raises:
        ValueError: If ``data`` does not contain exactly one row with this ID.
    """
    matches = data.loc[data["ID"] == ID]
    if len(matches) != 1:
        # .item() already raised ValueError here, but with a generic size
        # message; keep the exception type and name the offending ID.
        raise ValueError(
            f"Expected exactly one anime with ID {ID!r} in data, found {len(matches)}"
        )
    anime = matches.iloc[0]
    Anime_Recommendation(ID, anime["Title"], anime["Genre"])

# Example query: ID 20, which the output below reports as "Naruto".
Anime(20)

Title: Naruto
Genre: Action, Adventure, Comedy, Super Power, Martial Arts, Shounen
Anime Recommendation:
+----+--------------------------------------------+--------------+
|    | Title                                      |   Similarity |
|----+--------------------------------------------+--------------|
|  1 | Naruto x UT                                |     0.99046  |
|  2 | Tenjou Tenge                               |     0.986061 |
|  3 | Boruto: Jump Festa 2016 Special            |     0.985153 |
|  4 | The Last: Naruto the Movie                 |     0.984817 |
|  5 | Dragon Ball Z Movie 15: Fukkatsu no "F"    |     0.982405 |
|  6 | Street Fighter II Movie                    |     0.981987 |
|  7 | Dragon Ball Z Movie 14: Kami to Kami       |     0.981082 |
|  8 | Grappler Baki                              |     0.978463 |
|  9 | Street Fighter Zero The Animation          |     0.977244 |
| 10 | Toriko: Jump Super Anime Tour 2009 Special |     0.976173 |
+----+--------------------------------------------+--------------+

In [9]:
# Example query: ID 34566, which the output below reports as "Boruto: Naruto Next Generations".
Anime(34566)

Title: Boruto: Naruto Next Generations
Genre: Action, Adventure, Super Power, Martial Arts, Shounen
Anime Recommendation:
+----+------------------------------------------------------------------+--------------+
|    | Title                                                            |   Similarity |
|----+------------------------------------------------------------------+--------------|
|  1 | Naruto: Shippuuden Movie 6 - Road to Ninja                       |     0.991309 |
|  2 | Baoh Raihousha                                                   |     0.988921 |
|  3 | Gunjou no Magmell                                                |     0.987854 |
|  4 | Yuu☆Yuu☆Hakusho: Meikai Shitou-hen - Honoo no Kizuna             |     0.987785 |
|  5 | Naruto: Honoo no Chuunin Shiken! Naruto vs. Konohamaru!!         |     0.987487 |
|  6 | Rekka no Honoo                                                   |     0.987453 |
|  7 | Tatakae!! Ramenman                                               |    

In [10]:
# Example query: ID 30694, which the output below reports as "Dragon Ball Super".
Anime(30694)

Title: Dragon Ball Super
Genre: Action, Adventure, Comedy, Super Power, Martial Arts, Fantasy, Shounen
Anime Recommendation:
+----+----------------------------------------+--------------+
|    | Title                                  |   Similarity |
|----+----------------------------------------+--------------|
|  1 | Dragon Ball Kai (2014)                 |     0.993424 |
|  2 | Dragon Ball Super Movie: Broly         |     0.992845 |
|  3 | Dragon Ball GT                         |     0.992399 |
|  4 | Super Dragon Ball Heroes               |     0.99194  |
|  5 | Dragon Ball: Episode of Bardock        |     0.991751 |
|  6 | Dragon Ball Z: Summer Vacation Special |     0.989389 |
|  7 | Dragon Ball Z: Atsumare! Gokuu World   |     0.988997 |
|  8 | Dragon Ball                            |     0.988883 |
|  9 | Dragon Ball Kai                        |     0.987591 |
| 10 | Fairy Tail Movie 2: Dragon Cry         |     0.987343 |
+----+----------------------------------------+--------------+

If you found our implementation useful, please consider citing our paper:

C. G. Reswara, J. Nicolas, M. Ananta and F. I. Kurniadi, "Anime Recommendation System Using Bert and Cosine Similarity," 2023 4th International Conference on Artificial Intelligence and Data Sciences (AiDAS), IPOH, Malaysia, 2023, pp. 109-113, doi: [10.1109/AiDAS60501.2023.10284693](https://doi.org/10.1109/AiDAS60501.2023.10284693).