# Sistema de recomendación de anime

In [31]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [32]:
anime = pd.read_csv('https://bit.ly/3Ye5Scj')
anime.head(2)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665


In [33]:
# Verificando nulos
anime.shape, anime.isnull().sum()

((10486, 7),
 anime_id      0
 name          0
 genre        62
 type         23
 episodes      0
 rating      211
 members       0
 dtype: int64)

## Data preprocessing

In [34]:
# Eliminar símbolos "raros" de los nombres
import re
anime.name = anime.name.map(lambda name:re.sub('[^A-Za-z0-9]+', " ", name))
anime.head(2)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665


In [None]:
known_animes = {"Naruto Shippuuden": 500, "One Piece": 784, "Detective Conan": 854, "Dragon Ball Super": 86,
                "Crayon Shin chan": 942, "Yu Gi Oh Arc V": 148, "Shingeki no Kyojin Season 2": 25,
                "Boku no Hero Academia 2nd Season": 25, "Little Witch Academia TV": 25}
for k, v in known_animes.items():
    anime.loc[anime['name'] == k, 'episodes'] = v

In [36]:
anime.loc[(anime['type'] == 'OVA') & (anime['episodes'] == 'Unknown'), 'episodes'] = 1
anime.loc[(anime['type'] == 'Movie') & (anime['episodes'] == 'Unknown'), 'episodes'] = 1
anime.loc[(anime['genre'] == 'Hentai') & (anime['episodes'] == 'Unknown'), 'episodes'] = 1

In [37]:
anime['episodes'] = anime['episodes'].map(lambda x: np.nan if x == 'Unknown' else x)
anime['episodes'].fillna(anime['episodes'].median(), inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  anime['episodes'].fillna(anime['episodes'].median(), inplace = True)


In [38]:
pd.get_dummies(anime[['type']]).head(2)

Unnamed: 0,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
0,True,False,False,False,False,False
1,False,False,False,False,False,True


In [39]:
anime['members'] = anime['members'].astype(float)
anime['rating'] = anime['rating'].astype(float)
anime['rating'].fillna(anime['rating'].median(), inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  anime['rating'].fillna(anime['rating'].median(), inplace = True)


In [40]:
anime_features = pd.concat([anime['genre'].str.get_dummies(sep=','),
                            pd.get_dummies(anime[['type']]),
                            anime['rating'], anime['members'], anime['episodes']], axis=1)
anime_features.head(2)

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Fantasy,Game,Harem,Historical,...,Vampire,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,rating,members,episodes
0,0,0,0,0,0,0,0,0,0,0,...,0,True,False,False,False,False,False,9.37,200630.0,1
1,1,0,0,0,0,1,1,0,0,0,...,0,False,False,False,False,False,True,9.26,793665.0,64


In [41]:
anime_features.shape

(10486, 84)

In [42]:
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
anime_features = mms.fit_transform(anime_features)
np.round(anime_features, 2)

array([[0.  , 0.  , 0.  , ..., 0.92, 0.2 , 0.  ],
       [1.  , 0.  , 0.  , ..., 0.91, 0.78, 0.03],
       [0.  , 0.  , 1.  , ..., 0.91, 0.11, 0.03],
       ...,
       [0.  , 0.  , 0.  , ..., 0.59, 0.  , 0.  ],
       [0.  , 0.  , 0.  , ..., 0.59, 0.  , 0.  ],
       [0.  , 0.  , 0.  , ..., 0.59, 0.  , 0.  ]])

## Modelo K-vecinos

In [None]:
# Ball Tree es un algoritmo de búsqueda de vecinos cercanos que se basa en la estructura de datos de árbol de bolas.
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=6, algorithm='ball_tree')
nbrs.fit(anime_features)

In [44]:
distances, indices = nbrs.kneighbors(anime_features)

In [45]:
all_anime_names = list(anime.name.values)
# funciones auxiliares
def get_index_from_name(name):
    return anime[anime["name"]==name].index.tolist()[0]
def get_id_from_partial_name(partial):
    for name in all_anime_names:
        if partial in name:
            print(name, all_anime_names.index(name))

In [46]:
# uso de get_id_from_partial_name
get_id_from_partial_name("Naruto")

Boruto Naruto the Movie 479
Naruto Shippuuden 607
The Last Naruto the Movie 709
Naruto Shippuuden Movie 6 Road to Ninja 770
Naruto 823
Boruto Naruto the Movie Naruto ga Hokage ni Natta Hi 1069
Naruto Shippuuden Movie 5 Blood Prison 1195
Naruto x UT 1296
Naruto Shippuuden Movie 4 The Lost Tower 1417
Naruto Shippuuden Movie 3 Hi no Ishi wo Tsugu Mono 1511
Naruto Shippuuden Movie 1 1754
Naruto Shippuuden Movie 2 Kizuna 1755
Naruto Shippuuden Shippuu quot Konoha Gakuen quot Den 2255
Naruto Honoo no Chuunin Shiken Naruto vs Konohamaru  2296
Naruto SD Rock Lee no Seishun Full Power Ninden 2334
Naruto Shippuuden Sunny Side Battle 2335
Naruto Movie 1 Dai Katsugeki Yuki Hime Shinobu Houjou Dattebayo  2606
Naruto Soyokazeden Movie Naruto to Mashin to Mitsu no Onegai Dattebayo  2827
Naruto Movie 2 Dai Gekitotsu Maboroshi no Chiteiiseki Dattebayo  3235
Naruto Dai Katsugeki Yuki Hime Shinobu Houjou Dattebayo Special Konoha Annual Sports Festival 3306
Naruto Movie 3 Dai Koufun Mikazuki Jima no Anima

In [49]:
# busca animes similares, puede ser por id o por nombre
def print_similar_animes(query=None,id=None):
    if id:
        for id in indices[id][1:]:
            print(anime.iloc[id]["name"])
    if query:
        found_id = get_index_from_name(query)
        for id in indices[found_id][1:]:
            print(anime.iloc[id]["name"])

In [50]:
# uso de print_similar_animes
print_similar_animes(id=719)

Mobile Suit Gundam 00 Second Season
Mobile Suit Gundam 00
Soukou Kihei Votoms
Mobile Suit Gundam Unicorn RE 0096
Mobile Suit Gundam Seed


In [51]:
print_similar_animes(query="Naruto")

Naruto Shippuuden
Katekyo Hitman Reborn 
Bleach
Dragon Ball Z
Boku no Hero Academia


In [52]:
print_similar_animes("Noragami")

Noragami Aragoto
JoJo no Kimyou na Bouken TV 
JoJo no Kimyou na Bouken Stardust Crusaders
JoJo no Kimyou na Bouken Stardust Crusaders 2nd Season
Yumekui Merry


In [53]:
print_similar_animes("Gintama")

Gintama 039 
Gintama 
Gintama 039 Enchousen
Gintama 2017 
Gintama Movie Kanketsu hen Yorozuya yo Eien Nare


In [55]:
print_similar_animes("Fairy Tail")

Fairy Tail 2014 
Magi The Labyrinth of Magic
Magi The Kingdom of Magic
Densetsu no Yuusha no Densetsu
Magi Sinbad no Bouken TV 
