In [1]:
import pandas as pd

# r_cols = ['user_id','anime_id','rating']
ratings = pd.read_csv('.\\datos\\rating.csv')

# a_cols =  ['anime_id','name','genre','type','episodes','rating','members']
anime = pd.read_csv('.\\datos\\anime.csv')

# Filtrar
anime = anime[(anime.type == 'TV') | (anime.type == 'ONA')]
anime = anime[~anime['genre'].str.contains('Hentai', case=False, na=False)]
ratings = ratings[ratings.rating != -1]

merged = pd.merge(ratings, anime[['anime_id', 'name']], on='anime_id', how='inner')


print(f"Unique anime after filtering: {anime['name'].nunique()}")
print(f"Unique anime in ratings: {ratings['anime_id'].nunique()}")
common_anime = set(anime['anime_id']).intersection(set(ratings['anime_id']))
print(f"Number of anime with ratings and matching filtered anime: {len(common_anime)}")


Unique anime after filtering: 4444
Unique anime in ratings: 9927
Number of anime with ratings and matching filtered anime: 3545


In [2]:
userRatings = merged.pivot_table(index='user_id', columns='name', values='rating')
userRatings


name,.hack//Roots,.hack//Sign,.hack//Tasogare no Udewa Densetsu,001,009-1,07-Ghost,11eyes,12-sai.: Chicchana Mune no Tokimeki,12-sai.: Chicchana Mune no Tokimeki 2nd Season,2020 Nyeon Ujuui Wonder Kiddy,...,Zukkoke Knight: Don De La Mancha,ef: A Tale of Melodies.,ef: A Tale of Memories.,gdgd Fairies,gdgd Fairies 2,iDOLM@STER Xenoglossia,s.CRY.ed,xxxHOLiC,xxxHOLiC Kei,◯
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,2.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73511,,,,,,,,,,,...,,,,,,,,,,
73512,,,,,,,,,,,...,,,,,,,,,,
73513,,,,,,,,,,,...,,,,,,,,,,
73515,,,,,,,,,,,...,,,,,,,9.0,,,


In [3]:
# Excluimos los usuarios con menos de 30 ratings a la hora de calcular la correlación entre animes
min_ratings = 30
valid_users = userRatings.count(axis=1) >= min_ratings

corrMatrix = userRatings[valid_users].corr(method='pearson', min_periods=500)
corrMatrix

name,.hack//Roots,.hack//Sign,.hack//Tasogare no Udewa Densetsu,001,009-1,07-Ghost,11eyes,12-sai.: Chicchana Mune no Tokimeki,12-sai.: Chicchana Mune no Tokimeki 2nd Season,2020 Nyeon Ujuui Wonder Kiddy,...,Zukkoke Knight: Don De La Mancha,ef: A Tale of Melodies.,ef: A Tale of Memories.,gdgd Fairies,gdgd Fairies 2,iDOLM@STER Xenoglossia,s.CRY.ed,xxxHOLiC,xxxHOLiC Kei,◯
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
.hack//Roots,1.000000,0.591265,0.605264,,,,,,,,...,,,,,,,,,,
.hack//Sign,0.591265,1.000000,0.544762,,,0.330512,0.319127,,,,...,,,0.306728,,,,0.278255,0.274808,0.190780,
.hack//Tasogare no Udewa Densetsu,0.605264,0.544762,1.000000,,,,,,,,...,,,,,,,,,,
001,,,,,,,,,,,...,,,,,,,,,,
009-1,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
iDOLM@STER Xenoglossia,,,,,,,,,,,...,,,,,,,,,,
s.CRY.ed,,0.278255,,,,,,,,,...,,,,,,,1.000000,,,
xxxHOLiC,,0.274808,,,,0.283731,0.185011,,,,...,,0.315844,0.314020,,,,,1.000000,0.783368,
xxxHOLiC Kei,,0.190780,,,,0.325451,0.227983,,,,...,,0.338983,0.331701,,,,,0.783368,1.000000,


In [4]:
print(corrMatrix.shape)
myRatings = userRatings.loc[0].dropna()
print(myRatings)

(3545, 3545)
name
Hunter x Hunter (2011)    10.0
School Days                1.0
Name: 0, dtype: float64


In [5]:
simCandidates = pd.Series()
print(simCandidates)
for i in range(0, len(myRatings.index)):
    print ("Añadiendo animes similares a " + myRatings.index[i] + "...")
    # Recuperar los animes similares a las calificadas
    sims = corrMatrix[myRatings.index[i]].dropna()
    # Escalar la similaridad multiplicando la correlación por la calificación de la persona
    sims = sims.map(lambda x: x * myRatings[i])
    # Añadir el puntaje a la lista de candidatos similares
    simCandidates = pd.concat([simCandidates, sims])
    
#Mirar los resultados:
print ("Ordenando...")
simCandidates.sort_values(inplace = True, ascending = False)
print (simCandidates.head(20))

Series([], dtype: object)
Añadiendo animes similares a Hunter x Hunter (2011)...
Añadiendo animes similares a School Days...
Ordenando...
Hunter x Hunter (2011)                          10.000000
Hunter x Hunter                                  4.274228
Magi: The Kingdom of Magic                       3.820024
Yowamushi Pedal: Grande Road                     3.668675
Haikyuu!! Second Season                          3.577883
Bakuman. 2nd Season                              3.548238
Chihayafuru 2                                    3.512553
Hajime no Ippo                                   3.454682
Yowamushi Pedal                                  3.453029
JoJo no Kimyou na Bouken (TV)                    3.435015
Working!!!                                       3.425083
Hajime no Ippo: New Challenger                   3.423216
Boku no Hero Academia                            3.412778
Yuu☆Yuu☆Hakusho                                  3.404262
Natsume Yuujinchou San                           3

  sims = sims.map(lambda x: x * myRatings[i])
  simCandidates = pd.concat([simCandidates, sims])
  sims = sims.map(lambda x: x * myRatings[i])


In [6]:
simCandidates = simCandidates.groupby(simCandidates.index).sum()
simCandidates.sort_values(inplace = True, ascending = False)
filteredSims = simCandidates.drop(myRatings.index)
filteredSims.head(20)

Hunter x Hunter                        4.423264
Magi: The Kingdom of Magic             3.963887
Bakuman. 2nd Season                    3.745782
Yowamushi Pedal: Grande Road           3.668675
Hajime no Ippo: New Challenger         3.627060
Chihayafuru 2                          3.618302
Haikyuu!! Second Season                3.617458
Yowamushi Pedal                        3.598946
Jigoku Shoujo                          3.588386
Hajime no Ippo                         3.586686
Yuu☆Yuu☆Hakusho                        3.567869
Working!!!                             3.548638
JoJo no Kimyou na Bouken (TV)          3.532734
Boku no Hero Academia                  3.528041
Bakuman. 3rd Season                    3.514022
Mobile Suit Gundam 00 Second Season    3.503927
Magi: The Labyrinth of Magic           3.485920
Gugure! Kokkuri-san                    3.446314
Hajime no Ippo: Rising                 3.427689
Natsume Yuujinchou San                 3.421196
dtype: float64