In [4]:
import sys
sys.path.append("../Lib")
import utils as ut
import pandas as pd
import scipy as sp
import operator
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings("ignore")

In [5]:
df_model = pd.read_csv('../Data/csv/model.csv', encoding='utf-8')

### User Recommendation
En este modelo vamos a aplicar el filtro usuario-elemento, es decir, tomar un usuario, encontrar usuarios similares y recomendar elementos que les hayan gustado a esos usuarios similares. En este caso, la entrada es un usuario y la salida es una lista de juegos recomendados para ese usuario.

In [6]:
df_model

Unnamed: 0,item_id,item_name,genres,user_id,rating
0,282010,Carmageddon Max Pack,Action,UTNerd24,1
1,282010,Carmageddon Max Pack,Action,saint556,5
2,282010,Carmageddon Max Pack,Action,chidvd,5
3,282010,Carmageddon Max Pack,Action,aerpub,1
4,282010,Carmageddon Max Pack,Action,76561198001688490,5
...,...,...,...,...,...
3382334,80,Counter-Strike: Condition Zero,Action,jazzecks,5
3382335,80,Counter-Strike: Condition Zero,Action,lachwe,5
3382336,80,Counter-Strike: Condition Zero,Action,danebuchanan,5
3382337,80,Counter-Strike: Condition Zero,Action,76561198208507532,1


In [7]:
df_mod_user = df_model[['user_id', 'item_name', 'rating']]
df_mod_user.drop_duplicates(inplace=True)
df_mod_user

Unnamed: 0,user_id,item_name,rating
0,UTNerd24,Carmageddon Max Pack,1
1,saint556,Carmageddon Max Pack,5
2,chidvd,Carmageddon Max Pack,5
3,aerpub,Carmageddon Max Pack,1
4,76561198001688490,Carmageddon Max Pack,5
...,...,...,...
3382334,jazzecks,Counter-Strike: Condition Zero,5
3382335,lachwe,Counter-Strike: Condition Zero,5
3382336,danebuchanan,Counter-Strike: Condition Zero,5
3382337,76561198208507532,Counter-Strike: Condition Zero,1


In [8]:
# Vamos a crear una matriz que contenga el 'user_id' como índices, los nombres de los elementos como columnas y la 'calificación' como valores.
u_matrix = df_mod_user.pivot_table(index=['user_id'], columns=['item_name'], values='rating')
u_matrix

item_name,! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,$1 Ride,'n Verlore Verstand,001 Game Creator,0RBITALIS,1 Moment Of Time: Silentville,"1,000 Heads Among the Trees",10 Second Ninja,...,rymdkapsel,sZone-Online,samurai_jazz,simian.interface++,stratO,the static speaks my name,theBlu,theHunter: Primal,侠客风云传(Tale of Wuxia),軒轅劍外傳穹之扉(The Gate of Firmament)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,,,,,,,,,,,...,,3.0,,,,,,,,
--ace--,,,,,,,,,,,...,,,,,,,,,,
--ionex--,,,,,,,,,,,...,,,,,,,,,,
-2SV-vuLB-Kg,,,,,,,,,,,...,,,,,,,,,,
-Azsael-,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwanzigdrei,,,,,,,,,,,...,,,,,,,,,,
zy0705,,,,,,,,,,,...,,,,,,,,,,
zynxgameth,,,,,,,,,,,...,,,,,,,,,,
zyr0n1c,,,,,,,,,,,...,,,,,,,,,,


In [9]:
# Para ahorrar memoria, trabajemos con una muestra de los datos
random_rows = 5000
u_matrix_sample = u_matrix.sample(n=random_rows, random_state=42)


In [10]:
# Vamos a normalizar los valores del marco de datos. La normalización ajusta las calificaciones de un usuario, se centran en cero y se escalan según su variabilidad.
scaler = MinMaxScaler()
umatrix_norm = pd.DataFrame(scaler.fit_transform(u_matrix_sample), columns=u_matrix_sample.columns, index=u_matrix_sample.index)

# Las columnas que contienen solo cero o no tienen calificación se eliminan, los espacios en blanco se llenan con 0.
umatrix_norm.fillna(0, inplace=True)
umatrix_norm = umatrix_norm.T
umatrix_norm = umatrix_norm.loc[:, (umatrix_norm != 0).any(axis=0)]
umatrix_norm

user_id,bhim_a_saurus,76561198062325995,fatkings,76561198087282941,76561198059870308,flix141,76561198044865157,marhan,76561198001115538,IronMoose,...,kinit,76561198141851819,Orphaned,FakeTaxiServicesAU,76561198072465460,MessWithNoOne,Failkills007,76561198114969651,tomtoth,76561197979007418
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'n Verlore Verstand,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
the static speaks my name,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theBlu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Estamos convirtiendo umatrix_norm a un formato de matriz dispersa para reducir la memoria utilizada y mejorar la eficiencia en el manejo de grandes conjuntos de datos, especialmente cuando la mayoría de los valores de la matriz son ceros. Luego también normalizamos la matriz dispersa.


In [11]:
um_sparse = sp.sparse.csr_matrix(umatrix_norm.values)
um_sparse

<6941x4006 sparse matrix of type '<class 'numpy.float64'>'
	with 194371 stored elements in Compressed Sparse Row format>

In [12]:
um_sparse_normalized = normalize(um_sparse)

# Ahora, aplicamos el modelo de similitud del coseno a la matriz dispersa normalizada transpuesta
user_similarity = cosine_similarity(um_sparse_normalized.T)

# Ahora, vamos a guardar la matriz del modelo en un marco de datos para poder usarla en nuestra función.
user_sim_df = pd.DataFrame(user_similarity, index = umatrix_norm.columns, columns = umatrix_norm.columns)

user_sim_df


user_id,bhim_a_saurus,76561198062325995,fatkings,76561198087282941,76561198059870308,flix141,76561198044865157,marhan,76561198001115538,IronMoose,...,kinit,76561198141851819,Orphaned,FakeTaxiServicesAU,76561198072465460,MessWithNoOne,Failkills007,76561198114969651,tomtoth,76561197979007418
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bhim_a_saurus,1.000000,0.004569,0.027144,0.014563,0.009962,0.017109,0.032447,0.042184,0.081065,0.079740,...,0.121768,0.020920,0.037355,0.045373,0.009045,0.017793,0.127120,0.046260,0.071285,0.022207
76561198062325995,0.004569,1.000000,0.011623,0.017213,0.128583,0.051515,0.005247,0.000000,0.010833,0.003111,...,0.001303,0.012860,0.001524,0.051340,0.029986,0.015442,0.005058,0.008095,0.005893,0.006627
fatkings,0.027144,0.011623,1.000000,0.089673,0.048787,0.030273,0.008513,0.049842,0.130623,0.036960,...,0.031279,0.023614,0.024358,0.008083,0.016496,0.042597,0.031343,0.026740,0.004640,0.002295
76561198087282941,0.014563,0.017213,0.089673,1.000000,0.021808,0.027765,0.054552,0.014846,0.048999,0.019943,...,0.001120,0.012566,0.007245,0.011813,0.012101,0.086399,0.002394,0.012513,0.004033,0.002493
76561198059870308,0.009962,0.128583,0.048787,0.021808,1.000000,0.032977,0.016720,0.010927,0.044304,0.010528,...,0.002228,0.054390,0.004750,0.051925,0.069932,0.021733,0.046563,0.060251,0.027092,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MessWithNoOne,0.017793,0.015442,0.042597,0.086399,0.021733,0.050672,0.004986,0.005557,0.068235,0.016234,...,0.002467,0.035534,0.035943,0.027982,0.029496,1.000000,0.002327,0.005841,0.005487,0.012548
Failkills007,0.127120,0.005058,0.031343,0.002394,0.046563,0.040232,0.018877,0.066428,0.049477,0.089743,...,0.104754,0.007209,0.029139,0.025613,0.004518,0.002327,1.000000,0.027156,0.016817,0.014786
76561198114969651,0.046260,0.008095,0.026740,0.012513,0.060251,0.032282,0.014267,0.025634,0.036455,0.049860,...,0.035099,0.040122,0.012548,0.034865,0.005691,0.005841,0.027156,1.000000,0.019395,0.005541
tomtoth,0.071285,0.005893,0.004640,0.004033,0.027092,0.002698,0.001612,0.008094,0.010509,0.027955,...,0.012948,0.001351,0.039522,0.005497,0.000572,0.005487,0.016817,0.019395,1.000000,0.000000


Crear la función de recomendación de elementos de usuario.

In [13]:
def similar_user_recs(user: str):
    # Comprobar si el usuario está presente en las columnas umatrix_norm (si no, devolver un mensaje)
    if user not in umatrix_norm.columns:
        return 'No hay datos disponibles para el usuario {}'.format(user)
    
    # Obtener los usuarios más similares al usuario dado
    sim_users = user_sim_df.sort_values(by=user, ascending=False).index[1:11]
    
    best = []  # Lista para almacenar los artículos mejor valorados por usuarios similares
    most_common = {}  # Diccionario para contar cuantas veces se recomienda cada artículo
    
    # Para cada usuario similar, busque el artículo mejor calificado y agréguelo a la lista de "mejores"
    for i in sim_users:
        max_score = umatrix_norm.loc[:, i].max()
        best.extend(umatrix_norm[umatrix_norm.loc[:, i] == max_score].index.tolist())
           
    # Cuenta cuántas veces se recomienda cada artículo
    for j in best:
        most_common[j] = most_common.get(j, 0) + 1
    
    # Ordenar elementos por frecuencia de recomendación en orden descendente
    sorted_list = sorted(most_common.items(), key=operator.itemgetter(1), reverse=True)
    
    # Devolver los 5 artículos más recomendados
    return 'Usuarios similares a {}: también les gustó'.format(user), sorted_list[:5]

In [14]:
# Comprobando la función
similar_user_recs('tomtoth')

('Usuarios similares a tomtoth: también les gustó',
 [('Unturned', 9),
  ('Counter-Strike: Global Offensive', 8),
  ('PAYDAY 2', 8),
  ("Garry's Mod", 7),
  ('Left 4 Dead 2', 7)])

### Item Recomendation
Este modelo de modelo tendrá una relación ítem-ítem, es decir, se toma un ítem, en base a qué tan similar sea ese ítem al resto, se recomiendan similares. Aquí la entrada es un juego y la salida es una lista de juegos recomendados.

In [15]:
df_mod_game = df_model[['item_id', 'item_name', 'genres']]
df_mod_game.drop_duplicates(inplace=True)
df_mod_game


Unnamed: 0,item_id,item_name,genres
0,282010,Carmageddon Max Pack,Action
55,282010,Carmageddon Max Pack,Indie
110,282010,Carmageddon Max Pack,Racing
165,70,Half-Life,Action
2866,1640,Disciples II: Gallean's Return,Strategy
...,...,...,...
3379988,30,Day of Defeat,Action
3380548,200980,Geneforge 2,Strategy
3380555,200980,Geneforge 2,RPG
3380562,200980,Geneforge 2,Indie


In [16]:
# Creamos una nueva columna 'genre_tag' con valores 1.
df_mod_game['genre_tag'] = 1 
df_mod_game

Unnamed: 0,item_id,item_name,genres,genre_tag
0,282010,Carmageddon Max Pack,Action,1
55,282010,Carmageddon Max Pack,Indie,1
110,282010,Carmageddon Max Pack,Racing,1
165,70,Half-Life,Action,1
2866,1640,Disciples II: Gallean's Return,Strategy,1
...,...,...,...,...
3379988,30,Day of Defeat,Action,1
3380548,200980,Geneforge 2,Strategy,1
3380555,200980,Geneforge 2,RPG,1
3380562,200980,Geneforge 2,Indie,1


In [17]:
# Creamos un marco de datos auxiliar 'df_id' para extraer el id_item para nuestra función de recomendación.
df_id = df_mod_game[['item_id', 'item_name']].drop_duplicates()

df_id

Unnamed: 0,item_id,item_name
0,282010,Carmageddon Max Pack
165,70,Half-Life
2866,1640,Disciples II: Gallean's Return
2902,1630,Disciples II: Rise of the Elves
2918,3800,Advent Rising
...,...,...
3379810,40990,Mafia
3379946,259340,Chicken Shoot Gold
3379988,30,Day of Defeat
3380548,200980,Geneforge 2


In [18]:
# Vamos a crear una matriz que contenga los 'item_names' como índices, los 'genres' como columnas y la 'genre_tag' como valores.

In [19]:
g_matrix = df_mod_game.pivot_table(index='item_name', columns='genres', values='genre_tag', fill_value=0)
g_matrix

genres,Action,Action RPG,Action-Adventure,Adventure,Aliens,Animation & Modeling,Anime,Arcade,Arena Shooter,Atmospheric,...,Violent,Visual Novel,Voxel,Walking Simulator,War,Wargame,Web Publishing,Werewolves,Zombies,e-sports
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'n Verlore Verstand,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
the static speaks my name,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theBlu,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Vamos a convertir g_matrix_sample a un formato de matriz dispersa para reducir la memoria utilizada y mejorar la eficiencia en el manejo de grandes conjuntos de datos, especialmente cuando la mayoría de los valores de la matriz son ceros.

In [20]:
game_sparse = sp.sparse.csr_matrix(g_matrix.values)

# Aplicar el modelo de similitud del coseno a la matriz sparse.
game_similarity = cosine_similarity(game_sparse)

# Guardando el modelo matrix en un nuev dataframe
game_sim_df = pd.DataFrame(game_similarity, index = g_matrix.index, columns = g_matrix.index)
game_sim_df


item_name,! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,$1 Ride,'n Verlore Verstand,001 Game Creator,0RBITALIS,1 Moment Of Time: Silentville,"1,000 Heads Among the Trees",10 Second Ninja,...,rymdkapsel,sZone-Online,samurai_jazz,simian.interface++,stratO,the static speaks my name,theBlu,theHunter: Primal,侠客风云传(Tale of Wuxia),軒轅劍外傳穹之扉(The Gate of Firmament)
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,1.000000,0.500000,0.288675,0.866025,0.353553,0.0,0.353553,0.707107,0.500000,0.707107,...,0.353553,0.250000,0.866025,0.707107,0.577350,0.500000,0.612372,0.577350,0.447214,0.000000
"""Glow Ball"" - The billiard puzzle game",0.500000,1.000000,0.577350,0.577350,0.235702,0.0,0.353553,0.353553,0.500000,0.353553,...,0.707107,0.000000,0.288675,0.707107,0.288675,0.250000,0.408248,0.000000,0.447214,0.000000
#SelfieTennis,0.288675,0.577350,1.000000,0.333333,0.272166,0.0,0.408248,0.408248,0.577350,0.408248,...,0.408248,0.000000,0.333333,0.408248,0.333333,0.288675,0.471405,0.000000,0.000000,0.000000
$1 Ride,0.866025,0.577350,0.333333,1.000000,0.272166,0.0,0.408248,0.408248,0.577350,0.816497,...,0.408248,0.288675,0.666667,0.816497,0.666667,0.288675,0.471405,0.333333,0.258199,0.000000
'n Verlore Verstand,0.353553,0.235702,0.272166,0.272166,1.000000,0.0,0.166667,0.333333,0.235702,0.166667,...,0.166667,0.000000,0.272166,0.333333,0.136083,0.235702,0.384900,0.136083,0.210819,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
the static speaks my name,0.500000,0.250000,0.288675,0.288675,0.235702,0.0,0.353553,0.707107,0.500000,0.353553,...,0.353553,0.500000,0.577350,0.353553,0.288675,1.000000,0.408248,0.288675,0.447214,0.500000
theBlu,0.612372,0.408248,0.471405,0.471405,0.384900,0.0,0.577350,0.577350,0.408248,0.288675,...,0.288675,0.000000,0.471405,0.577350,0.471405,0.408248,1.000000,0.471405,0.547723,0.000000
theHunter: Primal,0.577350,0.000000,0.000000,0.333333,0.136083,0.0,0.408248,0.408248,0.000000,0.408248,...,0.000000,0.288675,0.666667,0.000000,0.666667,0.288675,0.471405,1.000000,0.516398,0.000000
侠客风云传(Tale of Wuxia),0.447214,0.447214,0.000000,0.258199,0.210819,0.0,0.316228,0.316228,0.000000,0.000000,...,0.316228,0.223607,0.258199,0.316228,0.258199,0.447214,0.547723,0.516398,1.000000,0.447214


In [21]:
# Creando la función de recomendación de Items
def get_recommendations_by_id(item_id: int):
    # Obtener el nombre del item por el ID
    game_name = df_id.loc[df_id['item_id'] == item_id, 'item_name'].iloc[0]

    # Verificando si existen items similares en la matriz
    if game_name not in game_sim_df.index:
        return [], f"ID {item_id} has not data avalible."

    # Obteniendo la fila correspondiente al item
    game_row = game_sim_df.loc[game_name]

    # Encuentra artículos similares ordenando la fila
    similar_games = game_sim_df.dot(game_row).sort_values(ascending=False)

    # Eliminar el elemento en sí de la lista de recomendaciones
    similar_games = similar_games.drop(game_name)

    # Tome los primeros 5 juegos como recomendaciones y regrese en formato de lista
    recommendations = similar_games.nlargest(5).index.tolist()

    return 'Recomendación de artículos similares al artìculo {}'.format(item_id), recommendations

In [22]:
row = df_id[df_id['item_name'] == 'theHunter: Primal']
row

Unnamed: 0,item_id,item_name
1894349,322920,theHunter: Primal


In [23]:
get_recommendations_by_id(322920)

('Recomendación de artículos similares al artìculo 322920',
 ['Dungeon of Zolthan',
  'Epsilon corp.',
  'Miasmata',
  'The Ball',
  'Enter the Gungeon'])

In [24]:
dfs = [umatrix_norm, user_sim_df, df_id, game_sim_df,]

# Nombres correspondientes a cada DataFrame
names = ['umatrix_norm','user_sim', 'df_id', 'game_sim']

# Guardando los dataframes como archivos parquet para uso de la API
ut.save_to_pq(dfs, names)

DataFrame 'umatrix_norm' save as '../Data/parquet/umatrix_norm.parquet'
DataFrame 'user_sim' save as '../Data/parquet/user_sim.parquet'
DataFrame 'df_id' save as '../Data/parquet/df_id.parquet'
DataFrame 'game_sim' save as '../Data/parquet/game_sim.parquet'


In [25]:
user_sim_df.to_parquet('../Data/parquet/user_sim.parquet.gz', compression='gzip', index=False)

game_sim_df.to_parquet('../Data/parquet/game_sim.parquet.gz', compression='gzip', index=False)
