In [91]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import seaborn as sns
import numpy as np
from scipy.stats import percentileofscore
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import re
import tkinter as tk
from tkinter import messagebox


In [92]:
# Load every raw Steam CSV export from the working directory.
# The target names and the file names below are listed in the same order.
(
    steam_description_df,
    steam_media_df,
    steam_requirements_df,
    steam_support_df,
    steam_user,
    steam_df,
    steamspy_tag_df,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        "steam_description_data.csv",
        "steam_media_data.csv",
        "steam_requirements_data.csv",
        "steam_support_info.csv",
        "steam_user_behavior.csv",
        "steam.csv",
        "steamspy_tag_data.csv",
    )
)

In [93]:
# Split the raw user log by action type: "purchase" rows mark ownership,
# "play" rows carry the played hours in the "Value" column.
purchase_data = steam_user[steam_user['Action'] == 'purchase'].copy()
play_data = steam_user[steam_user['Action'] == 'play'].copy()

# Tag each subset with its own value column so both survive the merge.
purchase_data['Value_purchase'] = 1
play_data['Value_play'] = play_data['Value']

# Left join: keep every purchased game, including the ones never played.
merged_data = pd.merge(purchase_data, play_data, on=['UserID', 'Name'], how='left')

# Purchases without a matching "play" row get 0 played hours.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column: that form is chained assignment, which pandas deprecates and
# which does not modify the frame when Copy-on-Write is enabled.
merged_data['Value_play'] = merged_data['Value_play'].fillna(0)

# Drop the merge leftovers (suffixed duplicates of "Action" and "Value").
merged_data = merged_data.drop(columns=['Action_x', 'Action_y', 'Value_x', 'Value_y'])
steam_user_clean = merged_data.drop(columns='Value_purchase')


print(steam_user_clean.head())

      UserID                        Name  Value_play
0  151603712  The Elder Scrolls V Skyrim       273.0
1  151603712                   Fallout 4        87.0
2  151603712                       Spore        14.9
3  151603712           Fallout New Vegas        12.1
4  151603712               Left 4 Dead 2         8.9


In [94]:
# Percentile rank (0-100] of each row's played hours within the whole column.
# For a score that is itself part of the data, scipy's
# percentileofscore(kind='rank') equals average_rank / n * 100, so a single
# rank() pass replaces the original one-full-column-scan-per-row computation.
percentiles = (
    steam_user_clean['Value_play'].rank(method='average') * 100 / len(steam_user_clean)
)

# Truncate to whole percentile points; the values are positive, so floor()
# matches the original int() conversion.
rating = np.floor(percentiles).astype('int64')

# Attach the rating as a new "Rating" column.
steam_user_clean['Rating'] = rating

In [95]:
# Rename "appid" to "steam_appid" in both frames so they can be joined on
# the "steam_appid" key.
steam_df = steam_df.rename(columns={"appid": "steam_appid"})
steamspy_tag_df = steamspy_tag_df.rename(columns={"appid": "steam_appid"})

In [96]:
# Join all per-game tables into one wide frame, keyed on "steam_appid".
# merge() defaults to an inner join, so only games present in every table remain.
games_df = (
    steam_description_df
    .merge(steam_media_df, on="steam_appid")
    .merge(steam_requirements_df, on="steam_appid")
    .merge(steam_support_df, on="steam_appid")
    .merge(steam_df, on="steam_appid")
    .merge(steamspy_tag_df, on="steam_appid")
)

In [97]:
# Keep only the game attributes used downstream.
# .copy() makes this an independent frame: it is renamed and edited in later
# cells, and doing that on a bare column selection raises SettingWithCopyWarning.
games_df_clean = games_df[
    [
        'steam_appid', 'short_description', 'name', 'release_date', 'developer',
        'platforms', 'categories', 'genres', 'steamspy_tags', 'price',
    ]
].copy()

In [98]:
# Match the user table's "Name" column so both frames can be merged on it.
# Assigning the renamed frame (instead of inplace=True) avoids mutating a frame
# that was derived from a column selection of games_df.
games_df_clean = games_df_clean.rename(columns={"name": "Name"})

In [99]:
def _normalize_name(name):
    """Return a game title in the canonical form used as the merge key.

    Steps, in order: drop every character that is not an ASCII letter, a digit
    or whitespace; collapse whitespace runs into single spaces (also trimming
    both ends); title-case each word.

    Non-string values (e.g. NaN for a missing title) are returned unchanged
    instead of raising inside ``re.sub``.
    """
    if not isinstance(name, str):
        return name
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', name)
    return ' '.join(alnum_only.split()).title()


# Apply the same normalisation to both tables so their "Name" values line up.
# assign() builds new frames rather than writing into possibly-derived ones.
steam_user_clean = steam_user_clean.assign(Name=steam_user_clean['Name'].map(_normalize_name))
games_df_clean = games_df_clean.assign(Name=games_df_clean['Name'].map(_normalize_name))


# Persist the cleaned tables.
steam_user_clean.to_csv('steam_user_clean.csv', index=False)
games_df_clean.to_csv('games_df_clean.csv', index=False)

In [100]:
# Attach game metadata to every user/game row via the normalised "Name" key,
# then discard rows with any missing value (e.g. titles with no metadata match).
df = steam_user_clean.merge(games_df_clean, on='Name', how='left').dropna()
# Move "Rating" to the last column position.
df = df[df.columns.drop('Rating').tolist() + ['Rating']]
df.head()

Unnamed: 0,UserID,Name,Value_play,steam_appid,short_description,release_date,developer,platforms,categories,genres,steamspy_tags,price,Rating
0,151603712,The Elder Scrolls V Skyrim,273.0,72850.0,EPIC FANTASY REBORN The next chapter in the hi...,2011-11-10,Bethesda Game Studios,windows,Single-player;Steam Achievements;Steam Trading...,RPG,Open World;RPG;Fantasy,9.99,98
1,151603712,Fallout 4,87.0,377160.0,"Bethesda Game Studios, the award-winning creat...",2015-11-09,Bethesda Game Studios,windows,Single-player;Steam Achievements;Full controll...,RPG,Open World;Post-apocalyptic;Exploration,19.99,95
2,151603712,Spore,14.9,17390.0,"From Single Cell to Galactic God, evolve your ...",2008-12-19,Maxis™,windows,Single-player,Simulation,Simulation;Sandbox;Open World,14.99,84
3,151603712,Fallout New Vegas,12.1,22380.0,Welcome to Vegas. New Vegas. Enjoy your stay!,2010-10-21,Obsidian Entertainment,windows,Single-player;Steam Achievements;Partial Contr...,Action;RPG,Open World;RPG;Post-apocalyptic,7.99,82
4,151603712,Left 4 Dead 2,8.9,550.0,"Set in the zombie apocalypse, Left 4 Dead 2 (L...",2009-11-19,Valve,windows;mac;linux,Single-player;Multi-player;Co-op;Steam Achieve...,Action,Zombies;Co-op;FPS,7.19,79


In [108]:
# Columns used by the recommender: identifiers, the descriptive game
# attributes that feed the text model, and the derived rating.
columns = ['UserID', 'Name', 'genres', 'categories', 'platforms', 'price', 'Rating']

# Explicit copy: new columns are added below, and writing to a bare column
# selection of `df` raises pandas' SettingWithCopyWarning.
df_filtered = df[columns].copy()

# Concatenate the descriptive attributes into one text field per row.
df_filtered['combined_features'] = (
    df_filtered['genres'] + ' '
    + df_filtered['categories'] + ' '
    + df_filtered['platforms'] + ' '
    + df_filtered['price'].astype(str)
)

# TF-IDF representation of the combined text, one matrix row per frame row.
tfidf = TfidfVectorizer()
user_matrix = tfidf.fit_transform(df_filtered['combined_features'])

# K-means clustering over the TF-IDF rows.
k = 20  # number of clusters
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(user_matrix)

# Store each row's cluster label next to its data.
df_filtered['Cluster'] = kmeans.labels_

def obtener_recomendaciones(usuario, data=None, n=5):
    """Return up to ``n`` game names recommended for a user.

    The user's cluster is the ``Cluster`` value of the first row belonging to
    that user. Recommendations are the unique names found in that cluster,
    in row order, excluding names that already appear in the user's own rows.

    Args:
        usuario: Value to match against the ``UserID`` column.
        data: Frame with ``UserID``, ``Name`` and ``Cluster`` columns.
            Defaults to the notebook-level ``df_filtered``.
        n: Maximum number of names to return (default 5).

    Returns:
        A NumPy array of game names; empty when the user has no rows in
        ``data`` (the original indexing raised IndexError in that case).
    """
    if data is None:
        data = df_filtered

    user_rows = data.loc[data['UserID'] == usuario]
    if user_rows.empty:
        # Unknown user: nothing to base a recommendation on.
        return np.array([], dtype=object)

    user_cluster = user_rows['Cluster'].iloc[0]

    # Candidate games are all rows sharing the user's cluster.
    cluster_games = data.loc[data['Cluster'] == user_cluster]

    # Leave out the games the user already has.
    played_games = user_rows['Name'].unique()
    recommended_games = cluster_games.loc[~cluster_games['Name'].isin(played_games), 'Name'].unique()

    return recommended_games[:n]


def obtener_recomendaciones_usuario():
    """Button callback: read the user ID from the entry and show recommendations.

    Reads the text of ``entry_usuario``, validates it as an integer user ID,
    checks that the ID exists in ``df_filtered`` and shows the result of
    ``obtener_recomendaciones`` in a message box. Every failure path shows a
    dialog and returns, so no exception escapes into the Tk event loop.
    """
    texto_usuario = entry_usuario.get().strip()

    # Non-numeric input previously raised an uncaught ValueError in int().
    try:
        usuario_id = int(texto_usuario)
    except ValueError:
        messagebox.showerror("Error", "El ID de usuario debe ser un número entero.")
        return

    if not (df_filtered['UserID'] == usuario_id).any():
        messagebox.showerror("Error", "Usuario no encontrado.")
        return

    recomendaciones = obtener_recomendaciones(usuario_id)

    # An empty result would otherwise show a blank dialog.
    if len(recomendaciones) == 0:
        messagebox.showinfo("Recomendaciones", "No hay recomendaciones disponibles para este usuario.")
        return

    # Show the recommended games, one per line, in a pop-up window.
    messagebox.showinfo("Recomendaciones", "\n".join(map(str, recomendaciones)))


# Main application window.
ventana = tk.Tk()
ventana.title("Recomendaciones de videojuegos")

# Text entry where the user ID is typed; read by the button callback.
entry_usuario = tk.Entry(master=ventana)
entry_usuario.pack()

# Button that triggers the recommendation lookup.
boton_obtener_recomendaciones = tk.Button(
    master=ventana,
    text="Obtener Recomendaciones",
    command=obtener_recomendaciones_usuario,
)
boton_obtener_recomendaciones.pack()

# Hand control to the Tk event loop (returns once the window is closed).
ventana.mainloop()
