def userforgenre( género : str ): Top 5 de usuarios con más horas de juego en el género dado, con su URL (del user) y user_id.

In [1]:
import pandas as pd
import numpy as np
import ast

In [2]:
# Build a table of playtime per genre for every user.
# Only the game id and its genre list are read from the catalogue file.
columnas = ['id', 'genres']
df = (
    pd.read_csv('steam_games.csv', usecols=columnas)
    # Rename the key to 'item_id', the name used when merging with the items file
    .rename(columns={'id': 'item_id'})
    # Discard games that have no genre defined
    .dropna(subset=['genres'])
)

In [3]:
# Turn a stringified list (or any iterable) into a real Python list
def tolist(lst):
    """Return ``lst`` as a list, parsing it first when it is a string.

    String input is expected to hold a Python list literal. Before parsing,
    every ``0's`` is rewritten to ``0s`` so that values such as ``1990's`` do
    not terminate a single-quoted item early. Non-string input is passed
    straight to ``list()``.
    """
    if not isinstance(lst, str):
        return list(lst)

    cleaned = lst.replace("0's", '0s')
    return list(ast.literal_eval(cleaned))
# Parse every entry of the genres column with tolist
df['genres'] = df['genres'].apply(tolist)

In [4]:
# Build one binary (0/1) indicator column per genre
def create_genre_columns(dataframe):
    """Add a 0/1 indicator column for every genre in ``dataframe['genres']``.

    The frame is modified in place and also returned, so callers relying on
    either behaviour keep working.

    Args:
        dataframe: DataFrame with a ``genres`` column whose entries are lists
            of genre names. Entries that are not lists (for example NaN) are
            treated as having no genre.

    Returns:
        The same ``dataframe`` object with one new 0/1 column per distinct
        genre, added in sorted order.
    """
    # Collect the distinct genre names; non-list entries contribute nothing.
    distinct_genres = set()
    for genres in dataframe['genres']:
        if isinstance(genres, list):
            distinct_genres.update(genres)

    # Sorting gives a reproducible column order; iterating the raw set would
    # depend on string hashing, which can change between interpreter runs.
    for genre in sorted(distinct_genres, key=str):
        # Same list guard as above, so a non-list row yields 0 instead of
        # raising TypeError on the membership test.
        dataframe[genre] = dataframe['genres'].apply(
            lambda entry, genre=genre: (
                1 if isinstance(entry, list) and genre in entry else 0
            )
        )

    return dataframe

In [5]:
# Add the indicator columns (create_genre_columns mutates df and returns it)
df_binario = create_genre_columns(df)
# The raw list column is no longer needed once the indicators exist
df_binario.drop(columns=['genres'], inplace=True)

In [6]:
# Load the per-user items file, keeping only the columns used below
columnas = ['steam_id', 'user_url', 'item_id', 'playtime_forever']
df_users = (
    pd.read_csv('game_items.csv', usecols=columnas)
    # Left join: every user/item row is kept even without a matching game row
    .merge(df_binario, on='item_id', how='left')
    # Replace every NaN with 0 (unmatched items have NaN in all genre columns)
    .fillna(0)
    .rename(columns={"steam_id": "user_id"})
)

In [7]:
# Display the merged per-user frame (notebook cell output)
df_users

Unnamed: 0,user_id,user_url,item_id,playtime_forever,Web Publishing,Utilities,Sports,Strategy,Racing,RPG,...,Adventure,Software Training,Simulation,Casual,Massively Multiplayer,Education,Indie,Design &amp; Illustration,Photo Editing,Animation &amp; Modeling
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,10,6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,20,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,76561197970982479,http://steamcommunity.com/profiles/76561197970...,30,7,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,76561197970982479,http://steamcommunity.com/profiles/76561197970...,40,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,76561197970982479,http://steamcommunity.com/profiles/76561197970...,50,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5153204,76561198329548331,http://steamcommunity.com/profiles/76561198329...,346330,0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
5153205,76561198329548331,http://steamcommunity.com/profiles/76561198329...,373330,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5153206,76561198329548331,http://steamcommunity.com/profiles/76561198329...,388490,3,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5153207,76561198329548331,http://steamcommunity.com/profiles/76561198329...,521570,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0


In [8]:
# Weight every genre flag by the item's playtime, then total it per user
col_sum = [ 'Indie','Racing','Sports','Audio Production','Animation &amp; Modeling',
            'Video Production','Software Training','Casual','Simulation','Utilities',
            'Photo Editing','Early Access','Accounting','Education',
            'Massively Multiplayer','Action','Web Publishing',
            'Adventure','Strategy','RPG','Design &amp; Illustration','Free to Play']
# NOTE: this overwrites the flag columns of df_users with playtime * flag, so
# running the cell a second time multiplies by playtime again.
df_users[col_sum] = df_users[col_sum].mul(df_users['playtime_forever'], axis=0)
df_users_group = (
    df_users.groupby(['user_id', 'user_url'])[col_sum]
    .sum()
    .reset_index()
)
# Lower-case the column names of the aggregated frame
df_users_group.columns = [name.lower() for name in df_users_group.columns]
df_users_group.head(3)

Unnamed: 0,user_id,user_url,indie,racing,sports,audio production,animation &amp; modeling,video production,software training,casual,...,accounting,education,massively multiplayer,action,web publishing,adventure,strategy,rpg,design &amp; illustration,free to play
0,76561197960269200,http://steamcommunity.com/id/ONIONZZZ,69503.0,18711.0,16038.0,0.0,52.0,0.0,52.0,2758.0,...,0.0,52.0,7534.0,169622.0,0.0,16764.0,7068.0,6525.0,52.0,5432.0
1,76561197960270504,http://steamcommunity.com/id/exaqt,68245.0,22190.0,22086.0,0.0,0.0,0.0,0.0,9713.0,...,0.0,0.0,836.0,117682.0,0.0,35909.0,71778.0,52364.0,0.0,2548.0
2,76561197960270742,http://steamcommunity.com/id/dvs,169.0,0.0,1192.0,0.0,0.0,0.0,0.0,18.0,...,0.0,0.0,0.0,93128.0,0.0,0.0,13.0,0.0,0.0,30.0


In [9]:
# Save the per-user genre totals to the CSV that the API will consume
df_users_group.to_csv('df-funciones-4.csv',index=False)

In [10]:
# Return the top 5 users with the most playtime for a given genre
def genre_top5_users(text : str):
    """Return the users with the highest accumulated playtime in a genre.

    Args:
        text: Genre name; matched case-insensitively, ignoring surrounding
            whitespace, against the column names of ``df-funciones-4.csv``.

    Returns:
        A list of up to five ``{"user_id": ..., "user_url": ...}`` dicts
        ordered from most to least playtime, or the string
        ``"Genre not found"`` when ``text`` is not a genre column.
    """
    text = text.lower().strip()
    df1 = pd.read_csv('df-funciones-4.csv')
    # user_id / user_url are identifier columns, not genres: never rank by them
    if text in ('user_id', 'user_url') or text not in df1.columns:
        return "Genre not found"

    top5 = df1.sort_values(by=text, ascending=False).head(5)
    # .tolist() yields native Python values rather than numpy scalars, and
    # zipping the two columns copes with files holding fewer than five users.
    return [
        {"user_id": user_id, "user_url": user_url}
        for user_id, user_url in zip(
            top5['user_id'].tolist(), top5['user_url'].tolist()
        )
    ]

In [12]:
# Example call: top users for the "Sports" genre
genre_top5_users("Sports")

[{'user_id': 76561197995725436,
  'user_url': 'http://steamcommunity.com/profiles/76561197995725436'},
 {'user_id': 76561198019112245,
  'user_url': 'http://steamcommunity.com/profiles/76561198019112245'},
 {'user_id': 76561198065902279,
  'user_url': 'http://steamcommunity.com/profiles/76561198065902279'},
 {'user_id': 76561198075742344,
  'user_url': 'http://steamcommunity.com/id/29123'},
 {'user_id': 76561198065500375,
  'user_url': 'http://steamcommunity.com/id/express157'}]