def userdata( User_id : str ): 
Debe devolver la cantidad de dinero gastado por el usuario, el porcentaje de recomendación en base a reviews.recommend y la cantidad de items.

In [20]:
#importar las librerias
import pandas as pd
import numpy as np

In [21]:
# Load the video-game dataframe, reading only the columns that are needed
columns = ['id', 'price']
df_precios = pd.read_csv("steam_games.csv", usecols=columns)
df_precios.head()

Unnamed: 0,price,id
0,4.99,761140.0
1,Free To Play,643980.0
2,Free to Play,670290.0
3,0.99,767400.0
4,2.99,773570.0


In [22]:
# Check that the dtypes look right and how many non-null values each column has
df_precios.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32135 entries, 0 to 32134
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   price   30758 non-null  object 
 1   id      32133 non-null  float64
dtypes: float64(1), object(1)
memory usage: 502.2+ KB


In [23]:
# Normalise the `price` column, which mixes numeric values and free-text labels
def fix_price(df):
    """Add a float ``price_fixed`` column derived from ``df['price']``.

    Conversion rules, applied per value:

    * anything ``float()`` accepts (numbers, numeric strings, NaN) is kept
      as that float, so missing prices stay NaN;
    * strings of the form ``'Starting at $<amount>'`` become ``<amount>``;
    * every other value (e.g. ``'Free To Play'``, ``None``) becomes ``0``.

    Args:
        df: DataFrame with a ``price`` column.

    Returns:
        The same DataFrame object, modified in place with the extra
        ``price_fixed`` column.
    """
    import re  # local import: only this function needs it

    starting_at = re.compile(r'Starting at \$(\d+(?:\.\d+)?)')

    def _to_price(value):
        # Numeric values and numeric strings convert directly.
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
        # The "Starting at" labels must be checked before falling back to 0;
        # otherwise they are treated like any other non-numeric text.
        match = starting_at.fullmatch(value) if isinstance(value, str) else None
        return float(match.group(1)) if match else 0.0

    df['price_fixed'] = df['price'].apply(_to_price).astype(float)
    return df

In [24]:
# Fix the prices, rename `id` to `item_id` and discard the raw `price` column
df_precios = (
    fix_price(df_precios)
    .rename(columns={'id': 'item_id'})
    .drop(columns='price')
)
df_precios.head()

Unnamed: 0,item_id,price_fixed
0,761140.0,4.99
1,643980.0,0.0
2,670290.0,0.0
3,767400.0,0.99
4,773570.0,2.99


In [25]:
# Count the missing values in each column
df_precios.isna().sum()

item_id           2
price_fixed    1377
dtype: int64

In [26]:
# Replace missing prices with 0. Only the price column is filled, so rows
# that lack an item_id are not given a made-up id of 0.
df_precios.fillna({'price_fixed': 0}, inplace=True)

In [27]:
# Load the user-items dataframe with only the needed columns, expose the
# `steam_id` column as `user_id` and store it as a string
columns = ['steam_id', 'item_id', 'items_count']
user_items = (
    pd.read_csv("game_items.csv", usecols=columns)
    .rename(columns={"steam_id": "user_id"})
    .astype({"user_id": str})
)

In [28]:
# Left-join the prices onto the user items by item_id; items without a
# matching price row keep a missing price_fixed
df_merged = pd.merge(user_items, df_precios, how='left', on='item_id')
df_merged.sample(10)

Unnamed: 0,user_id,items_count,item_id,price_fixed
3092206,76561198069644815,259,219640,24.99
1561007,76561197982474515,126,211820,14.99
3511699,76561198088032782,203,233190,9.99
3474052,76561198047227341,86,10500,19.99
478762,76561198097639220,101,234140,19.99
1158152,76561198066334301,101,227300,19.99
4377608,76561198046582634,405,268870,29.99
978354,76561197967514845,713,345080,11.99
2439745,76561197986964063,161,236830,
4766242,76561198071178481,93,433850,19.99


In [29]:
# Total money spent per user: sum price_fixed within each user_id and turn
# the grouped result back into a regular two-column dataframe
user_spent = (
    df_merged.groupby('user_id')['price_fixed']
    .sum()
    .reset_index()
)
user_spent["user_id"] = user_spent["user_id"].astype(str)

In [30]:
# Load the reviews dataframe, reading only the columns that are needed
columns = ['user_id', 'recommend']
user_revs = pd.read_csv('game_reviews.csv', usecols=columns)
user_revs.sample(5)

Unnamed: 0,user_id,recommend
40891,dafishguy,True
44515,MixedMag,False
53744,76561198077457233,True
2755,peoplesidiot,True
29022,ForensiX,True


In [31]:
# Compute the recommendation percentage for each user.
# Total reviews per user: `count` only counts non-missing `recommend` values.
user_gb_count_revs = user_revs.groupby('user_id')['recommend'].count().to_frame('count_total')
# Reviews with recommend == True per user. A plain Series is assigned here
# (aligned on user_id); users with no positive review get NaN for now.
positive_reviews = user_revs[user_revs['recommend'].eq(True)]
user_gb_count_revs['count_true'] = positive_reviews.groupby('user_id')['recommend'].count()
user_gb_count_revs.reset_index(inplace=True)
# Share of positive reviews over the total, as a percentage
user_gb_count_revs['reco_porcentaje'] = user_gb_count_revs['count_true'] / user_gb_count_revs['count_total'] * 100
# Users without positive reviews end up with 0 instead of NaN
user_gb_count_revs.fillna(0,inplace=True)
user_gb_count_revs.sample(5)

Unnamed: 0,user_id,count_total,count_true,reco_porcentaje
15141,LordRascus,9,7.0,77.777778
4218,76561198055325222,1,0.0,0.0
15251,MOwneZ,2,2.0,100.0
20186,fenrakk4,2,2.0,100.0
18078,ZareeFish,1,1.0,100.0


In [32]:
# Left-join the per-user spend onto the per-user review stats and expose the
# summed price as `total_spent`
df_merge2 = pd.merge(
    user_gb_count_revs, user_spent, how='left', on='user_id'
).rename(columns={'price_fixed': 'total_spent'})
df_merge2.sample(10)

Unnamed: 0,user_id,count_total,count_true,reco_porcentaje,total_spent
4437,76561198057111829,2,0.0,0.0,230.81
1944,76561198030350231,1,1.0,100.0,678.64
2271,76561198036828583,1,0.0,0.0,550.71
16088,PixieFeet,3,3.0,100.0,
10634,76561198098151600,1,1.0,100.0,0.0
3661,76561198050051190,1,1.0,100.0,980.39
15683,NeoESWhiteoutZ,1,1.0,100.0,
10561,76561198097534775,4,4.0,100.0,
12791,CanDucksFly,1,1.0,100.0,
16603,Sauropod,2,2.0,100.0,


In [33]:
# Bring in items_count per user from user_items (left join), drop the helper
# count columns and give the remaining columns their final names
df_merge3 = (
    df_merge2
    .merge(
        user_items[['user_id', 'items_count']].drop_duplicates(),
        on='user_id',
        how='left',
    )
    .drop(columns=['count_total', 'count_true'])
    .rename(columns={
        'reco_porcentaje': 'porcentaje_recomendacion',
        'total_spent': 'cantidad_gastada',
        'items_count': 'cantidad_items',
    })
)

In [34]:
# Save the final dataframe as CSV so the API can consume it
df_merge3.to_csv('df-funcion-1-1.csv',index=False)

In [35]:
# Query function: total money spent, number of items and recommendation
# percentage for one user
def userdata(user_id : str):
    """Return the precomputed stats of ``user_id`` as a JSON string.

    Reads ``df-funcion-1-1.csv`` from the working directory and selects the
    rows whose ``user_id`` equals the argument.

    Args:
        user_id: Identifier to look up; it is compared as a string.

    Returns:
        A JSON array (``orient='records'``) with the matching rows, or
        ``'[]'`` when the user is not present in the file.
    """
    # Force user_id to be read as text: ids can be purely numeric, and letting
    # pandas infer an integer dtype would make the string comparison below
    # never match.
    df_user = pd.read_csv('df-funcion-1-1.csv', dtype={'user_id': str})
    user_data = df_user[df_user['user_id'] == str(user_id)]

    return user_data.to_json(orient='records')

In [38]:
# Example query for a single user
userdata("MixedMag")

'[{"user_id":"MixedMag","porcentaje_recomendacion":33.3333333333,"cantidad_gastada":null,"cantidad_items":null}]'