def userdata(user_id: str):
Debe devolver la cantidad de dinero gastado por el usuario, el porcentaje de recomendación en base a reviews.recommend y la cantidad de items.

In [1]:
#importar las librerias
import pandas as pd
import numpy as np

In [2]:
# Load the video games table, restricted to the columns this analysis needs
columns = ['id', 'price']
df_precios = pd.read_csv(
    filepath_or_buffer="steam_games.csv",
    usecols=columns,
)
df_precios.head()

Unnamed: 0,price,id
0,4.99,761140.0
1,Free To Play,643980.0
2,Free to Play,670290.0
3,0.99,767400.0
4,2.99,773570.0


In [3]:
# Check that the column dtypes are as expected and whether there are nulls
df_precios.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32135 entries, 0 to 32134
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   price   30758 non-null  object 
 1   id      32133 non-null  float64
dtypes: float64(1), object(1)
memory usage: 502.2+ KB


In [4]:
# Normalise the price column, which mixes numeric values and text labels
def fix_price(df):
    """Add a numeric ``price_fixed`` column derived from ``df['price']``.

    Conversion rules, applied per value:

    * numbers and numeric strings are converted with ``float`` (NaN stays NaN);
    * labels of the form ``'Starting at $499.00'`` yield the amount after the
      prefix (499.0);
    * any other non-numeric value (e.g. ``'Free To Play'``) becomes 0.0.

    The frame is modified in place and also returned, so both
    ``fix_price(df)`` and ``df = fix_price(df)`` work.

    Args:
        df: DataFrame with a ``price`` column.

    Returns:
        The same DataFrame with a float ``price_fixed`` column added.
    """
    starting_prefix = 'Starting at $'

    def _to_number(value):
        # Plain numbers / numeric strings convert directly.
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
        # "Starting at $N" must be checked before the catch-all below;
        # otherwise these priced items would be zeroed like free ones.
        if isinstance(value, str) and value.startswith(starting_prefix):
            try:
                return float(value[len(starting_prefix):])
            except ValueError:
                pass
        # Remaining text labels are treated as a price of zero.
        return 0.0

    df['price_fixed'] = df['price'].apply(_to_number).astype(float)
    return df

In [5]:
# Build the cleaned price table in one pass:
#   1. add the numeric price_fixed column,
#   2. rename the id column to item_id,
#   3. discard the original (raw) price column.
df_precios = (
    fix_price(df_precios)
    .rename(columns={'id': 'item_id'})
    .drop(columns='price')
)
df_precios.head()

Unnamed: 0,item_id,price_fixed
0,761140.0,4.99
1,643980.0,0.0
2,670290.0,0.0
3,767400.0,0.99
4,773570.0,2.99


In [6]:
# Count missing values per column
df_precios.isnull().sum()

item_id           2
price_fixed    1377
dtype: int64

In [7]:
# Replace missing prices with 0.
# Only the price column is filled: a blanket fillna(0) on the whole frame
# would also turn the missing item_id values into a fabricated id 0.
df_precios['price_fixed'] = df_precios['price_fixed'].fillna(0)

In [8]:
# Load the user-items table, restricted to the columns this analysis needs
columns = ['steam_id', 'item_id', 'items_count']
user_items = pd.read_csv("game_items.csv", usecols=columns)
# Expose steam_id under the name user_id and store it as text
user_items = user_items.rename(columns={"steam_id": "user_id"})
user_items = user_items.astype({"user_id": str})

In [9]:
# Left join: keep every user-item row and attach the price of the item
# (items absent from the price table get a missing price)
df_merged = pd.merge(user_items, df_precios, how='left', on='item_id')
df_merged.sample(10)

Unnamed: 0,user_id,items_count,item_id,price_fixed
1449100,76561198072793232,77,236390,0.0
2214885,76561198046089070,198,301300,4.99
2560277,76561198037334070,245,201700,
1226123,76561198038561296,304,98200,24.99
2411209,76561198038230057,200,72850,
4890097,76561198079373364,18,301520,0.0
1348421,76561198057784104,93,7670,19.99
1228689,76561198002925281,269,57300,19.99
2071572,76561198088550185,120,244160,34.99
1184351,76561197992427444,482,225000,6.99


In [10]:
# Total money spent per user: sum the item prices within each user_id group,
# then turn the grouped result back into a two-column frame
user_spent = df_merged.groupby('user_id')['price_fixed'].sum().reset_index()
# Keep user_id as text so it can be matched against the reviews table
user_spent = user_spent.astype({'user_id': str})

In [11]:
# Load the reviews table, restricted to the columns this analysis needs
columns = ['user_id', 'recommend']
user_revs = pd.read_csv(
    filepath_or_buffer='game_reviews.csv',
    usecols=columns,
)
user_revs.sample(5)

Unnamed: 0,user_id,recommend
21335,KNGZigg,True
29582,ethanol10,True
20745,76561198061663176,True
25076,76561198097034946,True
31814,76561198291759171,True


In [12]:
# Recommendation percentage per user.
# Start from the number of reviews each user wrote (non-null `recommend`).
user_gb_count_revs = (
    user_revs.groupby('user_id')['recommend'].count().to_frame('count_total')
)
# Add the number of reviews with recommend == True; the assignment aligns on
# user_id, so users without any positive review get a missing value here.
user_gb_count_revs['count_true'] = (
    user_revs.loc[user_revs['recommend'] == True]
    .groupby('user_id')['recommend']
    .count()
)
user_gb_count_revs = user_gb_count_revs.reset_index()
# Share of positive reviews over the total, expressed as a percentage
user_gb_count_revs['reco_porcentaje'] = (
    user_gb_count_revs['count_true'].div(user_gb_count_revs['count_total']).mul(100)
)
# Missing counts / percentages (no positive reviews) become 0
user_gb_count_revs = user_gb_count_revs.fillna(0)
user_gb_count_revs.sample(5)

Unnamed: 0,user_id,count_total,count_true,reco_porcentaje
12191,AnEvilReindeer,4,2.0,50.0
17119,SyberSybus,1,1.0,100.0
8087,76561198079117055,3,3.0,100.0
2143,76561198034776912,2,1.0,50.0
24033,snubbo,10,6.0,60.0


In [13]:
# Left join the per-user review stats with the per-user spending, keeping
# every reviewer, and expose the summed price as total_spent.
# NOTE(review): the sample output shows total_spent missing for several
# reviewers (e.g. name-like ids); presumably those ids have no match among
# the steam_id-derived ids in user_spent -- confirm against the source data.
df_merge2 = pd.merge(
    user_gb_count_revs,
    user_spent,
    how='left',
    on='user_id',
).rename(columns={'price_fixed': 'total_spent'})
df_merge2.sample(10)

Unnamed: 0,user_id,count_total,count_true,reco_porcentaje,total_spent
8947,76561198084544554,5,5.0,100.0,1106.28
1274,76561198011526765,1,1.0,100.0,
20323,frikcha,8,7.0,87.5,
3126,76561198045613225,3,3.0,100.0,1593.74
10439,76561198096068574,1,1.0,100.0,1502.36
14463,ItsLeithal,5,5.0,100.0,
22583,mygrandsonnathan,2,2.0,100.0,
9789,76561198091034264,10,10.0,100.0,4138.24
22489,mr_pie_nice_cheese,2,2.0,100.0,
3634,76561198049813970,1,1.0,100.0,2325.64


In [14]:
# Final per-user table:
#   - attach each user's item count (one row per distinct user_id/items_count pair),
#   - drop the intermediate review counters,
#   - give the remaining columns their output names.
df_merge3 = (
    df_merge2
    .merge(
        user_items[['user_id', 'items_count']].drop_duplicates(),
        on='user_id',
        how='left',
    )
    .drop(columns=['count_total', 'count_true'])
    .rename(columns={
        'reco_porcentaje': 'porcentaje_recomendacion',
        'total_spent': 'cantidad_gastada',
        'items_count': 'cantidad_items',
    })
)

In [15]:
# Persist the final table as CSV (without the index) for the API to consume
df_merge3.to_csv(path_or_buf='df-funcion-1-1.csv', index=False)

In [16]:
# Query function: total money spent, number of items and recommendation
# percentage for a single user
def userdata(user_id : str):
    """Return the stored statistics for ``user_id`` as a JSON string.

    Reads ``df-funcion-1-1.csv`` and selects the rows whose ``user_id``
    equals the argument.

    Args:
        user_id: Identifier of the user to look up.

    Returns:
        A JSON array (``orient='records'``) with one object per matching row;
        ``'[]'`` when the user is not present in the file.
    """
    # Force user_id to be read as text: with dtype inference, ids made only of
    # digits can be parsed as integers and would never equal the str argument.
    df_user = pd.read_csv('df-funcion-1-1.csv', dtype={'user_id': str})
    user_data = df_user[df_user['user_id'] == str(user_id)]

    return user_data.to_json(orient='records')

In [19]:
# Example query for one user; in the output shown below this user has a
# recommendation percentage but no spending or item-count data (null).
userdata("pikawuu2")	

'[{"user_id":"pikawuu2","porcentaje_recomendacion":100.0,"cantidad_gastada":null,"cantidad_items":null}]'