In [58]:
import pandas as pd
import numpy as np

In [59]:
# df_steam_user = pd.read_csv(
#     '../../data/raw/steam-200k.csv',
#     sep=',',
#     header=None,
#     names=['USER_ID', 'GAME_TITLE', 'BEHAVIOR_NAME', 'VALUE', '0']
# )

df_steam_games_raw = pd.read_csv(
    '../../data/raw/games.csv',
    usecols=['Name', 'Required age', 'Price', 'Windows', 'Achievements', 'Categories', 'Genres']
)

In [60]:
pd.options.display.max_columns = 200

In [61]:
def cria_identificadores_genero(df:pd.DataFrame, col:str, sep:str = ',') -> pd.DataFrame:
    """
    Função que cria identificadores para jogos a partir de colunas contendo strings separadas por um delimitador comum
    """
    df = df.copy()

    generos = np.unique(
        df[col]
        .drop_duplicates()
        .str.cat(sep=sep)
        .split(sep)
    )

    for genero in generos:
        df[f'{genero}'.upper().replace(' ','_')] = (
            df[col].str.contains(genero, regex=True)
        )

    return df

In [122]:
def preprocessa_steam_games(df_steam_games: pd.DataFrame) -> pd.DataFrame:
    """
    Função que realiza o pré-processamento do dataset de jogos da Steam
    """

    # Tratamentos iniciais
    df_steam_games = df_steam_games[
        (df_steam_games['Windows'] == True) & (df_steam_games['Price'] <= 200)
    ].copy()

    df_steam_games = df_steam_games.dropna(subset=['Genres', 'Categories'])

    # Faixa de preço
    df_steam_games['FAIXA_PRECO'] = pd.cut(df_steam_games['Price'].round(0), bins=20)

    # Identifica se o jogo é para maiores de idade
    df_steam_games['PARA_MAIORES'] = df_steam_games['Required age'] >= 18

    # Separa generos em colunas
    df_steam_games = cria_identificadores_genero(df_steam_games, 'Genres')
    df_steam_games = cria_identificadores_genero(df_steam_games, 'Categories')

    # Transforma booleano em numérico
    bool_cols = df_steam_games.select_dtypes('bool').columns
    df_steam_games[bool_cols] = df_steam_games[bool_cols].astype('int64')

    df_steam_games = df_steam_games.drop(['Genres', 'Categories', 'Required age', 'Windows', 'Price'], axis=1)
    return df_steam_games

In [123]:
df = preprocessa_steam_games(df_steam_games_raw)

  df[col].str.contains(genero, regex=True)


In [124]:
df

Unnamed: 0,Name,Achievements,FAIXA_PRECO,PARA_MAIORES,360_VIDEO,ACCOUNTING,ACTION,ADVENTURE,ANIMATION_&_MODELING,AUDIO_PRODUCTION,CASUAL,DESIGN_&_ILLUSTRATION,DOCUMENTARY,EARLY_ACCESS,EDUCATION,EPISODIC,FREE_TO_PLAY,GAME_DEVELOPMENT,GORE,INDIE,MASSIVELY_MULTIPLAYER,MOVIE,NUDITY,PHOTO_EDITING,RPG,RACING,SEXUAL_CONTENT,SHORT,SIMULATION,SOFTWARE_TRAINING,SPORTS,STRATEGY,TUTORIAL,UTILITIES,VIDEO_PRODUCTION,VIOLENT,WEB_PUBLISHING,CAPTIONS_AVAILABLE,CO-OP,COMMENTARY_AVAILABLE,CROSS-PLATFORM_MULTIPLAYER,FULL_CONTROLLER_SUPPORT,HDR_AVAILABLE,IN-APP_PURCHASES,INCLUDES_SOURCE_SDK,INCLUDES_LEVEL_EDITOR,LAN_CO-OP,LAN_PVP,MMO,MODS,MODS_(REQUIRE_HL2),MULTI-PLAYER,ONLINE_CO-OP,ONLINE_PVP,PARTIAL_CONTROLLER_SUPPORT,PVP,REMOTE_PLAY_TOGETHER,REMOTE_PLAY_ON_PHONE,REMOTE_PLAY_ON_TV,REMOTE_PLAY_ON_TABLET,SHARED/SPLIT_SCREEN,SHARED/SPLIT_SCREEN_CO-OP,SHARED/SPLIT_SCREEN_PVP,SINGLE-PLAYER,STATS,STEAM_ACHIEVEMENTS,STEAM_CLOUD,STEAM_LEADERBOARDS,STEAM_TRADING_CARDS,STEAM_TURN_NOTIFICATIONS,STEAM_WORKSHOP,STEAMVR_COLLECTIBLES,TRACKED_CONTROLLER_SUPPORT,TRACKED_MOTION_CONTROLLER_SUPPORT,VR_ONLY,VR_SUPPORT,VR_SUPPORTED,VALVE_ANTI-CHEAT_ENABLED
0,Galactic Bowling,30,"(10.0, 20.0]",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,Train Bandit,12,"(-0.2, 10.0]",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
2,Jolt Project,0,"(-0.2, 10.0]",0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Henosis™,0,"(-0.2, 10.0]",0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Two Weeks in Painland,17,"(-0.2, 10.0]",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85098,Mannerheim's Saloon Car,0,"(-0.2, 10.0]",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0
85099,Beer Run,0,"(-0.2, 10.0]",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
85100,My Friend The Spider,0,"(-0.2, 10.0]",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
85101,Path of Survivors,34,"(-0.2, 10.0]",0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0
