In [1]:
import pandas as pd 
import json


In [2]:

# Abre el archivo 'output_steam_games.json' en modo de lectura ('r') con codificación UTF-8.
with open('output_steam_games.json', 'r', encoding='utf-8') as archivo:
    # Carga los datos del archivo utilizando json.loads para cada línea.
    data = [json.loads(line) for line in archivo]

# Crea un DataFrame de pandas a partir de los datos cargados.
data_games = pd.DataFrame(data)

# Elimina las primeras 88,310 filas del DataFrame. 
# Esto es útil ya que las primeras filas no contienen información relevante o son encabezados no deseados.
data_games.drop(data_games.index[0:88310], inplace=True)

In [3]:
data_games

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88310,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140,Kotoshiro
88311,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980,Secret Level SRL
88312,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290,Poolians.com
88313,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400,彼岸领域
88314,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"[Action, Indie, Casual, Sports]",http://steamcommunity.com/app/773570/reviews/?...,"[Single-player, Full controller support, HTC V...",2.99,False,773570,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120440,Ghost_RUS Games,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,Colony On Mars,http://store.steampowered.com/app/773640/Colon...,2018-01-04,"[Strategy, Indie, Casual, Simulation]",http://steamcommunity.com/app/773640/reviews/?...,"[Single-player, Steam Achievements]",1.99,False,773640,"Nikita ""Ghost_RUS"""
120441,Sacada,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,http://store.steampowered.com/app/733530/LOGis...,2018-01-04,"[Strategy, Indie, Casual]",http://steamcommunity.com/app/733530/reviews/?...,"[Single-player, Steam Achievements, Steam Clou...",4.99,False,733530,Sacada
120442,Laush Studio,"[Indie, Racing, Simulation]",Russian Roads,Russian Roads,http://store.steampowered.com/app/610660/Russi...,2018-01-04,"[Indie, Simulation, Racing]",http://steamcommunity.com/app/610660/reviews/?...,"[Single-player, Steam Achievements, Steam Trad...",1.99,False,610660,Laush Dmitriy Sergeevich
120443,SIXNAILS,"[Casual, Indie]",EXIT 2 - Directions,EXIT 2 - Directions,http://store.steampowered.com/app/658870/EXIT_...,2017-09-02,"[Indie, Casual, Puzzle, Singleplayer, Atmosphe...",http://steamcommunity.com/app/658870/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",4.99,False,658870,"xropi,stev3ns"


In [4]:
# Elimina filas con valores nulos en columnas específicas y resetea el indice
data_games.drop(['app_name','early_access','url','reviews_url','tags','publisher','specs'], axis=1, inplace = True)
data_games.reindex()

Unnamed: 0,genres,title,release_date,price,id,developer
88310,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,2018-01-04,4.99,761140,Kotoshiro
88311,"[Free to Play, Indie, RPG, Strategy]",Ironbound,2018-01-04,Free To Play,643980,Secret Level SRL
88312,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,2017-07-24,Free to Play,670290,Poolians.com
88313,"[Action, Adventure, Casual]",弹炸人2222,2017-12-07,0.99,767400,彼岸领域
88314,,,,2.99,773570,
...,...,...,...,...,...,...
120440,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,2018-01-04,1.99,773640,"Nikita ""Ghost_RUS"""
120441,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,2018-01-04,4.99,733530,Sacada
120442,"[Indie, Racing, Simulation]",Russian Roads,2018-01-04,1.99,610660,Laush Dmitriy Sergeevich
120443,"[Casual, Indie]",EXIT 2 - Directions,2017-09-02,4.99,658870,"xropi,stev3ns"


In [5]:
# Convierte la columna 'release_date' a formato datetime y extrae el año.
data_games['release_date'] = pd.to_datetime(data_games['release_date'], errors='coerce').dt.year

# Elimina filas con valores nulos en columnas específicas: 'genres', 'title', 'release_date', 'price', 'id', 'developer'.
data_games = data_games.dropna(subset=['genres', 'title', 'release_date', 'price', 'id', 'developer'])

# Convierte la columna 'release_date' a tipo entero y luego a tipo cadena.
# Esto se hace para asegurarse de que 'release_date' sea un valor numérico antes de convertirlo a cadena.
data_games['release_date'] = data_games['release_date'].astype(int)
data_games['release_date'] = data_games['release_date'].astype(str)


In [6]:

# Reemplaza los valores específicos en la columna 'price' con 0.
data_games['price'] = data_games['price'].replace({'Free To Play': 0, 'Free to Play': 0, 'Play for Free!': 0})

# Filtra las filas donde la columna 'price' pueda convertirse a un valor numérico y elimina las demás.
data_games = data_games[pd.to_numeric(data_games['price'], errors='coerce').notnull()]

# Renombra la columna 'id' a 'item_id'.
data_games = data_games.rename(columns={'id': 'item_id'})

In [7]:
# Aplica el método 'pd.Series' a la columna 'genres' para expandir las listas en nuevas columnas.
data_games1 = data_games['genres'].apply(pd.Series)

# Elimina columnas específicas por índice (1, 2, 3, ..., 9).
data_games1.drop([1, 2, 3, 4, 5, 6, 7, 8, 9], axis=1, inplace=True)

In [8]:
# Concatena los DataFrames
data_games2 = pd.concat([data_games, data_games1], axis=1)

In [9]:
# Elimina la columna y renombramos la columna "0" a "genre" 
data_games2.drop(["genres"],axis=1,inplace=True)
data_games2.rename(columns={"0":"genre"},inplace=True)
data_games2


Unnamed: 0,title,release_date,price,item_id,developer,0
88310,Lost Summoner Kitty,2018,4.99,761140,Kotoshiro,Action
88311,Ironbound,2018,0,643980,Secret Level SRL,Free to Play
88312,Real Pool 3D - Poolians,2017,0,670290,Poolians.com,Casual
88313,弹炸人2222,2017,0.99,767400,彼岸领域,Action
88315,Battle Royale Trainer,2018,3.99,772540,Trickjump Games Ltd,Action
...,...,...,...,...,...,...
120439,Kebab it Up!,2018,1.99,745400,Bidoniera Games,Action
120440,Colony On Mars,2018,1.99,773640,"Nikita ""Ghost_RUS""",Casual
120441,LOGistICAL: South Africa,2018,4.99,733530,Sacada,Casual
120442,Russian Roads,2018,1.99,610660,Laush Dmitriy Sergeevich,Indie


In [10]:
data_games2.to_csv('games.csv', index=False)