In [1]:
import json
import os

import numpy as np
import pandas as pd

In [2]:
# Suffix identifying which raw dump to process; it is interpolated into the
# input JSON path and into every output CSV file name further down.
# NOTE(review): format looks like DD_MM_YYYY_HH_MM_SS — confirm with whatever
# produces the raw file.
dt_suffix = '21_01_2023_20_08_24'

In [3]:
# Read the raw payload selected by dt_suffix into a plain Python object.
raw_path = f'./data/raw/auctions_{dt_suffix}.json'
with open(raw_path, 'r') as raw_file:
    auctions = json.load(raw_file)

In [4]:
# Execution timestamp read from the raw payload; it is stamped onto the
# auctions, items and optionals tables in the cells below.
execution_ts = auctions['execution_ts']

In [5]:
%%time
#https://towardsdatascience.com/all-pandas-json-normalize-you-should-know-for-flattening-json-13eae1dfb7dd
df_auctions = pd.json_normalize(auctions['auctions'],max_level=3)

Wall time: 816 ms


In [6]:
%%time
df_itens = pd.json_normalize(auctions['auctions'],record_path=['auction_itens'],
        meta=['auction_number'],max_level=3)

Wall time: 804 ms


In [7]:
%%time
df_bids = pd.json_normalize(auctions['auctions'],record_path=['auction_itens','auction_item_bids'],
        meta=['auction_number',['auction_itens','auction_item_lote']],max_level=3)

Wall time: 601 ms


In [8]:
############
##AUCTIONS##
############
# Drop the nested 'auction_itens' column (items are flattened separately into
# df_itens) and stamp every row with the execution timestamp.
# The frame is rebound instead of mutated in place, and errors='ignore' makes
# the drop a no-op when the column is already gone, so re-running this cell
# no longer raises KeyError.
df_auctions = (
    df_auctions
    .drop(columns=['auction_itens'], errors='ignore')
    .assign(execution_ts=execution_ts)
)

In [9]:
#############
##OPTIONALS##
#############
# Build one row per (item, optional) pair.
# explode() unpacks each item's 'auction_item_optionals' list into separate
# rows while repeating that item's 'auction_number' / 'auction_item_lote' and
# its original row label. This replaces three per-row .apply(pd.Series)
# expansions plus melt and two index merges with a single vectorised step.
optional_cols = ['auction_number', 'auction_item_lote', 'auction_item_optionals']
df_optionals = (
    df_itens[optional_cols]
    .explode('auction_item_optionals')
    # Items with an empty/missing optionals list explode to NaN; drop them
    # (and any row with a missing key), matching the previous dropna + inner merge.
    .dropna()
    # Keep the index name the previous implementation produced for display.
    .rename_axis('index')
    .assign(execution_ts=execution_ts)
)

In [10]:
#########
##ITENS##
#########
# Items table: remove the nested list columns (bids and optionals live in
# their own tables), put the parent key first, and stamp the execution time.
# Must run after the optionals cell, which still needs 'auction_item_optionals'.
item_key_cols = ['auction_number']
df_itens = df_itens.drop(
    columns=['auction_item_bids', 'auction_item_optionals'],
    # Already-removed columns are skipped so a re-run does not raise KeyError.
    errors='ignore',
)
# Select the key column by name rather than by position: a positional
# "move the last column to the front" would move 'execution_ts' instead of
# 'auction_number' if this cell were executed a second time.
other_cols = [col for col in df_itens.columns if col not in item_key_cols]
df_itens = df_itens[item_key_cols + other_cols].assign(execution_ts=execution_ts)

In [11]:
########
##BIDS##
########
# Bids table: give the item-level meta column its plain name and move the two
# parent keys to the front.
# The keys are selected by name (not by position), so re-running the cell
# keeps the same column order instead of rotating the columns again.
# NOTE(review): unlike the other three tables, no 'execution_ts' column is
# added here — confirm this is intentional.
bid_key_cols = ['auction_number', 'auction_item_lote']
df_bids = df_bids.rename(
    columns={'auction_itens.auction_item_lote': 'auction_item_lote'}
)
bid_value_cols = [col for col in df_bids.columns if col not in bid_key_cols]
df_bids = df_bids[bid_key_cols + bid_value_cols]

In [12]:
# Preview the first two rows of the auctions table.
df_auctions.head(2)

Unnamed: 0,auction_number,auction_name,auction_category,auction_date,auction_status,auction_url,auction_edital_url,execution_ts
0,1081,BANCOS E FINANCEIRAS,VEÍCULOS,"25 jan 2023, 13:00",Lances online agora,https://www.parquedosleiloes.com.br/leilao/108...,https://www.parquedosleiloes.com.br/leilao/108...,21/01/2023 20:08:24
1,1074,BANCOS E FINANCEIRAS,VEÍCULOS,"12 jan 2023, 13:00",Encerrado,https://www.parquedosleiloes.com.br/leilao/107...,https://www.parquedosleiloes.com.br/leilao/107...,21/01/2023 20:08:24


In [13]:
# Preview the first two rows of the items table.
df_itens.head(2)

Unnamed: 0,auction_number,auction_item_lote,auction_item_type,auction_item_name,auction_item_url,auction_item_min_bid_value,auction_item_increment_value,auction_item_highest_bid,auction_item_date_text,auction_item_description,auction_item_model,auction_item_brand,auction_item_model_year,auction_item_market_price,auction_item_color,auction_item_gas_type,auction_item_km,auction_item_refurbished,execution_ts
0,1081,1,Online,FIAT ARGO DRIVE 1.3 2019/2020 VERMELHA,https://www.parquedosleiloes.com.br/leilao/108...,39500,500,,"Encerra em 25 jan 2023, 13:00:00",(SANTANDER) DOCUMENTO EM FASE DE EMISSÃO. PARA...,ARGO DRIVE 1.3,FIAT,2019/2020,"R$ 66.377,00 / janeiro de 2023",VERMELHA,FLEX,49208,Não,21/01/2023 20:08:24
1,1081,2,Online,RENAULT SANDERO EXP1016V 2013/2014 VERMELHA,https://www.parquedosleiloes.com.br/leilao/108...,14500,500,,"Encerra em 25 jan 2023, 13:01:00",(SANTANDER) DOCUMENTO CRLV EM NOSSO ESCRITÓRIO...,SANDERO EXP1016V,RENAULT,2013/2014,"R$ 32.779,00 / janeiro de 2023",VERMELHA,FLEX,205523,Não,21/01/2023 20:08:24


In [14]:
# Preview the first two rows of the optionals table (one row per item optional).
df_optionals.head(2)

Unnamed: 0_level_0,auction_number,auction_item_lote,auction_item_optionals,execution_ts
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1081,1,AIR BAG,21/01/2023 20:08:24
0,1081,1,ALARME,21/01/2023 20:08:24


In [15]:
# Preview the first two rows of the bids table.
df_bids.head(2)

Unnamed: 0,auction_number,auction_item_lote,auction_item_bid_value,auction_item_bid_type,auction_item_bid_datetime,auction_item_bid_user
0,1081,16,12500,Lance manual,21/01/23 12:07:04.031,geovane
1,1081,19,11500,Lance manual,21/01/23 14:49:22.100,avelino16


In [16]:
# Write each table to data/tabular/<dt_suffix>_<table>.csv without the index.
output_dir = 'data/tabular'
# to_csv does not create missing directories, so make sure the target exists
# (e.g. on a fresh checkout) before writing.
os.makedirs(output_dir, exist_ok=True)

tables_to_export = {
    'auctions': df_auctions,
    'itens': df_itens,
    'optionals': df_optionals,
    'bids': df_bids,
}
for table_name, table in tables_to_export.items():
    table.to_csv(f'{output_dir}/{dt_suffix}_{table_name}.csv', index=False)
