In [1]:
from pathlib import Path
import pandas as pd

# Mostrar floats com duas casas decimas
pd.set_option('display.float_format',  lambda x: '%.3g' % x)
pd.options.display.max_colwidth = 20
pd.options.display.max_columns = 20
pd.options.display.max_rows = 4

In [2]:
# Ler a base ajustada no S3
# df_stocks = pd.read_feather('s3://aq-dl/HistoricalQuotations/base_adj.feather')
df_stocks = pd.read_feather('/mnt/aq_disk/data/HistoricalQuotations/processed/base_adj.feather')
df_stocks

Unnamed: 0,datneg,datven,prazot,codneg,codisi,especi,codbdi,tpmerc,dismes,preabe,...,preult,preofc,preofv,preexe,totneg,quatot,voltot,event1,event2,ajuste
0,1995-01-02,NaT,0,ACE 3,ACESACON,ON *INT,2,10,119,0.0521,...,0.0521,0.0521,0.0537,0,1,2.42e+08,1.26e+04,,,0.826
1,1995-01-02,NaT,0,ACE 4,ACESACPN,PN *INT,2,10,119,0.0603,...,0.0595,0.0591,0.0603,0,5,6.41e+09,3.85e+05,,,0.826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10907285,2022-06-06,NaT,0,Z2EN34,BRZ2ENBDR007,DRN,2,10,100,27.6,...,27.7,25.1,27.7,0,2,1.17e+03,3.25e+04,,,1
10907286,2022-06-06,NaT,0,Z2SC34,BRZ2SCBDR000,DRN,2,10,100,25.1,...,25.1,24,0,0,1,3,75.3,,,1


In [3]:
# Filtrar somente ações depois de 2011
# Retirar ações com menos de 5 negociações no dia -> essa ação, em termos práticos,
# não teve como ser negociada no dia
df_stocks.query(
    'codbdi == 2 and datneg >= "2011.01.01" and totneg >= 5',
    inplace=True
)
df_stocks.reset_index(drop=True, inplace=True)
df_stocks

Unnamed: 0,datneg,datven,prazot,codneg,codisi,especi,codbdi,tpmerc,dismes,preabe,...,preult,preofc,preofv,preexe,totneg,quatot,voltot,event1,event2,ajuste
0,2011-01-03,NaT,0,ABCB4,BRABCBACNPR4,PN EJ N2,2,10,113,6.21,...,6.25,6.25,6.3,0,226,5.07e+05,3.17e+06,,,0.42
1,2011-01-03,NaT,0,AEDU3,BRAEDUACNOR9,ON NM,2,10,104,13.6,...,13.7,13.7,13.9,0,1348,1.47e+06,2.02e+07,,,0.333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
859681,2022-06-06,NaT,0,YDUQ3,BRYDUQACNOR3,ON NM,2,10,103,15.8,...,15.2,15.2,15.2,0,5322,1.07e+06,1.65e+07,,,1
859682,2022-06-06,NaT,0,Z1OM34,BRZ1OMBDR000,DRN,2,10,100,21.4,...,21.2,20.8,21.9,0,5,79,1.66e+03,,,1


In [4]:
# Manter somente as colunas que serão usadas
cols = ['datneg', 'codneg', 'codisi', 'especi', 'premed', 'totneg', 'quatot', 'voltot']
df_stocks = df_stocks[cols].copy()
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot
0,2011-01-03,ABCB4,BRABCBACNPR4,PN EJ N2,6.24,226,5.07e+05,3.17e+06
1,2011-01-03,AEDU3,BRAEDUACNOR9,ON NM,13.8,1348,1.47e+06,2.02e+07
...,...,...,...,...,...,...,...,...
859681,2022-06-06,YDUQ3,BRYDUQACNOR3,ON NM,15.4,5322,1.07e+06,1.65e+07
859682,2022-06-06,Z1OM34,BRZ1OMBDR000,DRN,21,5,79,1.66e+03


In [5]:
df_stocks['day_year'] = df_stocks.datneg.dt.day_of_year
df_stocks['year'] = df_stocks.datneg.dt.year
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year
0,2011-01-03,ABCB4,BRABCBACNPR4,PN EJ N2,6.24,226,5.07e+05,3.17e+06,3,2011
1,2011-01-03,AEDU3,BRAEDUACNOR9,ON NM,13.8,1348,1.47e+06,2.02e+07,3,2011
...,...,...,...,...,...,...,...,...,...,...
859681,2022-06-06,YDUQ3,BRYDUQACNOR3,ON NM,15.4,5322,1.07e+06,1.65e+07,157,2022
859682,2022-06-06,Z1OM34,BRZ1OMBDR000,DRN,21,5,79,1.66e+03,157,2022


In [6]:
# Definir a data de corte e remover negociações posteriores ao corte
df_stocks.query('day_year >= 100', inplace=True)
df_stocks.reset_index(drop=True, inplace=True)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year
0,2011-04-11,ABCB4,BRABCBACNPR4,PN EJ N2,5.49,103,7.81e+04,4.29e+05,101,2011
1,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011
...,...,...,...,...,...,...,...,...,...,...
610104,2022-06-06,YDUQ3,BRYDUQACNOR3,ON NM,15.4,5322,1.07e+06,1.65e+07,157,2022
610105,2022-06-06,Z1OM34,BRZ1OMBDR000,DRN,21,5,79,1.66e+03,157,2022


In [7]:
# Criar dataframe com o dia mais próximo do corte para cada ano
df_corte = df_stocks.groupby(by=['year']).agg({'day_year':'min'}).reset_index()
df_corte

Unnamed: 0,year,day_year
0,2011,101
1,2012,100
...,...,...
10,2021,102
11,2022,101


In [8]:
# Usar o ano e o dia de corte como chaves para a operação de merge
df_stocks = df_stocks.merge(
    right=df_corte, how='inner', on=['year', 'day_year']
)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year
0,2011-04-11,ABCB4,BRABCBACNPR4,PN EJ N2,5.49,103,7.81e+04,4.29e+05,101,2011
1,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011
...,...,...,...,...,...,...,...,...,...,...
3684,2022-04-11,YDUQ3,BRYDUQACNOR3,ON NM,19.6,10252,2.64e+06,5.17e+07,101,2022
3685,2022-04-11,Z1OM34,BRZ1OMBDR000,DRN,20.6,237,1.25e+03,2.57e+04,101,2022


In [9]:
# Inserir o cód. dos emissores -> 4 caracteres depois do dois primeiros do ISIN
df_stocks["codemi"] = df_stocks.codisi.str[2:6]
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year,codemi
0,2011-04-11,ABCB4,BRABCBACNPR4,PN EJ N2,5.49,103,7.81e+04,4.29e+05,101,2011,ABCB
1,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011,AEDU
...,...,...,...,...,...,...,...,...,...,...,...
3684,2022-04-11,YDUQ3,BRYDUQACNOR3,ON NM,19.6,10252,2.64e+06,5.17e+07,101,2022,YDUQ
3685,2022-04-11,Z1OM34,BRZ1OMBDR000,DRN,20.6,237,1.25e+03,2.57e+04,101,2022,Z1OM


In [10]:
AQ_FOLDER = Path("/mnt/aq_disk/data/AQ/")
df_codes = pd.read_pickle(AQ_FOLDER / "codemi.pkl")
df_codes

Unnamed: 0,codcvm,cnpj,densoc,situac,codemi
0,60,18451005000104,ACOPALMA CIA IND...,CANCELADA,ZWVZ
1,94,92693019000189,PANATLANTICA SA,ATIVO,PATI
...,...,...,...,...,...
1762,26794,22902694000195,SELF IT ACADEMIA...,ATIVO,SLFT
1763,26808,04368898000106,COPEL DISTRIBUIÇ...,ATIVO,CPLD


In [11]:
# A chave do merge é o cód. do emissor (codemir) e somente o cód. CVM (codcvm)
# será inserido no merge
df_codes = df_codes[["codcvm", "codemi"]].copy()
df_codes

Unnamed: 0,codcvm,codemi
0,60,ZWVZ
1,94,PATI
...,...,...
1762,26794,SLFT
1763,26808,CPLD


In [12]:
df_stocks = df_stocks.merge(right=df_codes, how='inner', on='codemi')
df_stocks.reset_index(drop=True, inplace=True)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year,codemi,codcvm
0,2011-04-11,ABCB4,BRABCBACNPR4,PN EJ N2,5.49,103,7.81e+04,4.29e+05,101,2011,ABCB,20958
1,2012-04-09,ABCB4,BRABCBACNPR4,PN N2,5.44,501,2e+06,1.09e+07,100,2012,ABCB,20958
...,...,...,...,...,...,...,...,...,...,...,...,...
2979,2022-04-11,VITT3,BRVITTACNOR4,ON NM,12.1,232,1.22e+05,1.49e+06,101,2022,VITT,25763
2980,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,VVEO,25682


In [13]:
df_financials = pd.read_csv(
    "../data/financials.csv",
    parse_dates=['doc_env', 'per_ini', 'per_fim']
)
# Renomear coluna com o código CVM para coincidir com as outras bases
df_financials.rename(columns={"cia_id": "codcvm"}, inplace=True)
df_financials

Unnamed: 0,codcvm,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,8.86e+06,-1.9e+07,2.48e+07,0.2
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,8.86e+06,-1.9e+07,2.11e+07,0.171
...,...,...,...,...,...,...,...,...,...
4056,80195,"G2D Investments,...",2021-03-12 18:33:08,2020-01-01,2020-12-31,6.6e+07,1.04e+08,9.22e+07,0.191
4057,90212,Multilaser Indus...,2018-05-30 15:43:03,2017-01-01,2017-12-31,2.16e+08,-7.08e+07,2.41e+08,0.306


In [14]:
# year = ano em que a informação será usada -> ano seguinte
df_financials['year'] = df_financials.per_fim.dt.year + 1
df_financials

Unnamed: 0,codcvm,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,year
0,94,PANATLANTICA S.A.,2011-03-31 10:16:48,2010-01-01,2010-12-31,8.86e+06,-1.9e+07,2.48e+07,0.2,2011
1,94,PANATLANTICA S.A.,2011-04-01 17:31:56,2010-01-01,2010-12-31,8.86e+06,-1.9e+07,2.11e+07,0.171,2011
...,...,...,...,...,...,...,...,...,...,...
4056,80195,"G2D Investments,...",2021-03-12 18:33:08,2020-01-01,2020-12-31,6.6e+07,1.04e+08,9.22e+07,0.191,2021
4057,90212,Multilaser Indus...,2018-05-30 15:43:03,2017-01-01,2017-12-31,2.16e+08,-7.08e+07,2.41e+08,0.306,2018


In [15]:
# Incluir os dados contábeis em df_stocks 
df_stocks = df_stocks.merge(right=df_financials, how='inner', on=['year', 'codcvm'])
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year,codemi,codcvm,cia_nome,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic
0,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011,AEDU,18961,ANHANGUERA EDUCA...,2011-03-30 00:09:51,2010-01-01,2010-12-31,1.46e+11,-5.71e+08,1.57e+08,0.11
1,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011,AEDU,18961,ANHANGUERA EDUCA...,2011-05-26 11:54:58,2010-01-01,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2741,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,VVEO,25682,CM Hospitalar S.A,2022-03-10 18:22:56,2021-01-01,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237
2742,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,VVEO,25682,CM Hospitalar S.A,2022-03-30 18:25:34,2021-01-01,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237


In [16]:
df_stocks['market_cap'] = df_stocks['shares_outstanding'] * df_stocks['premed']
df_stocks['enterprise_value'] = df_stocks['market_cap'] - df_stocks['net_debt']
df_stocks['earnings_yield'] = df_stocks['ebit'] / df_stocks['enterprise_value']
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year,...,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011,...,2011-03-30 00:09:51,2010-01-01,2010-12-31,1.46e+11,-5.71e+08,1.57e+08,0.11,1.85e+12,1.85e+12,8.5e-05
1,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011,...,2011-05-26 11:54:58,2010-01-01,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11,1.85e+09,2.42e+09,0.065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2741,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,...,2022-03-10 18:22:56,2021-01-01,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.46e+09,4.53e+09,0.109
2742,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,...,2022-03-30 18:25:34,2021-01-01,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.46e+09,4.53e+09,0.109


In [17]:
# Remover revisões de DFPs publicadas no mesmo dia ou posteriores ao corte
# No livro, o corte é de uma semana
df_stocks.query('doc_env.dt.date < datneg')

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year,...,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,101,2011,...,2011-03-30 00:09:51,2010-01-01,2010-12-31,1.46e+11,-5.71e+08,1.57e+08,0.11,1.85e+12,1.85e+12,8.5e-05
2,2012-04-09,AEDU3,BRAEDUACNOR9,ON NM,8.59,1978,3.53e+06,3.03e+07,100,2012,...,2012-03-31 23:15:31,2011-01-01,2011-12-31,1.46e+08,2.92e+08,8.61e+07,0.0375,1.25e+09,9.59e+08,0.0898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2741,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,...,2022-03-10 18:22:56,2021-01-01,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.46e+09,4.53e+09,0.109
2742,2022-04-11,VVEO3,BRVVEOACNOR0,ON NM,15.6,871,1.99e+05,3.1e+06,101,2022,...,2022-03-30 18:25:34,2021-01-01,2021-12-31,2.86e+08,-7.65e+07,4.92e+08,0.237,4.46e+09,4.53e+09,0.109


In [18]:
# Manter somente a DFP mais recente ao corte para cada ativo
df_stocks.sort_values('doc_env', inplace=True)
df_stocks.drop_duplicates(subset=['codneg', 'year'], keep='last', inplace=True)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,day_year,year,...,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
1750,2011-04-11,TOTS3,BRTOTSACNOR8,ON NM,8.73,244,6.43e+05,5.62e+06,101,2011,...,2011-01-31 19:05:59,2010-01-01,2010-12-31,3.15e+07,1.79e+08,2.12e+08,0.261,2.75e+08,9.53e+07,2.22
1672,2011-04-11,STBP11,BRSTBPCDAM10,UNT N2,20.6,53,4.7e+04,9.69e+05,101,2011,...,2011-02-03 20:15:06,2010-01-01,2010-12-31,6.56e+08,3.59e+08,2.1e+08,0.133,1.35e+10,1.32e+10,0.0159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2612,2022-04-11,ALLD3,BRALLDACNOR3,ON NM,14.2,273,4.67e+04,6.65e+05,101,2022,...,2022-04-27 12:11:23,2021-01-01,2021-12-31,9.32e+07,-3.38e+08,4.08e+08,0.347,1.33e+09,1.66e+09,0.245
2735,2022-04-11,RECV3,BRRECVACNOR3,ON NM,23,2648,4.08e+05,9.37e+06,101,2022,...,2022-06-02 17:08:52,2021-01-01,2021-12-31,2.49e+08,-6.15e+07,2.85e+08,0.157,5.7e+09,5.77e+09,0.0494


In [19]:
# Remover colunas intermediárias
df_stocks.drop(columns=["day_year"], inplace=True)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,year,codemi,...,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
1750,2011-04-11,TOTS3,BRTOTSACNOR8,ON NM,8.73,244,6.43e+05,5.62e+06,2011,TOTS,...,2011-01-31 19:05:59,2010-01-01,2010-12-31,3.15e+07,1.79e+08,2.12e+08,0.261,2.75e+08,9.53e+07,2.22
1672,2011-04-11,STBP11,BRSTBPCDAM10,UNT N2,20.6,53,4.7e+04,9.69e+05,2011,STBP,...,2011-02-03 20:15:06,2010-01-01,2010-12-31,6.56e+08,3.59e+08,2.1e+08,0.133,1.35e+10,1.32e+10,0.0159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2612,2022-04-11,ALLD3,BRALLDACNOR3,ON NM,14.2,273,4.67e+04,6.65e+05,2022,ALLD,...,2022-04-27 12:11:23,2021-01-01,2021-12-31,9.32e+07,-3.38e+08,4.08e+08,0.347,1.33e+09,1.66e+09,0.245
2735,2022-04-11,RECV3,BRRECVACNOR3,ON NM,23,2648,4.08e+05,9.37e+06,2022,RECV,...,2022-06-02 17:08:52,2021-01-01,2021-12-31,2.49e+08,-6.15e+07,2.85e+08,0.157,5.7e+09,5.77e+09,0.0494


In [20]:
# Remover o ativo menos líquido da mesma empresa no ano pelo critério do núm.
# de negociações
df_stocks.sort_values(by=['year', 'codemi', 'totneg'], inplace=True)
df_stocks.drop_duplicates(
    subset=['codemi', 'year'], keep='last', inplace=True, ignore_index=True
)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,year,codemi,...,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,2011,AEDU,...,2011-05-26 11:54:58,2010-01-01,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11,1.85e+09,2.42e+09,0.065
1,2011-04-11,ALPA4,BRALPAACNPR7,PN N1,5.28,158,5.89e+05,3.11e+06,2011,ALPA,...,2011-03-30 16:55:23,2010-01-01,2010-12-31,3.53e+08,-3.59e+08,3.24e+08,0.328,1.87e+09,2.23e+09,0.146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669,2022-04-11,WLMM4,BRWLMMACNPR3,PN EJ,34.2,14,1.82e+03,6.22e+04,2022,WLMM,...,2022-03-22 22:18:18,2021-01-01,2021-12-31,3.64e+07,-1.52e+08,1.36e+08,0.349,1.25e+09,1.4e+09,0.0973
1670,2022-04-11,YDUQ3,BRYDUQACNOR3,ON NM,19.6,10252,2.64e+06,5.17e+07,2022,YDUQ,...,2022-03-15 18:09:20,2021-01-01,2021-12-31,3.09e+08,3.69e+09,5.47e+08,0.0788,6.05e+09,2.35e+09,0.232


In [21]:
# O Livro fala em empresas com pelos menos USD 50 milhões de valor de mercado
df_stocks.query('market_cap > 250_000_000', inplace=True)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,year,codemi,...,doc_env,per_ini,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,2011,AEDU,...,2011-05-26 11:54:58,2010-01-01,2010-12-31,1.46e+08,-5.71e+08,1.57e+08,0.11,1.85e+09,2.42e+09,0.065
1,2011-04-11,ALPA4,BRALPAACNPR7,PN N1,5.28,158,5.89e+05,3.11e+06,2011,ALPA,...,2011-03-30 16:55:23,2010-01-01,2010-12-31,3.53e+08,-3.59e+08,3.24e+08,0.328,1.87e+09,2.23e+09,0.146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669,2022-04-11,WLMM4,BRWLMMACNPR3,PN EJ,34.2,14,1.82e+03,6.22e+04,2022,WLMM,...,2022-03-22 22:18:18,2021-01-01,2021-12-31,3.64e+07,-1.52e+08,1.36e+08,0.349,1.25e+09,1.4e+09,0.0973
1670,2022-04-11,YDUQ3,BRYDUQACNOR3,ON NM,19.6,10252,2.64e+06,5.17e+07,2022,YDUQ,...,2022-03-15 18:09:20,2021-01-01,2021-12-31,3.09e+08,3.69e+09,5.47e+08,0.0788,6.05e+09,2.35e+09,0.232


In [22]:
df_stocks['rank_roic'] = (
    df_stocks.groupby(by=['year'])['roic']
             .rank(method='dense', ascending=False)
)
df_stocks['rank_ey'] = (
    df_stocks.groupby(by=['year'])['earnings_yield']
             .rank(method='dense', ascending=False)
)
df_stocks['ranks_sum'] = df_stocks['rank_roic'] + df_stocks['rank_ey']
df_stocks['rank_final'] = (
    df_stocks.groupby(by=['year'])['ranks_sum']
             .rank(method='first', ascending=True)
)
df_stocks

Unnamed: 0,datneg,codneg,codisi,especi,premed,totneg,quatot,voltot,year,codemi,...,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_roic,rank_ey,ranks_sum,rank_final
0,2011-04-11,AEDU3,BRAEDUACNOR9,ON NM,12.7,978,4.13e+06,5.24e+07,2011,AEDU,...,-5.71e+08,1.57e+08,0.11,1.85e+09,2.42e+09,0.065,92,82,174,96
1,2011-04-11,ALPA4,BRALPAACNPR7,PN N1,5.28,158,5.89e+05,3.11e+06,2011,ALPA,...,-3.59e+08,3.24e+08,0.328,1.87e+09,2.23e+09,0.146,17,59,76,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669,2022-04-11,WLMM4,BRWLMMACNPR3,PN EJ,34.2,14,1.82e+03,6.22e+04,2022,WLMM,...,-1.52e+08,1.36e+08,0.349,1.25e+09,1.4e+09,0.0973,30,100,130,53
1670,2022-04-11,YDUQ3,BRYDUQACNOR3,ON NM,19.6,10252,2.64e+06,5.17e+07,2022,YDUQ,...,3.69e+09,5.47e+08,0.0788,6.05e+09,2.35e+09,0.232,148,65,213,116


In [23]:
df_stocks.sort_values(by=['year', 'rank_final'], inplace=True)
# Alterar o nome do datneg
df_stocks.rename(columns={"datneg": "cutoff_date"}, inplace=True)
df_stocks

Unnamed: 0,cutoff_date,codneg,codisi,especi,premed,totneg,quatot,voltot,year,codemi,...,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_roic,rank_ey,ranks_sum,rank_final
19,2011-04-11,CIEL3,BRCIELACNOR3,ON NM,4.36,4723,8.89e+06,3.88e+07,2011,CIEL,...,-2.51e+08,2.37e+09,2.51,5.95e+09,6.2e+09,0.383,2,21,23,1
79,2011-04-11,LREN3,BRLRENACNOR1,ON NM,6.56,3604,7.84e+06,5.15e+07,2011,LREN,...,-2.72e+07,4.04e+08,0.407,8.03e+08,8.3e+08,0.487,10,15,25,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1477,2022-04-11,AMAR3,BRAMARACNOR4,ON NM,2.91,3715,4.49e+06,1.31e+07,2022,AMAR,...,1.2e+09,4.8e+06,0.00218,7.61e+08,-4.4e+08,-0.0109,195,175,370,194
1508,2022-04-11,COGN3,BRCOGNACNOR2,ON NM,2.69,38694,5.98e+07,1.61e+08,2022,COGN,...,5.87e+09,7.84e+07,0.00399,5.05e+09,-8.19e+08,-0.0957,194,178,372,195


In [24]:
df_stocks.to_csv("../data/magic_stocks.csv", index=False)

In [25]:
cols = ['year', 'codemi', 'cia_nome', 'doc_env', 'rank_roic', 'rank_ey', 'ranks_sum', 'rank_final']
df_stocks.query('year == 2022')[cols].head(20)

Unnamed: 0,year,codemi,cia_nome,doc_env,rank_roic,rank_ey,ranks_sum,rank_final
1493,2022,BRKM,BRASKEM S.A.,2022-03-16 19:43:41,6,4,10,1
1501,2022,CEBR,CIA ENERGETICA D...,2022-03-25 18:57:07,1,9,10,2
...,...,...,...,...,...,...,...,...
1531,2022,ENAT,ENAUTA PARTICIPA...,2022-03-17 21:43:45,5,53,58,19
1538,2022,EUCA,EUCATEX S.A. IND...,2022-03-30 20:18:52,51,11,62,20


In [26]:
df_stocks.query('codneg == "PRIO3"')

Unnamed: 0,cutoff_date,codneg,codisi,especi,premed,totneg,quatot,voltot,year,codemi,...,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_roic,rank_ey,ranks_sum,rank_final
1251,2020-04-09,PRIO3,BRPRIOACNOR1,ON NM,5.31,26671,45100000.0,239000000.0,2020,PRIO,...,1570000000.0,878000000.0,0.235,760000000.0,-812000000.0,-1.08,18,126,144,81
1418,2021-04-12,PRIO3,BRPRIOACNOR1,ON NM,19.3,27821,34100000.0,656000000.0,2021,PRIO,...,1700000000.0,943000000.0,0.194,2790000000.0,1090000000.0,0.868,48,7,55,12
1614,2022-04-11,PRIO3,BRPRIOACNOR1,ON NM,23.5,20863,11800000.0,276000000.0,2022,PRIO,...,-750000000.0,2120000000.0,0.361,20600000000.0,21400000000.0,0.0992,28,98,126,49
