In [1]:
from pathlib import Path
import pandas as pd
# Mostrar floats com duas casas decimas
pd.set_option('display.float_format',  lambda x: '%.2f' % x)
pd.options.display.max_colwidth = 20
pd.options.display.max_columns = 20
pd.options.display.max_rows = 6

In [2]:
# Carregar base NÃO AJUSTADA do S3 na seguinte raíz: s3://aq-dl/HistoricalQuotations/
# Caminho local:
DATA_FOLDER = Path("/mnt/aq_disk/data/HistoricalQuotations/interim")
# BASE_ADJ = DATA_FOLDER / "base_adj.feather"
DATASET = DATA_FOLDER / "dataset_22.feather"
cols = ['datneg', 'codneg', 'nomres', 'especi', 'codbdi', 'tpmerc', 'preult', 'premed', 'totneg', 'voltot']
df_magic = pd.read_feather(DATASET)[cols].query('tpmerc == 10')
df_magic

Unnamed: 0,datneg,codneg,nomres,especi,codbdi,tpmerc,preult,premed,totneg,voltot
0,2022-01-03,A1AP34,ADVANCE AUTO,DRN,2,10,83.12,85.24,2,10399.84
1,2022-01-03,A1BB34,ABB LTD,DRN,2,10,54.60,54.31,8,10429.02
2,2022-01-03,A1CR34,AMCOR PLC,DRN,2,10,66.90,66.90,1,133.80
...,...,...,...,...,...,...,...,...,...,...
1042499,2022-07-08,EQPA5,EQTL PARA,PNA,2,10,6.70,6.70,1,670.00
1042500,2022-07-08,EQTL3,EQUATORIAL,ON NM,2,10,22.94,23.13,16729,116353756.00
1042502,2022-07-08,ESGB11,ETF ESG BTG,CI,14,10,91.54,91.83,7,8815.84


#### Filtrar:
1. Cotações após 2011
2. Lote padrão (codbdi == 2) -> remover empresas em concordata, recuperação judicial, etc. da entrada na seleção (não da saída!)
3. Ações ON, PN ou PNA

In [3]:
df_magic = (
    df_magic
    .query('codbdi == 2 and especi.str.contains("ON |PN |PNA")')
    .reset_index(drop=True)
)
# datneg >= "2011.01.01" and \
df_magic

Unnamed: 0,datneg,codneg,nomres,especi,codbdi,tpmerc,preult,premed,totneg,voltot
0,2022-01-03,AALR3,ALLIAR,ON NM,2,10,13.58,13.88,2997,8342307.00
1,2022-01-03,ABCB4,ABC BRASIL,PN EJ N2,2,10,15.55,15.75,7350,19383938.00
2,2022-01-03,AERI3,AERIS,ON NM,2,10,6.55,6.65,8390,17335475.00
...,...,...,...,...,...,...,...,...,...,...
38431,2022-07-08,ENJU3,ENJOEI,ON NM,2,10,1.13,1.16,1335,3389415.00
38432,2022-07-08,EQPA5,EQTL PARA,PNA,2,10,6.70,6.70,1,670.00
38433,2022-07-08,EQTL3,EQUATORIAL,ON NM,2,10,22.94,23.13,16729,116353756.00


In [4]:
# Manter somente as colunas que serão usadas para fazer o corte nas datas
cols = ['datneg', 'codneg', 'nomres', 'premed', 'totneg']
df_magic = df_magic.loc[:, cols]
# Inserir o cód. dos emissores -> 4 primeiros caracteres do código de negociação
df_magic['codemi'] = df_magic['codneg'].str[0:4]
# df_magic['year'] = df_magic['datneg'].dt.year
# Ordenar o dataframe por ativo e data
df_magic.sort_values(by=['codneg', 'datneg'], inplace=True)
print('Number of companies available for backtesting', df_magic.codemi.nunique())
df_magic

Number of companies available for backtesting 311


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi
0,2022-01-03,AALR3,ALLIAR,13.88,2997,AALR
287,2022-01-04,AALR3,ALLIAR,13.71,4468,AALR
581,2022-01-05,AALR3,ALLIAR,13.01,1712,AALR
...,...,...,...,...,...,...
37737,2022-07-13,YDUQ3,YDUQS PART,13.88,19815,YDUQ
38298,2022-07-14,YDUQ3,YDUQS PART,13.95,9591,YDUQ
36105,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ


In [5]:
# Criar coluna com a média móvel de 30 dias do número de negociações de cada ativo
df_magic['totneg_sma30'] = df_magic.groupby('codneg')['totneg'].transform(lambda x: x.rolling(30, 1).mean())
df_magic

Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,totneg_sma30
0,2022-01-03,AALR3,ALLIAR,13.88,2997,AALR,2997.00
287,2022-01-04,AALR3,ALLIAR,13.71,4468,AALR,3732.50
581,2022-01-05,AALR3,ALLIAR,13.01,1712,AALR,3059.00
...,...,...,...,...,...,...,...
37737,2022-07-13,YDUQ3,YDUQS PART,13.88,19815,YDUQ,10983.07
38298,2022-07-14,YDUQ3,YDUQS PART,13.95,9591,YDUQ,11001.23
36105,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ,11073.77


In [6]:
# Definir o dia de corte e remover negociações posteriores ao corte
df_magic.query('datneg == datneg.max()', inplace=True)
df_magic.reset_index(drop=True, inplace=True)
print('Number of available companies:', df_magic.codemi.nunique())
df_magic

Number of available companies: 253


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,totneg_sma30
0,2022-07-15,AALR3,ALLIAR,19.86,763,AALR,1252.83
1,2022-07-15,ABCB4,ABC BRASIL,15.41,3599,ABCB,4875.73
2,2022-07-15,AERI3,AERIS,2.71,3763,AERI,5219.07
...,...,...,...,...,...,...,...
278,2022-07-15,WIZS3,WIZ S.A.,7.12,4387,WIZS,3198.67
279,2022-07-15,WLMM4,WLM IND COM,20.88,1,WLMM,12.90
280,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ,11073.77


In [7]:
# Excluir empresas financeiras e prestadoras de serviços públicos (utilities) conforme
# lista extraída do seguinte arquivo da B3, sem versionamento:
# https://bvmf.bmfbovespa.com.br/InstDados/InformacoesEmpresas/ClassifSetorial.zip
excluded_companies = pd.read_csv('../data/external/excluded_companies.csv')
excluded_companies = excluded_companies['company_code'].to_list()
df_magic.query('codemi != @excluded_companies', inplace=True)
print('Number of companies available for backtesting', df_magic.codemi.nunique())
df_magic

Number of companies available for backtesting 199


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,totneg_sma30
0,2022-07-15,AALR3,ALLIAR,19.86,763,AALR,1252.83
2,2022-07-15,AERI3,AERIS,2.71,3763,AERI,5219.07
4,2022-07-15,AGRO3,BRASILAGRO,22.58,2804,AGRO,3268.50
...,...,...,...,...,...,...,...
277,2022-07-15,WEST3,WESTWING,2.22,456,WEST,2056.33
279,2022-07-15,WLMM4,WLM IND COM,20.88,1,WLMM,12.90
280,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ,11073.77


In [8]:
# Carregar dataframe com o código CVM das empresas listadas
df_cod = pd.read_pickle('/mnt/aq_disk/data/AQ/cod_emissor.pkl')
df_cod

Unnamed: 0,codcvm,cnpj,densoc,situac,codemi
0,60,18451005000104,ACOPALMA CIA IND...,CANCELADA,ZWVZ
1,94,92693019000189,PANATLANTICA SA,ATIVO,PATI
2,108,60664810000174,AÇOS VILLARES SA,CANCELADA,AVIL
...,...,...,...,...,...
1766,26824,43335774000186,TRAVESSIA SECURI...,ATIVO,TMER
1767,26832,38482780000126,ANEMUS WIND HOLD...,ATIVO,ANEM
1768,26840,44841035000129,SAP SECURITIZADO...,ATIVO,SAPS


In [9]:
# A chave da união será o cód. do emissor (codemi)
# Somente o cód. CVM (codcvm) será inserido na união -> Remover colunas que não serão 
# usadas na operação de merge
df_cod = df_cod[['codcvm', 'codemi']].copy()
df_cod

Unnamed: 0,codcvm,codemi
0,60,ZWVZ
1,94,PATI
2,108,AVIL
...,...,...
1766,26824,TMER
1767,26832,ANEM
1768,26840,SAPS


In [10]:
# Obter os códigos de emissão dos ativos unindo os dataframes 
df_magic = df_magic.merge(right=df_cod, how='inner', on='codemi')
df_magic.reset_index(drop=True, inplace=True)
print('Number of companies available for backtesting:', df_magic.codemi.nunique())
df_magic

Number of companies available for backtesting: 199


Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,totneg_sma30,codcvm
0,2022-07-15,AALR3,ALLIAR,19.86,763,AALR,1252.83,24058
1,2022-07-15,AERI3,AERIS,2.71,3763,AERI,5219.07,25283
2,2022-07-15,AGRO3,BRASILAGRO,22.58,2804,AGRO,3268.50,20036
...,...,...,...,...,...,...,...,...
214,2022-07-15,WEST3,WESTWING,2.22,456,WEST,2056.33,25518
215,2022-07-15,WLMM4,WLM IND COM,20.88,1,WLMM,12.90,11070
216,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ,11073.77,21016


In [11]:
# Carregar dataframe com os dados financeiros das empresas
df_fin = (pd
    .read_csv(
        '../data/magic_financials.csv',
        parse_dates=['doc_env', 'per_ini', 'per_fim']
        )
)
# Renomear coluna com o código CVM para coincidir com as outras bases
df_fin.rename(columns={'cia_id': 'codcvm'}, inplace=True)
# As colunas 'per_ini' e 'cia_nome' não serão usadas
df_fin.drop(columns=['per_ini', 'cia_nome'], inplace=True)
# year = ano em que a informação será usada -> ano seguinte ao fim do período
# df_fin['year'] = df_fin.per_fim.dt.year + 1
df_fin.query('per_fim.dt.year == 2021', inplace=True)
df_fin

Unnamed: 0,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic
13,94,2022-03-03 07:48:05,2021-12-31,23932000.00,148873000.00,415832000.00,0.27
32,1562,2022-03-31 22:28:44,2021-12-31,9800000.00,-22401000.00,11872000.00,0.07
34,1562,2022-06-08 19:35:01,2021-12-31,9800000.00,-22401000.00,11872000.00,0.07
...,...,...,...,...,...,...,...
2912,26603,2022-03-28 18:01:25,2021-12-31,272756849.00,346064000.00,38933000.00,0.06
2914,26700,2022-03-29 18:27:45,2021-12-31,858714812.00,1659228000.00,1449802000.00,0.19
2915,26786,2022-02-01 19:43:03,2021-12-31,1269683.00,-88617000.00,137222000.00,0.56


In [12]:
df_fin.sort_values('doc_env', inplace=True)
# Manter somente a última DFP publicada da empresa
df_fin.drop_duplicates(subset='codcvm', keep='last', inplace=True)
df_fin.sort_values('codcvm', inplace=True)
df_fin

Unnamed: 0,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic
13,94,2022-03-03 07:48:05,2021-12-31,23932000.00,148873000.00,415832000.00,0.27
34,1562,2022-06-08 19:35:01,2021-12-31,9800000.00,-22401000.00,11872000.00,0.07
49,2100,2022-02-24 19:33:20,2021-12-31,42275080.00,92117000.00,36092000.00,0.11
...,...,...,...,...,...,...,...
2912,26603,2022-03-28 18:01:25,2021-12-31,272756849.00,346064000.00,38933000.00,0.06
2914,26700,2022-03-29 18:27:45,2021-12-31,858714812.00,1659228000.00,1449802000.00,0.19
2915,26786,2022-02-01 19:43:03,2021-12-31,1269683.00,-88617000.00,137222000.00,0.56


In [13]:
# Incluir os dados contábeis em 'df_magic' 
df_magic = df_magic.merge(right=df_fin, how='inner', on=['codcvm'])
df_magic

Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,totneg_sma30,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic
0,2022-07-15,AALR3,ALLIAR,19.86,763,AALR,1252.83,24058,2022-03-16 22:21:30,2021-12-31,118292816.00,650676000.00,88174000.00,0.03
1,2022-07-15,AERI3,AERIS,2.71,3763,AERI,5219.07,25283,2022-02-17 19:34:44,2021-12-31,766213456.00,565811000.00,207893000.00,0.09
2,2022-07-15,AGXY3,AGROGALAXY,7.45,635,AGXY,410.07,25658,2022-03-28 18:08:31,2021-12-31,170824989.00,634523000.00,324884000.00,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,2022-07-15,WEGE3,WEG,26.34,12677,WEGE,21164.43,5410,2022-02-16 07:05:37,2021-12-31,4197317998.00,-1428020000.00,4158343000.00,0.22
157,2022-07-15,WLMM4,WLM IND COM,20.88,1,WLMM,12.90,11070,2022-03-22 22:18:18,2021-12-31,36414670.00,-152005000.00,136133000.00,0.23
158,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ,11073.77,21016,2022-03-15 18:09:20,2021-12-31,309089000.00,3692835000.00,546592000.00,0.05


In [14]:
# Converter os dados contábeis para milhões para facilitar verificações posteriores
accounting_columns = ['shares_outstanding', 'net_debt', 'ebit']
df_magic[accounting_columns] = df_magic[accounting_columns] / 1_000_000
df_magic

Unnamed: 0,datneg,codneg,nomres,premed,totneg,codemi,totneg_sma30,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic
0,2022-07-15,AALR3,ALLIAR,19.86,763,AALR,1252.83,24058,2022-03-16 22:21:30,2021-12-31,118.29,650.68,88.17,0.03
1,2022-07-15,AERI3,AERIS,2.71,3763,AERI,5219.07,25283,2022-02-17 19:34:44,2021-12-31,766.21,565.81,207.89,0.09
2,2022-07-15,AGXY3,AGROGALAXY,7.45,635,AGXY,410.07,25658,2022-03-28 18:08:31,2021-12-31,170.82,634.52,324.88,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,2022-07-15,WEGE3,WEG,26.34,12677,WEGE,21164.43,5410,2022-02-16 07:05:37,2021-12-31,4197.32,-1428.02,4158.34,0.22
157,2022-07-15,WLMM4,WLM IND COM,20.88,1,WLMM,12.90,11070,2022-03-22 22:18:18,2021-12-31,36.41,-152.00,136.13,0.23
158,2022-07-15,YDUQ3,YDUQS PART,14.44,10135,YDUQ,11073.77,21016,2022-03-15 18:09:20,2021-12-31,309.09,3692.84,546.59,0.05


In [15]:
# Calcular os indicadores que dependem do preço do ação
df_magic['market_cap'] = df_magic['shares_outstanding'] * df_magic['premed']
df_magic['enterprise_value'] = df_magic['market_cap'] - df_magic['net_debt']
df_magic['earnings_yield'] = df_magic['ebit'] / df_magic['enterprise_value']
# A coluna 'premed' não será mais usada
df_magic.drop(columns=['premed'], inplace=True)
df_magic

Unnamed: 0,datneg,codneg,nomres,totneg,codemi,totneg_sma30,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2022-07-15,AALR3,ALLIAR,763,AALR,1252.83,24058,2022-03-16 22:21:30,2021-12-31,118.29,650.68,88.17,0.03,2349.30,1698.62,0.05
1,2022-07-15,AERI3,AERIS,3763,AERI,5219.07,25283,2022-02-17 19:34:44,2021-12-31,766.21,565.81,207.89,0.09,2076.44,1510.63,0.14
2,2022-07-15,AGXY3,AGROGALAXY,635,AGXY,410.07,25658,2022-03-28 18:08:31,2021-12-31,170.82,634.52,324.88,0.10,1272.65,638.12,0.51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,2022-07-15,WEGE3,WEG,12677,WEGE,21164.43,5410,2022-02-16 07:05:37,2021-12-31,4197.32,-1428.02,4158.34,0.22,110557.36,111985.38,0.04
157,2022-07-15,WLMM4,WLM IND COM,1,WLMM,12.90,11070,2022-03-22 22:18:18,2021-12-31,36.41,-152.00,136.13,0.23,760.34,912.34,0.15
158,2022-07-15,YDUQ3,YDUQS PART,10135,YDUQ,11073.77,21016,2022-03-15 18:09:20,2021-12-31,309.09,3692.84,546.59,0.05,4463.25,770.41,0.71


In [16]:
# Existem algumas empresas, como a COGN3, com baixo valor de mercado e excesso de
# dívida líquida resultando em um EV negativo e, consequentemente, um EY (EBIT/EV) 
# negativo ou distorcido (tendendo a infinito). Apesar do lívro não deixar isso claro,
# iremos remover essas empresas da seleção.
df_magic.query('enterprise_value > 100', inplace=True)
print('Number of companies available for backtesting', df_magic.codneg.nunique())

Number of companies available for backtesting 134


In [17]:
# Remover o ativo menos líquido da empresa (coluna 'totneg_sma30')
df_magic.sort_values(by=['codemi', 'totneg_sma30'], inplace=True)
df_magic.drop_duplicates(
    subset=['codemi'], keep='last', inplace=True, ignore_index=True
)
# A coluna 'codemi' não será mais necessária
df_magic.drop(columns='codemi', inplace=True)
print('Number of companies available for backtesting', df_magic.codneg.nunique())
df_magic

Number of companies available for backtesting 123


Unnamed: 0,datneg,codneg,nomres,totneg,totneg_sma30,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2022-07-15,AALR3,ALLIAR,763,1252.83,24058,2022-03-16 22:21:30,2021-12-31,118.29,650.68,88.17,0.03,2349.30,1698.62,0.05
1,2022-07-15,AERI3,AERIS,3763,5219.07,25283,2022-02-17 19:34:44,2021-12-31,766.21,565.81,207.89,0.09,2076.44,1510.63,0.14
2,2022-07-15,AGXY3,AGROGALAXY,635,410.07,25658,2022-03-28 18:08:31,2021-12-31,170.82,634.52,324.88,0.10,1272.65,638.12,0.51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,2022-07-15,WEGE3,WEG,12677,21164.43,5410,2022-02-16 07:05:37,2021-12-31,4197.32,-1428.02,4158.34,0.22,110557.36,111985.38,0.04
121,2022-07-15,WLMM4,WLM IND COM,1,12.90,11070,2022-03-22 22:18:18,2021-12-31,36.41,-152.00,136.13,0.23,760.34,912.34,0.15
122,2022-07-15,YDUQ3,YDUQS PART,10135,11073.77,21016,2022-03-15 18:09:20,2021-12-31,309.09,3692.84,546.59,0.05,4463.25,770.41,0.71


In [18]:
# O Livro fala em empresas com pelos menos USD 50 milhões de valor de mercado
# Remover empresas com menos de R$ 250 milhões de valor de mercado
df_magic.query('market_cap > 250', inplace=True)
# Remover ações com baixíssima liquidez no dia
df_magic.query('totneg_sma30 > 100', inplace=True)
# As colunas 'totneg' e 'totneg_sma30' não são mais necessárias
df_magic.drop(columns=['totneg', 'totneg_sma30'], inplace=True)
print('Number of companies available for backtesting', df_magic.codneg.nunique())
df_magic

Number of companies available for backtesting 116


Unnamed: 0,datneg,codneg,nomres,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield
0,2022-07-15,AALR3,ALLIAR,24058,2022-03-16 22:21:30,2021-12-31,118.29,650.68,88.17,0.03,2349.30,1698.62,0.05
1,2022-07-15,AERI3,AERIS,25283,2022-02-17 19:34:44,2021-12-31,766.21,565.81,207.89,0.09,2076.44,1510.63,0.14
2,2022-07-15,AGXY3,AGROGALAXY,25658,2022-03-28 18:08:31,2021-12-31,170.82,634.52,324.88,0.10,1272.65,638.12,0.51
...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,2022-07-15,VVEO3,VIVEO,25682,2022-03-30 18:25:34,2021-12-31,286.12,-76.53,492.30,0.16,4403.44,4479.97,0.11
120,2022-07-15,WEGE3,WEG,5410,2022-02-16 07:05:37,2021-12-31,4197.32,-1428.02,4158.34,0.22,110557.36,111985.38,0.04
122,2022-07-15,YDUQ3,YDUQS PART,21016,2022-03-15 18:09:20,2021-12-31,309.09,3692.84,546.59,0.05,4463.25,770.41,0.71


In [19]:
df_magic['rank_roic'] = df_magic['roic'].rank(method='dense', ascending=False)
df_magic['rank_ey'] = df_magic['earnings_yield'].rank(method='dense', ascending=False)
df_magic['ranks_sum'] = df_magic['rank_roic'] + df_magic['rank_ey']
df_magic['rank_final'] = df_magic['ranks_sum'].rank(method='first', ascending=True)
cols_integer = ['rank_roic', 'rank_ey', 'ranks_sum', 'rank_final']
df_magic[cols_integer] = df_magic[cols_integer].astype(int)
df_magic

Unnamed: 0,datneg,codneg,nomres,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_roic,rank_ey,ranks_sum,rank_final
0,2022-07-15,AALR3,ALLIAR,24058,2022-03-16 22:21:30,2021-12-31,118.29,650.68,88.17,0.03,2349.30,1698.62,0.05,103,94,197,108
1,2022-07-15,AERI3,AERIS,25283,2022-02-17 19:34:44,2021-12-31,766.21,565.81,207.89,0.09,2076.44,1510.63,0.14,69,59,128,67
2,2022-07-15,AGXY3,AGROGALAXY,25658,2022-03-28 18:08:31,2021-12-31,170.82,634.52,324.88,0.10,1272.65,638.12,0.51,64,20,84,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,2022-07-15,VVEO3,VIVEO,25682,2022-03-30 18:25:34,2021-12-31,286.12,-76.53,492.30,0.16,4403.44,4479.97,0.11,35,67,102,43
120,2022-07-15,WEGE3,WEG,5410,2022-02-16 07:05:37,2021-12-31,4197.32,-1428.02,4158.34,0.22,110557.36,111985.38,0.04,20,103,123,65
122,2022-07-15,YDUQ3,YDUQS PART,21016,2022-03-15 18:09:20,2021-12-31,309.09,3692.84,546.59,0.05,4463.25,770.41,0.71,88,15,103,46


In [20]:
df_magic.sort_values('rank_final', inplace=True)
# Remover colunas de cálculo intemediário
df_magic.drop(columns=['rank_roic', 'rank_ey', 'ranks_sum'], inplace=True)
df_magic

Unnamed: 0,datneg,codneg,nomres,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
14,2022-07-15,BRKM5,BRASKEM,4820,2022-03-16 19:43:41,2021-12-31,797.21,22861.69,26043.55,0.59,27160.87,4299.18,6.06,1
12,2022-07-15,BRAP4,BRADESPAR,18724,2022-03-29 06:35:35,2021-12-31,393.10,-294.71,8017.27,0.73,8726.75,9021.46,0.89,2
39,2022-07-15,GOAU4,GERDAU MET,8656,2022-02-23 07:39:32,2021-12-31,1087.24,6484.66,20984.57,0.28,10524.51,4039.85,5.19,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,2022-07-15,DASA3,DASA,19623,2022-03-29 12:56:36,2021-12-31,560.51,4032.79,88.52,0.01,9696.84,5664.05,0.02,114
27,2022-07-15,ELMD3,ELETROMIDIA,25569,2022-03-29 19:03:12,2021-12-31,139.14,28.30,9.11,0.01,1339.96,1311.66,0.01,115
59,2022-07-15,LWSA3,LOCAWEB,24910,2022-03-29 15:08:01,2021-12-31,589.58,-1480.19,8.53,0.00,3443.16,4923.35,0.00,116


In [21]:
# Select only the first 30 companies in the magic rank for each year
df_magic.query('rank_final <= 30', inplace=True)
df_magic.reset_index(drop=True, inplace=True)
print('Number of selected companies for backtesting', df_magic.codneg.nunique())
df_magic

Number of selected companies for backtesting 30


Unnamed: 0,datneg,codneg,nomres,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
0,2022-07-15,BRKM5,BRASKEM,4820,2022-03-16 19:43:41,2021-12-31,797.21,22861.69,26043.55,0.59,27160.87,4299.18,6.06,1
1,2022-07-15,BRAP4,BRADESPAR,18724,2022-03-29 06:35:35,2021-12-31,393.10,-294.71,8017.27,0.73,8726.75,9021.46,0.89,2
2,2022-07-15,GOAU4,GERDAU MET,8656,2022-02-23 07:39:32,2021-12-31,1087.24,6484.66,20984.57,0.28,10524.51,4039.85,5.19,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27,2022-07-15,RAIZ4,RAIZEN,25917,2021-05-31 20:28:54,2021-03-31,1743.42,5467.27,1873.76,0.11,7566.42,2099.16,0.89,28
28,2022-07-15,JHSF3,JHSF PART,20605,2022-02-24 19:37:23,2021-12-31,686.22,1099.50,1113.51,0.13,3787.96,2688.46,0.41,29
29,2022-07-15,LJQQ3,QUERO-QUERO,25038,2022-03-10 18:41:17,2021-12-31,187.29,-73.43,163.94,0.23,1065.68,1139.11,0.14,30


In [31]:
# Save file
df_magic.to_csv('../data/today_rank.csv', index=False)

In [32]:
# Test file
pd.read_csv('../data/today_rank.csv')

Unnamed: 0,balancing_on,codneg,nomres,codcvm,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
0,2011-04-11,TOTS3,TOTVS,19992,2011-01-31 19:05:59,2010-12-31,31.46,179.42,211.67,0.26,994.10,814.68,0.26,1
1,2011-04-11,AUTM3,AUTOMETAL,22381,2011-02-28 14:52:34,2010-12-31,94.42,252.11,247.23,0.29,1606.17,1354.06,0.18,2
2,2011-04-11,VALE5,VALE,4170,2011-02-24 23:49:15,2010-12-31,5365.31,30321.40,40442.26,0.28,254959.29,224637.90,0.18,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,2022-04-11,DXCO3,DEXCO,21091,2022-02-09 20:32:27,2021-12-31,760.96,2448.35,1891.39,0.23,10021.88,7573.54,0.25,28
358,2022-04-11,JHSF3,JHSF PART,20605,2022-02-24 19:37:23,2021-12-31,686.22,1099.50,1113.51,0.20,4412.42,3312.92,0.34,29
359,2022-04-11,CSAN3,COSAN,19836,2022-02-24 14:39:01,2021-12-31,1874.07,32752.58,8676.31,0.14,43440.96,10688.38,0.81,30


In [22]:
# Check 2022 data
df_22 = df_magic[['codneg', 'nomres', 'roic', 'earnings_yield']].reset_index(drop=True)
df_22.index += 1
df_22[['roic', 'earnings_yield']] = df_22[['roic', 'earnings_yield']].round(2)
print(df_22.to_markdown(mode='github'))

|    | codneg   | nomres       |   roic |   earnings_yield |
|---:|:---------|:-------------|-------:|-----------------:|
|  1 | BRKM5    | BRASKEM      |   0.59 |             6.06 |
|  2 | BRAP4    | BRADESPAR    |   0.73 |             0.89 |
|  3 | GOAU4    | GERDAU MET   |   0.28 |             5.19 |
|  4 | USIM5    | USIMINAS     |   0.32 |             1.04 |
|  5 | TASA4    | TAURUS ARMAS |   0.54 |             0.63 |
|  6 | GGBR4    | GERDAU       |   0.28 |             0.65 |
|  7 | CMIN3    | CSNMINERACAO |   0.8  |             0.39 |
|  8 | JBSS3    | JBS          |   0.18 |             8.94 |
|  9 | SUZB3    | SUZANO S.A.  |   0.16 |            10.92 |
| 10 | VALE3    | VALE         |   0.45 |             0.42 |
| 11 | ENAT3    | ENAUTA PART  |   0.73 |             0.33 |
| 12 | BEEF3    | MINERVA      |   0.2  |             0.97 |
| 13 | DEXP3    | DEXXOS PAR   |   0.21 |             0.62 |
| 14 | EUCA4    | EUCATEX      |   0.16 |             1.57 |
| 15 | PLPL3    | PLANOE

In [35]:
df_magic.query('codneg == "PRIO3"')

Unnamed: 0,balancing_on,codneg,nomres,doc_env,per_fim,shares_outstanding,net_debt,ebit,roic,market_cap,enterprise_value,earnings_yield,rank_final
180,2017-04-10,PRIO3,PETRORIO,2017-03-28 17:48:01,2016-12-31,13.19,-539.18,259.08,0.88,553.09,1092.27,0.24,1
271,2020-04-09,PRIO3,PETRORIO,2020-02-22 00:56:46,2019-12-31,143.19,1511.96,924.34,0.27,3800.15,2288.19,0.4,2
