# Magic Formula

In [1]:
import string
import warnings
from io import StringIO

# Silence library warnings before the third-party imports below so that
# import-time warnings are suppressed as well.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import requests

In [2]:
# Fundamentus results page from which the indicator table is scraped.
url = "http://www.fundamentus.com.br/resultado.php"

In [3]:
# Browser-like User-Agent header sent along with the request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
}

# The timeout keeps this cell from hanging indefinitely when the site does not
# answer; raise_for_status() surfaces HTTP errors here instead of letting an
# error page reach the HTML parser in a later cell.
r = requests.get(url, headers=header, timeout=30)
r.raise_for_status()

In [4]:
# The Fundamentus table uses ',' as the decimal mark and '.' as the thousands
# separator. The HTML text is wrapped in StringIO because passing a literal
# HTML string to read_html is deprecated in recent pandas releases.
df = pd.read_html(StringIO(r.text), decimal=',', thousands='.')[0]

In [5]:
# Inspect the column names parsed from the scraped table.
df.columns

Index(['Papel', 'Cotação', 'P/L', 'P/VP', 'PSR', 'Div.Yield', 'P/Ativo',
       'P/Cap.Giro', 'P/EBIT', 'P/Ativ Circ.Liq', 'EV/EBIT', 'EV/EBITDA',
       'Mrg Ebit', 'Mrg. Líq.', 'Liq. Corr.', 'ROIC', 'ROE', 'Liq.2meses',
       'Patrim. Líq', 'Dív.Brut/ Patrim.', 'Cresc. Rec.5a'],
      dtype='object')

In [6]:
# Indicators displayed with a '%' sign are scraped as strings.
# This loop converts them to float fractions (e.g. "12,5%" -> 0.125).
for coluna in ['Div.Yield', 'Mrg Ebit', 'Mrg. Líq.', 'ROIC', 'ROE', 'Cresc. Rec.5a']:
    # Skip columns that are already numeric so the cell can be re-run safely.
    if pd.api.types.is_numeric_dtype(df[coluna]):
        continue
    df[coluna] = (
        df[coluna]
        # regex=False: '.' must be matched literally (as a regex it would
        # match any character).
        .str.replace('.', '', regex=False)   # drop the thousands separator
        .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
        .str.rstrip('%')
        .astype('float')
        / 100
    )

## Analisando os dados 

In [7]:
# Liquidity filter: keep only rows whose 'Liq.2meses' value is above 100,000.
df = df.loc[df['Liq.2meses'].gt(100000)]
df.shape

(305, 21)

## Joel Greenblatt

In [8]:
# Preview the first five rows of the liquidity-filtered table.
df.head(5)

Unnamed: 0,Papel,Cotação,P/L,P/VP,PSR,Div.Yield,P/Ativo,P/Cap.Giro,P/EBIT,P/Ativ Circ.Liq,...,EV/EBITDA,Mrg Ebit,Mrg. Líq.,Liq. Corr.,ROIC,ROE,Liq.2meses,Patrim. Líq,Dív.Brut/ Patrim.,Cresc. Rec.5a
16,HBSA3,2.6,-242.19,1.49,1.118,0.0,0.311,3.31,6.78,-0.53,...,8.94,0.1649,-0.0046,1.86,0.0536,-0.0061,10950600.0,1330800000.0,3.42,0.1952
24,SMFT3,15.84,-103.84,2.22,8.591,0.0,0.771,5.25,-85.74,-2.15,...,12.32,-0.1002,-0.0827,2.01,-0.0122,-0.0214,33411500.0,4186390000.0,0.82,-0.0287
34,ALPA3,8.43,-51.87,1.04,1.385,0.0,0.708,2.93,13.44,8.65,...,11.11,0.1031,-0.0269,2.5,0.0595,-0.0201,104421.0,5510850000.0,0.25,0.023
36,MGLU3,3.57,-48.29,2.26,0.646,0.0,0.638,3.39,20.7,-4.65,...,12.24,0.0312,-0.0134,1.48,0.0456,-0.0469,392733000.0,10648700000.0,0.67,0.2608
38,ALPA4,7.68,-47.25,0.95,1.262,0.0,0.645,2.67,12.24,7.88,...,10.25,0.1031,-0.0269,2.5,0.0595,-0.0201,64649200.0,5510850000.0,0.25,0.023


In [9]:
# Screening criteria; a stock is kept only when all of them hold:
# positive ROE, P/L above 3, 'Mrg. Líq.' above 0.10 and positive ROIC.
data = df[
    (df['ROE'] > 0.0)
    & (df['P/L'] > 3.0)
    & (df['Mrg. Líq.'] > 0.10)
    & (df['ROIC'] > 0.0)
]
# Disabled additional criterion, kept for reference: data['EV/EBITDA'] > 0.0

In [10]:
# Compare the shape of the screened frame (data) with the liquidity-filtered frame (df).
print(data.shape, df.shape) 

(99, 21) (305, 21)


In [11]:
# Number of stocks (rows) that passed the screening filters.
num_acoes = len(data)

## Rankings

In [12]:
# Build the two rankings of the screened stocks:
#   RankingROE - ordered by ROE, highest first
#   RankingPL  - ordered by P/L, lowest first
# Positions run from 1 to num_acoes inclusive (range's end is exclusive, hence
# num_acoes + 1) so that every screened stock receives a position in both
# rankings and none is dropped from the later merge.
por_roe = data.sort_values(by=['ROE'], ascending=False)
RankingROE = pd.DataFrame({
    'pos_roe': range(1, num_acoes + 1),
    'Papel': por_roe['Papel'].values,
    'ROE Value': por_roe['ROE'].values,
})

por_pl = data.sort_values(by=['P/L'], ascending=True)
RankingPL = pd.DataFrame({
    'pos_pl': range(1, num_acoes + 1),
    'Papel': por_pl['Papel'].values,
    'P/L Value': por_pl['P/L'].values,
})

In [15]:
# Five best positions in the P/L ranking.
RankingPL.head(5)

Unnamed: 0,pos_pl,Papel,P/L Value
0,1,EUCA4,3.13
1,2,GGBR3,3.2
2,3,CBAV3,3.37
3,4,GGBR4,3.56
4,5,CAMB3,3.59


In [16]:
# Five best positions in the ROE ranking.
RankingROE.head(5)

Unnamed: 0,pos_roe,Papel,ROE Value
0,1,CGAS5,1.754
1,2,UNIP6,0.5722
2,3,UNIP3,0.5722
3,4,KEPL3,0.5226
4,5,TASA4,0.5077


## Soma das posições - Tabela das ações mais descontadas

In [17]:
# Join both rankings on the ticker column ('Papel' is the only column they
# share); only tickers present in both rankings are kept.
ranking = RankingPL.merge(RankingROE, how='inner', on='Papel')

In [18]:
# Combined score: sum of the positions in the two rankings (lower is better,
# given that position 1 is the best in each ranking).
ranking['pts'] = ranking['pos_pl'].add(ranking['pos_roe'])
ranking.head(3)

Unnamed: 0,pos_pl,Papel,P/L Value,pos_roe,ROE Value,pts
0,1,EUCA4,3.13,78,0.1279,79
1,2,GGBR3,3.2,30,0.2408,32
2,3,CBAV3,3.37,58,0.1703,61


In [19]:
# Order by combined score (lowest first), keep the display columns with the
# ticker first, and renumber the rows starting at 1.
rank = (
    ranking
    .sort_values('pts')
    [["Papel", "P/L Value", "pos_pl", "ROE Value", "pos_roe", "pts"]]
    .reset_index(drop=True)
)
rank.index = pd.RangeIndex(1, len(rank) + 1)
rank.head(5)

Unnamed: 0,Papel,P/L Value,pos_pl,ROE Value,pos_roe,pts
1,TASA4,3.64,6,0.5077,5,11
2,TASA3,3.66,7,0.5077,6,13
3,KEPL3,3.7,9,0.5226,4,13
4,CAMB3,3.59,5,0.3714,15,20
5,VALE3,3.68,8,0.4344,13,21


## Tabela sem a posição do PL e do ROE

In [20]:
# Same ordering as the previous table, without the individual ranking
# positions; rows are renumbered starting at 1.
rank = (
    ranking
    .sort_values('pts')
    [["Papel", "P/L Value", "ROE Value", "pts"]]
    .reset_index(drop=True)
)
rank.index = pd.RangeIndex(1, len(rank) + 1)
rank.head(30)

Unnamed: 0,Papel,P/L Value,ROE Value,pts
1,TASA4,3.64,0.5077,11
2,TASA3,3.66,0.5077,13
3,KEPL3,3.7,0.5226,13
4,CAMB3,3.59,0.3714,20
5,VALE3,3.68,0.4344,21
6,UNIP6,4.91,0.5722,25
7,UNIP3,4.7,0.5722,25
8,KLBN11,4.5,0.4567,28
9,KLBN3,4.6,0.4567,28
10,KLBN4,4.49,0.4567,28


In [21]:
def retirar_duplicados(rank):
    """Keep only the first row for each ticker prefix.

    Rows whose 'Papel' value starts with the same four characters (for
    example 'TASA4' and 'TASA3') are treated as duplicates; the first one in
    the current row order is kept.

    Parameters
    ----------
    rank : pandas.DataFrame
        Frame with a 'Papel' column of ticker strings. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns as ``rank``, duplicates removed and
        the index renumbered starting at 1.
    """
    # The prefix may contain digits (e.g. 'B3SA3'), so digits are accepted
    # alongside uppercase letters and the match is anchored at the start.
    ticker = rank['Papel'].str.extract(r'^([A-Z0-9]{4})', expand=False)
    # Values that do not match fall back to the full 'Papel' string so that
    # unrelated non-matching rows are not collapsed into one NaN group.
    ticker = ticker.fillna(rank['Papel'])

    # A positional (NumPy) mask avoids any index alignment on the input frame.
    manter = ~ticker.duplicated(keep='first').to_numpy()
    sem_duplicados = rank[manter].reset_index(drop=True)
    sem_duplicados.index = sem_duplicados.index + 1
    return sem_duplicados


# Final table: order by combined score, keep the display columns and drop
# the additional share classes of the same ticker prefix.
rank = retirar_duplicados(
    ranking.sort_values('pts')[["Papel", "P/L Value", "ROE Value", "pts"]]
)

rank.head(30)

Unnamed: 0,Papel,P/L Value,ROE Value,pts
1,TASA4,3.64,0.5077,11
2,KEPL3,3.7,0.5226,13
3,CAMB3,3.59,0.3714,20
4,VALE3,3.68,0.4344,21
5,UNIP6,4.91,0.5722,25
6,KLBN11,4.5,0.4567,28
7,FESA4,4.03,0.3495,29
8,GGBR3,3.2,0.2408,32
9,RSUL4,6.08,0.4677,43
10,RECV3,4.68,0.3027,44
