In [8]:
# Importar bibliotecas
import pandas as pd
import numpy as np
import yfinance as yf
import os

start_date = '2007-01-01'
end_date = '2024-01-01'
ticker = "AMER3"

# Carregar dados
data = pd.read_csv(f"../../data/raw/indicadores_{ticker}.csv")

# Baixar os dados do Yahoo Finance
precos = yf.download((ticker+'.SA'), start=start_date, end=end_date, interval="1wk")

[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['BIDI11.SA']: Exception('%ticker%: No timezone found, symbol may be delisted')


In [5]:
data.head()

Unnamed: 0,Tipo do Indicador,Nome do Indicador,ATUAL,2023,2022,2021,2020,2019,2018,2017,2016,2015
0,INDICADORES DE VALUATION,D.Y,-%,-%,-%,"0,27%","0,17%","0,23%",-%,-%,-%,-%
1,INDICADORES DE VALUATION,P/L,-23807,-23807,-,-33796,"-3.485,69",2000,-,-,-,-
2,INDICADORES DE VALUATION,PEG RATIO,013,013,-,-,-,-,-,-,-,-
3,INDICADORES DE VALUATION,P/VP,108,108,-,293,760,072,-,-,-,-
4,INDICADORES DE VALUATION,EV/EBITDA,-,-,-,-15595,-79222,1789,-,-,-,-


In [6]:
precos_df = pd.DataFrame(precos)

precos_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [7]:
# Função para calcular a média do preço de fechamento para um determinado ano
def close_price_mean_for_year(df, year):
    df_year = df[df.index.year == year]
    return df_year['Close'].mean()

In [6]:
# Remover a coluna "Tipo do Indicador"
data = data.drop('Tipo do Indicador', axis=1)

# Renomear a coluna "ATUAL" para "2024"
data = data.rename(columns={'ATUAL': '2024'})

In [7]:
df = pd.DataFrame(data)

# Remover caracteres especiais e ajustar o separador decimal
for col in df.columns[1:]:
    df[col] = df[col].str.replace(",", ".").str.rstrip("%")


# Substituir '-' por NaN em todas as colunas exceto as duas primeiras
df.iloc[:, 1:] = df.iloc[:, 1:].replace('-', np.nan)


In [8]:
# Converter as colunas para tipo numérico
for col in df.columns[1:]:
    df[col] = pd.to_numeric(df[col], errors='coerce')

df.head()

Unnamed: 0,Nome do Indicador,2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2009
0,D.Y,,,6.43,,,,,,,,,,,0.62,,
1,P/L,-0.04,-0.06,-0.67,-4.58,-200.85,-90.32,-47.97,-22.54,-5.4,-9.3,-21.85,-14.98,-15.92,-11.46,,
2,PEG RATIO,0.0,0.0,,,,,14.08,1.47,-0.33,-0.06,-9.29,2.3,-0.17,,,
3,P/VP,-0.02,-0.03,-0.33,1.79,4.46,5.01,5.39,2.37,0.85,1.44,1.16,2.88,2.8,0.9,,
4,EV/EBITDA,-2.77,-2.82,-4.09,-7.52,44.99,45.54,50.62,31.93,7.54,8.47,7.68,10.88,16.15,7.1,,


In [9]:
# Transpor o DataFrame
df_transposed = df.transpose()

# Resetar o índice
df_transposed = df_transposed.reset_index()


# Definir a primeira linha como o cabeçalho
df_transposed.columns = df_transposed.iloc[0]

# Remover a primeira linha do DataFrame
df_transposed = df_transposed[1:]

# Resetar o índice
df_transposed = df_transposed.reset_index(drop=True)

#Troca o nome da coluna Nome do Indicador para Ano
df_transposed = df_transposed.rename(columns={'Nome do Indicador': 'Ano'})

df_transposed

Unnamed: 0,Ano,D.Y,P/L,PEG RATIO,P/VP,EV/EBITDA,EV/EBIT,P/EBITDA,P/EBIT,VPA,...,M. BRUTA,M. EBITDA,M. EBIT,M. LÍQUIDA,ROE,ROA,ROIC,GIRO ATIVOS,CAGR RECEITAS 5 ANOS,CAGR LUCROS 5 ANOS
0,2024,,-0.04,0.0,-0.02,-2.77,-2.2,-0.08,-0.06,-29.55,...,19.46,-24.06,-30.26,-50.03,-48.42,-41.26,202.73,0.82,32.64,
1,2023,,-0.06,0.0,-0.03,-2.82,-2.24,-0.13,-0.11,-29.55,...,19.46,-24.06,-30.26,-50.03,-48.42,-41.26,202.73,0.82,32.64,
2,2022,6.43,-0.67,,-0.33,-4.09,-3.25,-1.4,-1.11,-29.56,...,19.46,-24.06,-30.26,-50.03,-48.42,-41.26,202.73,0.82,32.64,
3,2021,,-4.58,,1.79,-7.52,-5.63,-8.5,-6.36,17.61,...,17.28,-14.91,-19.94,-27.7,-148.26,-14.01,-130.72,0.51,21.23,
4,2020,,-200.85,,4.46,44.99,216.24,48.77,234.43,16.94,...,28.42,8.57,1.78,-2.08,-2.22,-0.93,0.82,0.45,2.35,
5,2019,,-90.32,,5.01,45.54,276.83,45.62,277.31,12.54,...,29.72,9.31,1.53,-4.7,-5.55,-1.98,-0.34,0.42,-3.2,
6,2018,,-47.97,14.08,5.39,50.62,-913.96,46.24,-834.81,7.8,...,25.81,6.35,-0.35,-6.13,-11.24,-3.05,-2.06,0.5,1.28,
7,2017,,-22.54,1.47,2.37,31.93,,26.78,843.53,8.63,...,21.14,5.51,0.17,-6.55,-10.53,-3.26,-1.95,0.5,5.49,
8,2016,,-5.4,-0.33,0.85,7.54,13.64,4.02,7.27,11.96,...,19.91,7.59,4.2,-5.65,-15.79,-4.74,1.69,0.84,15.24,
9,2015,,-9.3,-0.06,1.44,8.47,13.4,5.97,9.44,10.59,...,19.83,7.23,4.57,-4.64,-15.46,-4.21,5.79,0.91,17.22,


In [10]:
#adicionando o ticker
df_transposed['Ticker'] = ticker

#seta o indice como o ticker e ano
#df_transposed = df_transposed.set_index(['Ticker','Ano'])

df_transposed

Unnamed: 0,Ano,D.Y,P/L,PEG RATIO,P/VP,EV/EBITDA,EV/EBIT,P/EBITDA,P/EBIT,VPA,...,M. EBITDA,M. EBIT,M. LÍQUIDA,ROE,ROA,ROIC,GIRO ATIVOS,CAGR RECEITAS 5 ANOS,CAGR LUCROS 5 ANOS,Ticker
0,2024,,-0.04,0.0,-0.02,-2.77,-2.2,-0.08,-0.06,-29.55,...,-24.06,-30.26,-50.03,-48.42,-41.26,202.73,0.82,32.64,,AMER3
1,2023,,-0.06,0.0,-0.03,-2.82,-2.24,-0.13,-0.11,-29.55,...,-24.06,-30.26,-50.03,-48.42,-41.26,202.73,0.82,32.64,,AMER3
2,2022,6.43,-0.67,,-0.33,-4.09,-3.25,-1.4,-1.11,-29.56,...,-24.06,-30.26,-50.03,-48.42,-41.26,202.73,0.82,32.64,,AMER3
3,2021,,-4.58,,1.79,-7.52,-5.63,-8.5,-6.36,17.61,...,-14.91,-19.94,-27.7,-148.26,-14.01,-130.72,0.51,21.23,,AMER3
4,2020,,-200.85,,4.46,44.99,216.24,48.77,234.43,16.94,...,8.57,1.78,-2.08,-2.22,-0.93,0.82,0.45,2.35,,AMER3
5,2019,,-90.32,,5.01,45.54,276.83,45.62,277.31,12.54,...,9.31,1.53,-4.7,-5.55,-1.98,-0.34,0.42,-3.2,,AMER3
6,2018,,-47.97,14.08,5.39,50.62,-913.96,46.24,-834.81,7.8,...,6.35,-0.35,-6.13,-11.24,-3.05,-2.06,0.5,1.28,,AMER3
7,2017,,-22.54,1.47,2.37,31.93,,26.78,843.53,8.63,...,5.51,0.17,-6.55,-10.53,-3.26,-1.95,0.5,5.49,,AMER3
8,2016,,-5.4,-0.33,0.85,7.54,13.64,4.02,7.27,11.96,...,7.59,4.2,-5.65,-15.79,-4.74,1.69,0.84,15.24,,AMER3
9,2015,,-9.3,-0.06,1.44,8.47,13.4,5.97,9.44,10.59,...,7.23,4.57,-4.64,-15.46,-4.21,5.79,0.91,17.22,,AMER3


In [11]:
df_transposed['D.Y'] = df_transposed['D.Y'].astype(float).fillna(0)


In [13]:
# Aplicar a função para cada linha (ano) do DataFrame
df_transposed['PrecoAnoSeguinte'] = df_transposed['Ano'].astype(int).apply(lambda x: close_price_mean_for_year(precos_df, x + 1))
df_transposed['PrecoAnoAtual'] = df_transposed['Ano'].astype(int).apply(lambda x: close_price_mean_for_year(precos_df, x))

In [14]:
df_transposed

Unnamed: 0,Ano,D.Y,P/L,PEG RATIO,P/VP,EV/EBITDA,EV/EBIT,P/EBITDA,P/EBIT,VPA,...,M. LÍQUIDA,ROE,ROA,ROIC,GIRO ATIVOS,CAGR RECEITAS 5 ANOS,CAGR LUCROS 5 ANOS,Ticker,PrecoAnoSeguinte,PrecoAnoAtual
0,2024,0.0,-0.04,0.0,-0.02,-2.77,-2.2,-0.08,-0.06,-29.55,...,-50.03,-48.42,-41.26,202.73,0.82,32.64,,AMER3,,
1,2023,0.0,-0.06,0.0,-0.03,-2.82,-2.24,-0.13,-0.11,-29.55,...,-50.03,-48.42,-41.26,202.73,0.82,32.64,,AMER3,,1.239423
2,2022,6.43,-0.67,,-0.33,-4.09,-3.25,-1.4,-1.11,-29.56,...,-50.03,-48.42,-41.26,202.73,0.82,32.64,,AMER3,1.239423,19.961902
3,2021,0.0,-4.58,,1.79,-7.52,-5.63,-8.5,-6.36,17.61,...,-27.7,-148.26,-14.01,-130.72,0.51,21.23,,AMER3,19.961902,54.292429
4,2020,0.0,-200.85,,4.46,44.99,216.24,48.77,234.43,16.94,...,-2.08,-2.22,-0.93,0.82,0.45,2.35,,AMER3,54.292429,83.763484
5,2019,0.0,-90.32,,5.01,45.54,276.83,45.62,277.31,12.54,...,-4.7,-5.55,-1.98,-0.34,0.42,-3.2,,AMER3,83.763484,44.422621
6,2018,0.0,-47.97,14.08,5.39,50.62,-913.96,46.24,-834.81,7.8,...,-6.13,-11.24,-3.05,-2.06,0.5,1.28,,AMER3,44.422621,27.896747
7,2017,0.0,-22.54,1.47,2.37,31.93,,26.78,843.53,8.63,...,-6.55,-10.53,-3.26,-1.95,0.5,5.49,,AMER3,27.896747,14.912225
8,2016,0.0,-5.4,-0.33,0.85,7.54,13.64,4.02,7.27,11.96,...,-5.65,-15.79,-4.74,1.69,0.84,15.24,,AMER3,14.912225,12.437
9,2015,0.0,-9.3,-0.06,1.44,8.47,13.4,5.97,9.44,10.59,...,-4.64,-15.46,-4.21,5.79,0.91,17.22,,AMER3,12.437,18.472331


In [15]:
# Criar o campo alvo
df_transposed['Alvo'] = np.where(df_transposed['PrecoAnoSeguinte'] > df_transposed['PrecoAnoAtual'] * 1.15, 'Barata',
                        np.where(df_transposed['PrecoAnoSeguinte'] < df_transposed['PrecoAnoAtual'] * 0.85, 'Cara', 'Neutra'))

In [16]:
df_transposed


Unnamed: 0,Ano,D.Y,P/L,PEG RATIO,P/VP,EV/EBITDA,EV/EBIT,P/EBITDA,P/EBIT,VPA,...,ROE,ROA,ROIC,GIRO ATIVOS,CAGR RECEITAS 5 ANOS,CAGR LUCROS 5 ANOS,Ticker,PrecoAnoSeguinte,PrecoAnoAtual,Alvo
0,2024,0.0,-0.04,0.0,-0.02,-2.77,-2.2,-0.08,-0.06,-29.55,...,-48.42,-41.26,202.73,0.82,32.64,,AMER3,,,Neutra
1,2023,0.0,-0.06,0.0,-0.03,-2.82,-2.24,-0.13,-0.11,-29.55,...,-48.42,-41.26,202.73,0.82,32.64,,AMER3,,1.239423,Neutra
2,2022,6.43,-0.67,,-0.33,-4.09,-3.25,-1.4,-1.11,-29.56,...,-48.42,-41.26,202.73,0.82,32.64,,AMER3,1.239423,19.961902,Cara
3,2021,0.0,-4.58,,1.79,-7.52,-5.63,-8.5,-6.36,17.61,...,-148.26,-14.01,-130.72,0.51,21.23,,AMER3,19.961902,54.292429,Cara
4,2020,0.0,-200.85,,4.46,44.99,216.24,48.77,234.43,16.94,...,-2.22,-0.93,0.82,0.45,2.35,,AMER3,54.292429,83.763484,Cara
5,2019,0.0,-90.32,,5.01,45.54,276.83,45.62,277.31,12.54,...,-5.55,-1.98,-0.34,0.42,-3.2,,AMER3,83.763484,44.422621,Barata
6,2018,0.0,-47.97,14.08,5.39,50.62,-913.96,46.24,-834.81,7.8,...,-11.24,-3.05,-2.06,0.5,1.28,,AMER3,44.422621,27.896747,Barata
7,2017,0.0,-22.54,1.47,2.37,31.93,,26.78,843.53,8.63,...,-10.53,-3.26,-1.95,0.5,5.49,,AMER3,27.896747,14.912225,Barata
8,2016,0.0,-5.4,-0.33,0.85,7.54,13.64,4.02,7.27,11.96,...,-15.79,-4.74,1.69,0.84,15.24,,AMER3,14.912225,12.437,Barata
9,2015,0.0,-9.3,-0.06,1.44,8.47,13.4,5.97,9.44,10.59,...,-15.46,-4.21,5.79,0.91,17.22,,AMER3,12.437,18.472331,Cara


In [19]:
# Verificar se o diretório existe, caso contrário, criar
output_dir = '../../data/processed'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Salvar o DataFrame em um arquivo parquet
output_file = os.path.join(output_dir, f'dados_{ticker}.parquet')
df_transposed.to_parquet(output_file)