## Setup

In [74]:
# carregamento das bibliotecas
import sys
import os
import pandas as pd 
from datetime import date, timedelta
# Adiciona a pasta raiz do projeto (onde está a pasta src) ao sys.path
sys.path.append(os.path.abspath(os.path.join("..")))
from src.config.logging_config import setup_logging
from src.data.load_data import carregar_dataset
from src.data.preprocessing import *

In [75]:
# configuração do logging
setup_logging()


## Carregamento dos dados

In [76]:
# carregamento da tabela de ações componentes do ibovespa com dados  setorais
tbl_carteira_ibovespa = carregar_dataset(path='../data/external/tbl_acoes_ibovespa.csv')
tbl_carteira_ibovespa

2025-08-04 02:10:40,730 | INFO | src.data.load_data | Captura do arquivo csv no path:../data/external/tbl_acoes_ibovespa.csv


Unnamed: 0,codigo,tipo,empresa,slug,setor,industria,ticker
0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,ABEV3.SA
1,ALOS3,ON,Allos,alos3,Imóveis,Atividades Imobiliárias,ALOS3.SA
2,ASAI3,ON,Assaí­ Atacadista,assai-asai3,Consumo não-cíclico,Varejo de alimentos e medicamentos,ASAI3.SA
3,AURE3,ON,Auren,auren-energia-aure3,Utilidades,Empresas independentes e concessionárias elétr...,AURE3.SA
4,AZZA3,ON,Azzas 2154,azzas-2154-azza3,Consumo cíclico,Varejistas especializados,AZZA3.SA
...,...,...,...,...,...,...,...
79,CPLE6,PNB,Companhia Paranaense de Energia COPEL,copel-pnb,Utilidades,Empresas independentes e concessionárias elétr...,CPLE6.SA
80,TIMS3,ON,TIM SA,tim-part-s-a-on-nm,Tecnologia,Serviços de telecomunicação,TIMS3.SA
81,VAMO3,ON,Grupo Vamos,grupo-vamos-sa,Indústria,Serviços de frete e logística,VAMO3.SA
82,VBBR3,ON,Vibra Energia SA,petrobras-distribuidora,Energia,Gás e Petróleo,VBBR3.SA


In [77]:
# carregamento da base historica de ações componentes do ibovespa para validação dos dados
tbl_cotacao_ibovespa = carregar_dataset(path='../data/raw/tbl_acoes_ibovespa_historico.csv')
tbl_cotacao_ibovespa.head()

2025-08-04 02:10:40,789 | INFO | src.data.load_data | Captura do arquivo csv no path:../data/raw/tbl_acoes_ibovespa_historico.csv


Unnamed: 0,Date,ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,trailingPE,priceToBook,returnOnEquity,profitMargins,revenueGrowth,dividendYield,totalDebt,freeCashflow
0,2025-07-14 00:00:00-03:00,ABEV3,13.26,13.57,13.26,13.29,36420400,0.0,0.0,13.215054,2.034432,0.1624,0.15742,0.11,7.41,3218170112,18302230000.0
1,2025-07-15 00:00:00-03:00,ABEV3,13.37,13.52,13.23,13.27,27675300,0.0,0.0,13.215054,2.034432,0.1624,0.15742,0.11,7.41,3218170112,18302230000.0
2,2025-07-16 00:00:00-03:00,ABEV3,13.38,13.73,13.31,13.68,48903600,0.0,0.0,13.215054,2.034432,0.1624,0.15742,0.11,7.41,3218170112,18302230000.0
3,2025-07-17 00:00:00-03:00,ABEV3,13.67,13.69,13.44,13.46,19213600,0.0,0.0,13.215054,2.034432,0.1624,0.15742,0.11,7.41,3218170112,18302230000.0
4,2025-07-18 00:00:00-03:00,ABEV3,13.33,13.49,13.29,13.32,23902900,0.0,0.0,13.215054,2.034432,0.1624,0.15742,0.11,7.41,3218170112,18302230000.0


In [78]:
# concatenação dos dados setoriais com a tabela de cotação
tbl_cotacao_ibovespa = tbl_cotacao_ibovespa.merge(tbl_carteira_ibovespa.drop('ticker', axis=1), left_on='ticker', right_on='codigo', how='left')
tbl_cotacao_ibovespa.head()

Unnamed: 0,Date,ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,trailingPE,...,revenueGrowth,dividendYield,totalDebt,freeCashflow,codigo,tipo,empresa,slug,setor,industria
0,2025-07-14 00:00:00-03:00,ABEV3,13.26,13.57,13.26,13.29,36420400,0.0,0.0,13.215054,...,0.11,7.41,3218170112,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas
1,2025-07-15 00:00:00-03:00,ABEV3,13.37,13.52,13.23,13.27,27675300,0.0,0.0,13.215054,...,0.11,7.41,3218170112,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas
2,2025-07-16 00:00:00-03:00,ABEV3,13.38,13.73,13.31,13.68,48903600,0.0,0.0,13.215054,...,0.11,7.41,3218170112,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas
3,2025-07-17 00:00:00-03:00,ABEV3,13.67,13.69,13.44,13.46,19213600,0.0,0.0,13.215054,...,0.11,7.41,3218170112,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas
4,2025-07-18 00:00:00-03:00,ABEV3,13.33,13.49,13.29,13.32,23902900,0.0,0.0,13.215054,...,0.11,7.41,3218170112,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas


In [79]:
# transformação da coluna date em index do dataset
tbl_cotacao_ibovespa.set_index('Date', drop=True, inplace=True)
tbl_cotacao_ibovespa.index = pd.to_datetime(tbl_cotacao_ibovespa.index)

In [80]:
# inserção de dados temporais no dataset

tbl_cotacao_ibovespa = dados_temporais(df=tbl_cotacao_ibovespa)
tbl_cotacao_ibovespa.head()


Unnamed: 0_level_0,ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,trailingPE,priceToBook,...,freeCashflow,codigo,tipo,empresa,slug,setor,industria,dayofweek,month,Feriado
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-07-14 00:00:00-03:00,ABEV3,13.26,13.57,13.26,13.29,36420400,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,0,7,False
2025-07-15 00:00:00-03:00,ABEV3,13.37,13.52,13.23,13.27,27675300,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,1,7,False
2025-07-16 00:00:00-03:00,ABEV3,13.38,13.73,13.31,13.68,48903600,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,2,7,False
2025-07-17 00:00:00-03:00,ABEV3,13.67,13.69,13.44,13.46,19213600,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,3,7,False
2025-07-18 00:00:00-03:00,ABEV3,13.33,13.49,13.29,13.32,23902900,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,4,7,False


P/L (Preço sobre Lucro) (trailingPE): Quanto o mercado paga por cada real de lucro da empresa. Ajuda a ver se a ação está cara ou barata em relação ao lucro.

P/VP (Preço sobre Valor Patrimonial) (priceToBook): Compara o preço da ação com o valor contábil dos ativos da empresa. Indica se a ação está acima ou abaixo do seu valor patrimonial.

ROE (Retorno sobre o Patrimônio Líquido) (returnOnEquity): Rentabilidade da empresa sobre o capital próprio investido pelos acionistas. Um ROE alto mostra boa gestão.

Margem Líquida (profitMargins): Porcentagem da receita que se transforma em lucro líquido. Indica a eficiência da empresa em gerar lucro das vendas.

Crescimento de Receita (revenueGrowth): Aumento percentual das vendas da empresa. Essencial para avaliar a expansão e potencial de mercado.

Dividend Yield (dividendYield): Retorno em dividendos que a ação oferece em relação ao seu preço. Importante para quem busca renda passiva.

Dívida Total (totalDebt): Montante total de obrigações financeiras da empresa. Ajuda a avaliar a saúde financeira e o risco.

Fluxo de Caixa Livre (freeCashflow): Dinheiro que sobra para a empresa após todas as despesas e investimentos. Sinaliza a capacidade da empresa de gerar caixa puro.

## Análise

In [81]:
# informações da tabela
tbl_cotacao_ibovespa.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 840 entries, 2025-07-14 00:00:00-03:00 to 2025-07-25 00:00:00-03:00
Data columns (total 25 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ticker          840 non-null    object 
 1   Open            840 non-null    float64
 2   High            840 non-null    float64
 3   Low             840 non-null    float64
 4   Close           840 non-null    float64
 5   Volume          840 non-null    int64  
 6   Dividends       840 non-null    float64
 7   Stock Splits    840 non-null    float64
 8   trailingPE      720 non-null    float64
 9   priceToBook     840 non-null    float64
 10  returnOnEquity  830 non-null    float64
 11  profitMargins   840 non-null    float64
 12  revenueGrowth   830 non-null    float64
 13  dividendYield   740 non-null    float64
 14  totalDebt       840 non-null    int64  
 15  freeCashflow    780 non-null    float64
 16  codigo          840 non-null   

In [82]:
# verificação da quantidade de ações por industria
empresas_por_industria = agrupar_dados(df=tbl_cotacao_ibovespa, cols_agrup=['setor', 'industria'], cols_filter=['ticker'], agr='nunique').reset_index()
empresas_por_industria.head()

2025-08-04 02:10:40,899 | INFO | src.data.preprocessing | Agrupamento selecionado: ['setor', 'industria'], filtragem dataset:['ticker'], método: nunique


Unnamed: 0,setor,industria,ticker
0,Saúde,Prestação de serviços de Saúde,1
1,Serviços acadêmicos e educacionais,"Colégio, faculdade e universidade",1
2,Consumo cíclico,Serviços de hotel e entretenimento,2
3,Consumo cíclico,Automóveis e peças,1
4,Consumo cíclico,Varejistas especializados,4


In [83]:
industrias_por_setor = tbl_cotacao_ibovespa.groupby('setor')['industria'].nunique().reset_index(name='count_industria_setor')
industrias_por_setor

Unnamed: 0,setor,count_industria_setor
0,Saúde,1
1,Serviços acadêmicos e educacionais,1
2,Consumo cíclico,4
3,Consumo não-cíclico,6
4,Energia,1
5,Financeiro,3
6,Imóveis,2
7,Indústria,5
8,Materiais básicos,4
9,Saúde,2


In [84]:
# verificação da quantidade de ações por setor
empresas_por_setor = tbl_cotacao_ibovespa.groupby(['setor'])['ticker'].nunique().reset_index(name='count_ticker_setor')
empresas_por_setor.head()

Unnamed: 0,setor,count_ticker_setor
0,Saúde,1
1,Serviços acadêmicos e educacionais,1
2,Consumo cíclico,8
3,Consumo não-cíclico,11
4,Energia,8


In [85]:
# verificação de nulos
tbl_cotacao_ibovespa.isna().sum()

ticker              0
Open                0
High                0
Low                 0
Close               0
Volume              0
Dividends           0
Stock Splits        0
trailingPE        120
priceToBook         0
returnOnEquity     10
profitMargins       0
revenueGrowth      10
dividendYield     100
totalDebt           0
freeCashflow       60
codigo              0
tipo                0
empresa             0
slug                0
setor               0
industria           0
dayofweek           0
month               0
Feriado             0
dtype: int64

In [86]:
valores_nulos = tbl_cotacao_ibovespa[tbl_cotacao_ibovespa.isnull().any(axis=1)][['ticker', 'trailingPE', 'dividendYield', 'freeCashflow']]
valores_nulos.head()

Unnamed: 0_level_0,ticker,trailingPE,dividendYield,freeCashflow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-07-14 00:00:00-03:00,BBAS3,4.816273,1.84,
2025-07-15 00:00:00-03:00,BBAS3,4.816273,1.84,
2025-07-16 00:00:00-03:00,BBAS3,4.816273,1.84,
2025-07-17 00:00:00-03:00,BBAS3,4.816273,1.84,
2025-07-18 00:00:00-03:00,BBAS3,4.816273,1.84,


# Prenchimento de nulos
Para novos dados no pipeline, usaremos o método de preenchimento ffill ou com a média movel até o dia anterior para preenchimento dos valores nulos no glue.

In [87]:
# tratamento dos valores nulos: interpolação em cascata
tbl_cotacao_ibovespa = tratamento_nulo_dados_setor_industria(df=tbl_cotacao_ibovespa, colunas=['trailingPE','returnOnEquity','revenueGrowth','dividendYield','freeCashflow'])
tbl_cotacao_ibovespa.head()

2025-08-04 02:10:41,020 | INFO | src.data.preprocessing | Iniciando a preparação do dataset para interpolação.
2025-08-04 02:10:41,036 | INFO | src.data.preprocessing | Contagens por setor e indústria adicionadas ao DataFrame.
2025-08-04 02:10:41,037 | INFO | src.data.preprocessing | Processando coluna: trailingPE
2025-08-04 02:10:41,067 | INFO | src.data.preprocessing | Processando coluna: returnOnEquity
2025-08-04 02:10:41,078 | INFO | src.data.preprocessing | Processando coluna: revenueGrowth
2025-08-04 02:10:41,089 | INFO | src.data.preprocessing | Processando coluna: dividendYield
2025-08-04 02:10:41,105 | INFO | src.data.preprocessing | Processando coluna: freeCashflow
2025-08-04 02:10:41,116 | INFO | src.data.preprocessing | Aplicando ffill() e bfill() finais.
2025-08-04 02:10:41,132 | INFO | src.data.preprocessing | Preenchimento de nulos concluído com sucesso.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_transf[coluna].fillna(media, inplace=True)


Unnamed: 0_level_0,ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,trailingPE,priceToBook,...,freeCashflow,codigo,tipo,empresa,slug,setor,industria,dayofweek,month,Feriado
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-07-14 00:00:00-03:00,ABEV3,13.26,13.57,13.26,13.29,36420400,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,0,7,False
2025-07-15 00:00:00-03:00,ABEV3,13.37,13.52,13.23,13.27,27675300,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,1,7,False
2025-07-16 00:00:00-03:00,ABEV3,13.38,13.73,13.31,13.68,48903600,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,2,7,False
2025-07-17 00:00:00-03:00,ABEV3,13.67,13.69,13.44,13.46,19213600,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,3,7,False
2025-07-18 00:00:00-03:00,ABEV3,13.33,13.49,13.29,13.32,23902900,0.0,0.0,13.215054,2.034432,...,18302230000.0,ABEV3,ON,Ambev S/A,ambev-abev3,Consumo não-cíclico,Bebidas,4,7,False


In [88]:
# verificação de duplicados
tbl_carteira_ibovespa.duplicated().sum()

0

In [None]:
# análise descritivos interativa dos dados
boxplot_analise_descritiva_categorica(tbl_cotacao_ibovespa,distribuicao=(0.1,0.25,0.5,0.75,0.95,0.99), feature='ticker')

interactive(children=(Dropdown(description='coluna', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

In [90]:
# histograma do target

In [91]:
# histograma do target com escala
histograma_feature_categorica(tbl_cotacao_ibovespa[['ticker','Close']],feature='ticker')

interactive(children=(Dropdown(description='coluna', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

# Feature Engineering

In [92]:
# transformação cíclica para dias uteis 
tbl_cotacao_ibovespa = transformacao_ciclica(df=tbl_cotacao_ibovespa, dias_uteis=True)

2025-08-04 02:10:41,647 | INFO | src.data.preprocessing | Transformação cíclica com dias úteis para as colunas de dados temporais.


# análise de estacionariedade da serie para definição de lags e médias móveis

In [93]:
# teste de estacionariedade
testar_estacionariedade_interativo(df= tbl_cotacao_ibovespa, coluna_valor = 'Close')

interactive(children=(Dropdown(description='ticker', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

In [94]:
# gerar diferenciação do target para revalidar a estacionariedade da serie
tbl_cotacao_ibovespa = diferenciar_serie_temporal(df= tbl_cotacao_ibovespa, target = 'Close')


In [95]:
# teste de estacionariedade com diferenciacao do target
testar_estacionariedade_interativo(df= tbl_cotacao_ibovespa, coluna_valor = 'Close_diff')

interactive(children=(Dropdown(description='ticker', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

In [96]:
# decomposicao da serie temporal diferenciada
grafico_decomposicao_temporal_interativo(df= tbl_cotacao_ibovespa, target = 'Close', period=5)

interactive(children=(Dropdown(description='ticker', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

In [97]:
# gráfico de autocorrelacao para definição do número de lags e médias móveis
grafico_acf_interativo(df=tbl_cotacao_ibovespa,coluna_valor = 'Close_diff',max_lags=8)

interactive(children=(Dropdown(description='ticker', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

In [98]:
# gráfico de autocorrelacao parcial para definição do número de lags e médias móveis
grafico_pacf_interativo(df=tbl_cotacao_ibovespa,coluna_valor = 'Close_diff',max_lags=9)

interactive(children=(Dropdown(description='ticker', options=('ABEV3', 'ALOS3', 'ASAI3', 'AURE3', 'AZZA3', 'B3…

In [99]:
tbl_cotacao_ibovespa

Unnamed: 0_level_0,ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,trailingPE,priceToBook,...,setor,industria,dayofweek,month,Feriado,day_sin,day_cos,month_sin,month_cos,Close_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-07-15 00:00:00-03:00,ABEV3,13.37,13.52,13.23,13.27,27675300,0.0,0.0,13.215054,2.034432,...,Consumo não-cíclico,Bebidas,1,7,False,0.951057,0.309017,-0.5,-0.866025,-0.02
2025-07-16 00:00:00-03:00,ABEV3,13.38,13.73,13.31,13.68,48903600,0.0,0.0,13.215054,2.034432,...,Consumo não-cíclico,Bebidas,2,7,False,0.587785,-0.809017,-0.5,-0.866025,0.41
2025-07-17 00:00:00-03:00,ABEV3,13.67,13.69,13.44,13.46,19213600,0.0,0.0,13.215054,2.034432,...,Consumo não-cíclico,Bebidas,3,7,False,-0.587785,-0.809017,-0.5,-0.866025,-0.22
2025-07-18 00:00:00-03:00,ABEV3,13.33,13.49,13.29,13.32,23902900,0.0,0.0,13.215054,2.034432,...,Consumo não-cíclico,Bebidas,4,7,False,-0.951057,0.309017,-0.5,-0.866025,-0.14
2025-07-21 00:00:00-03:00,ABEV3,13.35,13.45,13.26,13.42,21355000,0.0,0.0,13.215054,2.034432,...,Consumo não-cíclico,Bebidas,0,7,False,0.000000,1.000000,-0.5,-0.866025,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-21 00:00:00-03:00,YDUQ3,13.38,13.38,12.91,13.13,5779300,0.0,0.0,11.720721,1.098539,...,Serviços acadêmicos e educacionais,"Colégio, faculdade e universidade",0,7,False,0.000000,1.000000,-0.5,-0.866025,-0.17
2025-07-22 00:00:00-03:00,YDUQ3,13.22,13.30,12.89,12.96,4888900,0.0,0.0,11.720721,1.098539,...,Serviços acadêmicos e educacionais,"Colégio, faculdade e universidade",1,7,False,0.951057,0.309017,-0.5,-0.866025,-0.17
2025-07-23 00:00:00-03:00,YDUQ3,12.99,13.15,12.85,13.01,3218100,0.0,0.0,11.720721,1.098539,...,Serviços acadêmicos e educacionais,"Colégio, faculdade e universidade",2,7,False,0.587785,-0.809017,-0.5,-0.866025,0.05
2025-07-24 00:00:00-03:00,YDUQ3,12.87,13.00,12.60,13.00,3312600,0.0,0.0,11.720721,1.098539,...,Serviços acadêmicos e educacionais,"Colégio, faculdade e universidade",3,7,False,-0.587785,-0.809017,-0.5,-0.866025,-0.01


In [100]:
# inserção das colunas de janela para o target com e sem diferenciacao
tbl_cotacao_ibovespa = gerar_features_temporais(df=tbl_cotacao_ibovespa,coluna_valor='Close')
tbl_cotacao_ibovespa = gerar_features_temporais(df=tbl_cotacao_ibovespa,coluna_valor='Close_diff')
tbl_cotacao_ibovespa

Unnamed: 0_level_0,ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,trailingPE,priceToBook,...,lag_1_Close_diff,lag_2_Close_diff,lag_3_Close_diff,lag_5_Close_diff,rolling_mean_3_Close_diff,volatility_3_Close_diff,retorno_acumulado_3_Close_diff,rolling_mean_5_Close_diff,volatility_5_Close_diff,retorno_acumulado_5_Close_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-07-15 00:00:00-03:00,ABEV3,13.37,13.52,13.23,13.27,27675300,0.0,0.0,13.215054,2.034432,...,,,,,,,,,,
2025-07-16 00:00:00-03:00,ABEV3,13.38,13.73,13.31,13.68,48903600,0.0,0.0,13.215054,2.034432,...,-0.02,,,,,,,,,
2025-07-17 00:00:00-03:00,ABEV3,13.67,13.69,13.44,13.46,19213600,0.0,0.0,13.215054,2.034432,...,0.41,-0.020000,,,,,,,,
2025-07-18 00:00:00-03:00,ABEV3,13.33,13.49,13.29,13.32,23902900,0.0,0.0,13.215054,2.034432,...,-0.22,0.410000,-0.020000,,0.056667,0.321921,0.170000,,,
2025-07-21 00:00:00-03:00,ABEV3,13.35,13.45,13.26,13.42,21355000,0.0,0.0,13.215054,2.034432,...,-0.14,-0.220000,0.410000,,0.016666,0.342977,0.049999,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-21 00:00:00-03:00,YDUQ3,13.38,13.38,12.91,13.13,5779300,0.0,0.0,11.720721,1.098539,...,-1.07,0.099999,0.050000,,-0.306667,0.661538,-0.920000,,,
2025-07-22 00:00:00-03:00,YDUQ3,13.22,13.30,12.89,12.96,4888900,0.0,0.0,11.720721,1.098539,...,-0.17,-1.070000,0.099999,0.370000,-0.380000,0.612617,-1.140000,-0.144,0.552159,-0.72
2025-07-23 00:00:00-03:00,YDUQ3,12.99,13.15,12.85,13.01,3218100,0.0,0.0,11.720721,1.098539,...,-0.17,-0.170000,-1.070000,0.050000,-0.470000,0.519615,-1.410000,-0.252,0.473730,-1.26
2025-07-24 00:00:00-03:00,YDUQ3,12.87,13.00,12.60,13.00,3312600,0.0,0.0,11.720721,1.098539,...,0.05,-0.170000,-0.170000,0.099999,-0.096667,0.127017,-0.290000,-0.252,0.473730,-1.26


# Considerações sobre colunas criadas para teste de modelo

In [101]:
# salvar o dataset para treino e validação dos modelos
tbl_cotacao_ibovespa.to_csv('../data/processed/tbl_cotacao_ibovespa_processed.csv')