### Feature Engineering

#### Executando o pré-processamento dos dados

In [12]:
from datetime import datetime
import pandas as pd
import warnings
warnings.filterwarnings("ignore")


# Load the oil-price series published by IPEA Data and keep the last ten years.
url = 'http://www.ipeadata.gov.br/ExibeSerie.aspx?module=m&serid=1650971490&oper=view'

# Numbers on the page are parsed with ',' as decimal mark and '.' as thousands separator.
tables = pd.read_html(url, decimal=',', thousands='.')
# NOTE(review): picking the table by position ties this cell to the page layout —
# confirm index 2 is still the data table if the load starts failing.
df = tables[2]

# The first parsed row is discarded before the columns are named
# (presumably it repeats the header text — verify against the raw table).
df = df.drop(index=0).reset_index(drop=True)
df.columns = ['data', 'preco']

# Unparseable dates/prices become NaT/NaN here and are filtered out below.
df['data'] = pd.to_datetime(df['data'], dayfirst=True, errors='coerce')
df['preco'] = pd.to_numeric(df['preco'], errors='coerce')

# Chronological order with the date as index (required by resample below).
df = df.sort_values(by='data').set_index('data')

# Cut-off date: today minus ten calendar years. DateOffset clamps 29 Feb to
# 28 Feb when the target year is not a leap year, whereas building the date
# from "year - 10", month and day by hand produced a non-existent date on
# leap days. `startDate` stays a string, as before.
startDate = (pd.Timestamp.today().normalize() - pd.DateOffset(years=10)).strftime('%Y-%m-%d')
# Rows with a NaT index compare False and are dropped by the mask as well.
df = df[df.index > startDate].dropna(subset=['preco'])

# Per-column NaN count after filtering; 'preco' is the only column and its
# NaNs were just dropped, so this is zero by construction.
missing_values = df.isnull().sum()

df = df.dropna()

# Month-end averages of the price; kept alongside the row-level frame.
df_monthly = df.resample('M').mean()

#### Médias móveis: eliminam ruído e ajudam a identificar tendências sem depender apenas dos valores brutos.

In [13]:
# Simple moving averages of the price over trailing windows of 3, 6 and 12 rows.
# The first (window - 1) rows of each column are NaN until the window fills.
for window_size in (3, 6, 12):
    df[f'SMA_{window_size}'] = df['preco'].rolling(window=window_size).mean()

#### Variação percentual: cálculo da variação percentual do preço do petróleo em relação à observação anterior (a série usada aqui é diária, não mensal), ajudando a identificar mudanças abruptas e capturando momentos de alta volatilidade.

In [14]:
# Fractional change of each price relative to the immediately preceding row;
# the first row has no predecessor and is therefore NaN.
preco_series = df['preco']
df['pct_change'] = preco_series.pct_change(periods=1)

#### Preço defasado (lag features): mantêm informações do passado (1, 3 e 6 observações anteriores da série diária), fundamentais para previsões de séries temporais.

In [15]:
# Lagged copies of the price: the value observed 1, 3 and 6 rows earlier.
# The first `lag` rows of each column are NaN.
for lag in (1, 3, 6):
    df[f'lag_{lag}'] = df['preco'].shift(lag)

In [16]:
# Remove the null values created by the transformations: any row holding a
# NaN in at least one column is dropped, modifying the frame in place.
df.dropna(axis=0, how='any', inplace=True)

df

Unnamed: 0_level_0,preco,SMA_3,SMA_6,SMA_12,pct_change,lag_1,lag_3,lag_6
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-02-18,60.72,59.093333,57.258333,56.216667,0.006464,60.33,53.48,56.23
2015-02-19,58.78,59.943333,57.555000,56.580833,-0.031950,60.72,56.23,57.00
2015-02-20,61.57,60.356667,58.518333,57.122500,0.047465,58.78,60.33,55.79
2015-02-21,60.78,60.376667,59.735000,57.522500,-0.012831,61.57,60.72,53.48
2015-02-22,60.72,61.023333,60.483333,57.925833,-0.000987,60.78,58.78,56.23
...,...,...,...,...,...,...,...,...
2025-01-28,78.01,78.006667,78.868333,80.738333,0.009185,77.30,78.62,81.68
2025-01-29,77.02,77.443333,78.276667,80.265833,-0.012691,78.01,78.71,80.57
2025-01-30,77.42,77.483333,77.846667,79.851667,0.005193,77.02,77.30,80.00
2025-01-31,77.11,77.183333,77.595000,79.320833,-0.004004,77.42,78.01,78.62
