# Previsão dos valores do Petróleo tipo Brent utilizando SGBoost

### Importando as bibliotecas utilizadas:

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import warnings
import plotly.graph_objects as go

warnings.filterwarnings('ignore')

### Obter dados históricos do petróleo Brent:

In [2]:
ticker = 'BZ=F'
df = yf.download(ticker, start='1987-01-01', end='2024-05-17', progress=False)
df.reset_index(inplace=True)
df = df[['Date', 'Close']]
df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)



### Treinando o modelo:

In [3]:
# Criar recursos adicionais (features)
df['year'] = df['ds'].dt.year
df['month'] = df['ds'].dt.month
df['day'] = df['ds'].dt.day
df['dayofweek'] = df['ds'].dt.dayofweek

# Lag features (preços anteriores)
df['lag1'] = df['y'].shift(1)
df['lag2'] = df['y'].shift(2)
df['lag3'] = df['y'].shift(3)
df['lag7'] = df['y'].shift(7)

# Remover linhas com valores nulos (devido ao shift)
df.dropna(inplace=True)

# Dividir os dados em treino e teste
X = df[['year', 'month', 'day', 'dayofweek', 'lag1', 'lag2', 'lag3', 'lag7']]
y = df['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Treinar o modelo XGBoost
model = XGBRegressor(n_estimators=1000, learning_rate=0.01, max_depth=6, random_state=0)
model.fit(X_train, y_train)

# Fazer previsões
y_pred = model.predict(X_test)

### Avaliando o desempenho do modelo:

In [4]:
# Calcular MAPE
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Calcular RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Calcular MAE
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Mean Absolute Percentage Error (MAPE): 0.02%
Root Mean Squared Error (RMSE): 1.85
Mean Absolute Error (MAE): 1.27


### Gerando previsão para os próximos 7 dias:

In [5]:
def forecast_next_days(model, df, days=7):
    last_row = df.iloc[-1]
    forecast_dates = pd.date_range(start=last_row['ds'], periods=days+1)[1:]
    forecast_df = pd.DataFrame({'ds': forecast_dates})
    forecast_df['year'] = forecast_df['ds'].dt.year
    forecast_df['month'] = forecast_df['ds'].dt.month
    forecast_df['day'] = forecast_df['ds'].dt.day
    forecast_df['dayofweek'] = forecast_df['ds'].dt.dayofweek

    forecasts = []
    last_values = last_row[['y', 'lag1', 'lag2', 'lag3', 'lag7']].values.flatten()

    for i in range(days):
        features = np.array([
            forecast_df.loc[i, 'year'],
            forecast_df.loc[i, 'month'],
            forecast_df.loc[i, 'day'],
            forecast_df.loc[i, 'dayofweek'],
            last_values[0], 
            last_values[1],
            last_values[2],
            last_values[3]
        ]).reshape(1, -1)
        
        yhat = model.predict(features)[0]
        forecasts.append(yhat)
        
        # Atualizar lag values
        last_values = np.roll(last_values, 1)
        last_values[0] = yhat
    
    forecast_df['yhat'] = forecasts
    return forecast_df

### Exibindo o gráfico com a previsão:

In [6]:
forecast_df = forecast_next_days(model, df)
last_7_days = forecast_df.tail(7)

fig = go.Figure()
fig.add_trace(go.Scatter(x=last_7_days['ds'], y=last_7_days['yhat'], mode='lines+markers+text', name='Previsões de 7 dias', line=dict(dash='dash'),
                         text=[f"{value:.2f}" for value in last_7_days['yhat']],
                         textposition="top center"))

fig.update_layout(xaxis_title='Data',
                  yaxis_title='US$')

### Exportando o modelo treinado:

In [7]:
import joblib
joblib.dump(model, '../Files/xgboost_model.pkl')

['../Files/xgboost_model.pkl']