## Previsão dos preços do Petróleo tipo Brent utilizando redes neurais recorrentes.

### Por que utilizar RNNs:


Modelos de regressão baseados em Redes Neurais Recorrentes (RNNs) são especialmente adequados para a análise de séries temporais, o que os torna ferramentas valiosas para previsões em mercados financeiros e de commodities, como a do preço do Petróleo Brent. Diferentemente das redes neurais convencionais (feedforward), as RNNs processam os dados em sequência, mantendo um estado interno que carrega informação dos passos anteriores. RNNs simples, porém, têm dificuldade em reter dependências de longo alcance; as células de Memória de Longo e Curto Prazo (LSTM, do inglês Long Short-Term Memory) contornam esse problema por meio de portas que controlam o que é lembrado e o que é esquecido. Essas células LSTM são cruciais, pois permitem que o modelo identifique e aprenda padrões complexos que se desdobram ao longo do tempo, algo comum nas flutuações do mercado financeiro.

### Carregando os dados históricos:

In [28]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import yfinance as yf
from datetime import datetime
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping 
from sklearn.preprocessing import  MinMaxScaler
import os
import sys

# Make the sibling 'Files' directory importable so the project-local `utils`
# module can be resolved. NOTE(review): `utils` is not referenced in the
# visible cells -- confirm it is still needed.
sys.path.append(os.path.abspath(os.path.join('..', 'Files')))
import utils # type: ignore

import warnings
# NOTE(review): this silences every warning category for the whole session,
# including pandas/Keras deprecation warnings raised by the cells below.
warnings.filterwarnings('ignore')

# Report the runtime: TensorFlow version and how many GPUs it can see.
print(f'Tensorflow Version: {tf.__version__}')
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

# Load the semicolon-separated price history and parse the 'period' column
# into datetimes so it can be sorted and used as a time index later on.
df = pd.read_csv('../Files/oil_prices.csv', sep=';').assign(
    period=lambda frame: pd.to_datetime(frame['period'])
)
df.head()

Tensorflow Version: 2.10.1
Num GPUs: 1


Unnamed: 0,period,value
0,2024-05-13,83.18
1,2024-05-10,83.39
2,2024-05-09,83.26
3,2024-05-08,82.44
4,2024-05-07,82.69


### Selecionando Recursos:

In [29]:
# Work on a chronologically ordered copy of the raw frame.
train_df = df.sort_values(by=['period']).copy()

data = pd.DataFrame(train_df)
# `data_filtered` is the SAME object as `data` (plain assignment, no copy), so
# the in-place set_index below also changes `data`. A later cell reads
# `data.columns` to locate the 'value' column, so this aliasing matters.
data_filtered = data
data_filtered.set_index('period', inplace=True)
# Independent copy that additionally carries a 'Prediction' column, initialised
# as a duplicate of 'value'. NOTE(review): 'Prediction' is not read again in the
# visible cells.
data_filtered_ext = data_filtered.copy()
data_filtered_ext['Prediction'] = data_filtered_ext['value']
data_filtered_ext.tail()

Unnamed: 0_level_0,value,Prediction
period,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-05-07,82.69,82.69
2024-05-08,82.44,82.44
2024-05-09,83.26,83.26
2024-05-10,83.39,83.39
2024-05-13,83.18,83.18


### Normalizando (escalonando) os dados de entrada univariados:

In [30]:
# Convert the indexed frame to a 2-D array of shape (rows, features).
nrows = len(data_filtered)
np_data_unscaled = np.array(data_filtered)
np_data = np_data_unscaled.reshape(nrows, -1)
print(np_data.shape)

# Scaler for the model inputs (all feature columns), mapped to [0, 1].
# NOTE(review): both scalers are fit on the full series, i.e. before the
# train/test split performed in a later cell, so test-period min/max values
# leak into the scaling.
scaler = MinMaxScaler().fit(np_data_unscaled)
np_data_scaled = scaler.transform(np_data_unscaled)

# Separate scaler fit only on the target column; used later to map the
# model's scaled predictions back to price units.
df_Close = data_filtered_ext[['value']]
scaler_pred = MinMaxScaler().fit(df_Close)
np_Close_scaled = scaler_pred.transform(df_Close)

(11169, 1)


### Transformando os dados univariados em janelas (sequências) de entrada:

In [31]:
# Number of past time steps fed to the model for each prediction.
sequence_length = 50

# Column position of the target ('value') inside the scaled feature array.
index_Close = data.columns.get_loc('value')

# Chronological 80/20 split point (rounded up).
train_data_len = math.ceil(len(np_data_scaled) * 0.8)

# The test slice starts `sequence_length` rows before the split point so the
# first test window has a full history available.
train_data = np_data_scaled[:train_data_len]
test_data = np_data_scaled[train_data_len - sequence_length:]

def partition_dataset(sequence_length, data, target_index=None):
    """Slice a 2-D series into overlapping input windows and next-step targets.

    For every row ``i`` with ``sequence_length <= i < len(data)`` one sample is
    produced: the window ``data[i - sequence_length:i, :]`` as input and
    ``data[i, target_index]`` as the value to predict.

    Args:
        sequence_length: Number of consecutive rows per input window (>= 1).
        data: 2-D array-like of shape (rows, features).
        target_index: Column index of the target feature. When omitted, the
            notebook-level ``index_Close`` is used, which keeps existing
            two-argument calls working unchanged.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape
        ``(rows - sequence_length, sequence_length, features)`` and ``y`` has
        shape ``(rows - sequence_length,)``. When ``data`` has no more than
        ``sequence_length`` rows, both arrays are empty but keep those ranks.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    data = np.asarray(data)
    data_len = data.shape[0]

    # Not enough rows for a single window: return consistently shaped empties
    # instead of two rank-1 arrays, so downstream shape checks keep working.
    if data_len <= sequence_length:
        empty_x = np.empty((0, sequence_length) + data.shape[1:], dtype=data.dtype)
        return empty_x, np.empty((0,), dtype=data.dtype)

    if target_index is None:
        # Backward-compatible default: module-level target column index.
        target_index = index_Close

    x = np.stack([data[end - sequence_length:end, :]
                  for end in range(sequence_length, data_len)])
    # Copy so the targets never alias the caller's array.
    y = data[sequence_length:, target_index].copy()
    return x, y


# Build (window, next value) samples for the training and test slices.
(x_train, y_train), (x_test, y_test) = (
    partition_dataset(sequence_length, split) for split in (train_data, test_data)
)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

# Sanity check: the last target value inside the second window must equal the
# first label, because consecutive windows are shifted by exactly one step.
print(x_train[1, sequence_length - 1, index_Close])
print(y_train[0])

(8886, 50, 1) (8886,)
(2233, 50, 1) (2233,)
0.08231368186874306
0.08231368186874306


### Treinando o modelo de previsão (série univariada):

In [32]:
# Two stacked LSTM layers followed by a small dense head that outputs a single
# scaled price.
model = Sequential()

# One LSTM unit per (time step x feature) of an input window.
n_neurons = x_train.shape[1] * x_train.shape[2]
print(n_neurons, x_train.shape[1], x_train.shape[2])
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(n_neurons, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(LSTM(n_neurons, return_sequences=False))
model.add(Dense(5))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')

epochs = 50
batch_size = 16
# Stop when the training loss has not improved for 5 consecutive epochs.
early_stop = EarlyStopping(monitor='loss', patience=5, verbose=1)
# Fix: the callback was created but never handed to fit(), so early stopping
# never took effect and training always ran for the full `epochs`.
# NOTE(review): validation_data is the test split, so the reported val_loss is
# computed on the same data used for the final evaluation.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test),
                    callbacks=[early_stop]
                   )

50 50 1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Analisando a curva de perdas:

In [33]:
# Per-epoch losses recorded by model.fit(). The validation curve exists only
# when validation data was passed to fit(), hence the .get().
loss_values = history.history["loss"]
val_loss_values = history.history.get("val_loss")
epochs_range = list(range(len(loss_values)))

fig = go.Figure()

fig.add_trace(go.Scatter(x=epochs_range, y=loss_values, mode='lines', name='Train'))
# Plot the validation loss next to the training loss so over- or
# under-fitting is visible; previously it was recorded but never shown.
if val_loss_values is not None:
    fig.add_trace(go.Scatter(x=epochs_range[:len(val_loss_values)], y=val_loss_values,
                             mode='lines', name='Validation'))

fig.update_layout(
    title="Model loss",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    xaxis=dict(
        # One tick per recorded epoch. The old `dtick` was ignored under
        # tickmode='array', and the old step expression always reduced to 1.
        tickmode='array',
        tickvals=epochs_range,
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgrey'
    ),
    yaxis=dict(
        gridcolor='lightgrey'
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    margin=dict(l=40, r=40, t=40, b=40)
)

fig.show()

### Avaliando o desempenho do modelo:

In [34]:
# Predict on the held-out windows (outputs are still in the scaled [0, 1] space).
y_pred_scaled = model.predict(x_test)

# Map predictions and ground truth back to price units with the target scaler.
y_pred = scaler_pred.inverse_transform(y_pred_scaled)
y_test_unscaled = scaler_pred.inverse_transform(y_test.reshape(-1, 1))

# Absolute percentage error of every test sample, shared by both metrics.
abs_pct_error = np.abs((y_test_unscaled - y_pred) / y_test_unscaled)

MAPE = np.mean(abs_pct_error) * 100
print(f'Mean Absolute Percentage Error (MAPE): {np.round(MAPE, 2)} %')

MDAPE = np.median(abs_pct_error) * 100
print(f'Median Absolute Percentage Error (MDAPE): {np.round(MDAPE, 2)} %')

Mean Absolute Percentage Error (MAPE): 2.08 %
Median Absolute Percentage Error (MDAPE): 1.43 %


In [35]:
# Only rows dated after this day are drawn.
display_start_date = "1987-01-01"

# Split the price series at the train/test boundary. The train part keeps one
# extra row, so it shares its last point with the first test point.
price_series = data_filtered_ext['value']
train = price_series[:train_data_len + 1].to_frame(name='y_train')
valid = price_series[train_data_len:].to_frame(name='y_test')
valid.insert(1, "y_pred", y_pred, allow_duplicates=True)
valid.insert(1, "residuals", valid["y_pred"] - valid["y_test"], allow_duplicates=True)
df_union = pd.concat([train, valid])

df_union_zoom = df_union[df_union.index > display_start_date]

# Residuals exist only for the test period; green when the model over-predicts,
# red otherwise.
residuals_zoom = df_union_zoom['residuals'].dropna()
df_sub = residuals_zoom.apply(lambda res: "#C92B2B" if not res > 0 else "#2BC97A")

fig = go.Figure()

# One line per series, each in its own colour.
line_colors = (("y_pred", "#090364"), ("y_train", "#1960EF"), ("y_test", "#EF5919"))
fig.add_traces([
    go.Scatter(x=df_union_zoom.index, y=df_union_zoom[series_name], mode='lines',
               name=series_name, line=dict(color=series_color, width=1.0))
    for series_name, series_color in line_colors
])

fig.add_trace(go.Bar(x=residuals_zoom.index, y=residuals_zoom, name='residuals',
                     marker=dict(color=df_sub), opacity=0.6))

# Quick-zoom buttons shown above the date axis.
range_buttons = [
    dict(count=1, label='1m', step='month', stepmode='backward'),
    dict(count=6, label='6m', step='month', stepmode='backward'),
    dict(count=1, label='YTD', step='year', stepmode='todate'),
    dict(count=1, label='1y', step='year', stepmode='backward'),
    dict(step='all'),
]

fig.update_layout(
    title="y_pred vs y_test",
    yaxis_title="Oil Prices",
    height=800,
    xaxis_rangeslider_visible=True,
    xaxis_title="Date",
    xaxis=dict(rangeselector=dict(buttons=range_buttons), type='date'),
    legend_title="Legend",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

fig.show()

### Prevendo o valor do dia seguinte:

In [68]:
# Select the `sequence_length` most recent rows by date. Sorting first makes
# this independent of the row order of the CSV (the old `df[:sequence_length]`
# was only correct because the file happens to be stored newest-first), and
# the non-in-place set_index avoids mutating a slice of `df`.
df_temp = df.sort_values(by=['period']).tail(sequence_length).set_index('period')
new_df = df_temp.copy()

if len(new_df) < sequence_length:
    raise ValueError(
        f'Need at least {sequence_length} rows to build the input window, got {len(new_df)}'
    )

last_N_days = new_df.values

# Scale with the scaler fit on the model inputs, then add the batch axis the
# model expects: (1, sequence_length, features).
last_N_days_scaled = scaler.transform(last_N_days)

X_test_new = [last_N_days_scaled]

pred_price_scaled = model.predict(np.array(X_test_new))
pred_price_unscaled = scaler_pred.inverse_transform(pred_price_scaled.reshape(-1, 1))

# .iloc[-1] is explicit positional access; `series[-1]` on a datetime-indexed
# Series is deprecated. float() turns the float32 model output into a Python
# float so it prints as 83.46 rather than 83.45999908447266.
price_today = round(float(new_df['value'].iloc[-1]), 2)
predicted_price = round(float(pred_price_unscaled.ravel()[0]), 2)
# Percent change relative to today's price (the base value); the previous
# formula divided by the predicted price instead.
change_percent = round((predicted_price - price_today) / price_today * 100, 2)

print(f'O valor do Petróleo Brent para último dia disponível {price_today:.2f}')
print(f'O valor do Petróleo Brent predito é {predicted_price:.2f} ({change_percent:+.2f}%)')

O valor do Petróleo Brent para último dia disponível 83.18
O valor do Petróleo Brent predito é 83.45999908447266 (+0.34%)


In [37]:
import pickle

# Serialize the trained model object to disk with pickle.
# NOTE(review): a pickle must only be loaded from a trusted source, and the
# model is also saved in Keras' own format in the following cell -- confirm
# that both artifacts are needed.
with open('../Files/brent_oil_model.pkl', mode='wb') as pkl_out:
    pickle.dump(model, pkl_out)




INFO:tensorflow:Assets written to: ram://18e8ede7-a8ac-4677-9f6a-267a57ef5d63/assets


INFO:tensorflow:Assets written to: ram://18e8ede7-a8ac-4677-9f6a-267a57ef5d63/assets


In [69]:
# Persist the trained network to a single file; the '.h5' suffix is expected
# to select Keras' HDF5 format (confirm for the installed Keras version).
# NOTE(review): the fitted `scaler` / `scaler_pred` objects are not saved in
# the visible cells, yet the prediction cell needs both to go from raw prices
# to model input and back.
model.save('../Files/brent_oil_model.h5')