In [1]:
import os
# Environment knobs for the numeric back ends. They are written here, in the
# first cell, so they are already in place when TensorFlow is imported by the
# next cell: OMP_NUM_THREADS is the OpenMP thread-count variable and
# TF_ENABLE_ONEDNN_OPTS=0 asks TensorFlow to turn its oneDNN optimizations off.
os.environ.update({
    "OMP_NUM_THREADS": "32",
    "TF_ENABLE_ONEDNN_OPTS": "0",
})

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, PReLU, ConvLSTM1D, Dropout, Input
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import yfinance as yf
import pandas as pd
import mlflow
import mlflow.keras
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import TimeSeriesSplit, train_test_split
import torch

In [3]:
def retrieve_stock_data(ticker, start_dt):
    """Download the historical price data for a ticker and drop unused columns.

    Args:
        ticker: Symbol forwarded to ``yf.download`` (e.g. ``'PETR4.SA'``).
        start_dt: Start date forwarded as the ``start`` argument.

    Returns:
        The downloaded frame without the 'Volume', 'High', 'Low' and 'Open'
        columns. Per the original author's note this is meant to leave only
        the closing price; whatever other columns the download contains are
        returned untouched.
    """
    unwanted_columns = ['Volume', 'High', 'Low', 'Open']
    history = yf.download(ticker, start=start_dt)
    return history.drop(columns=unwanted_columns)

In [None]:
# Example: fetch the PETR4 price history starting in 2018 and preview the
# first rows of the resulting frame.
ticker = 'PETR4.SA'
data = retrieve_stock_data(ticker, start_dt='2018-01-01')
data.head()

In [None]:
# Plot the downloaded series on explicit axes. A title and axis labels are
# set so the figure can be understood on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(data)
ax.set_title(f'Histórico de Preços - {ticker}')
ax.set_xlabel('Data')
ax.set_ylabel('Preço (R$)')
plt.show()

In [6]:
def format_timeseries_dataset(data, lookback=60):
    """Turn a 2-D series into supervised (window, next value) pairs.

    Only column 0 of ``data`` is read. For every position ``i`` from
    ``lookback`` up to ``len(data) - 1`` one sample is produced: the
    ``lookback`` values that precede ``i`` as the features and the value at
    ``i`` as the target.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        lookback: Number of preceding rows in each feature window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the windows and the
        targets respectively.
    """
    target_positions = range(lookback, len(data))
    windows = [data[end - lookback:end, 0] for end in target_positions]
    targets = [data[end, 0] for end in target_positions]
    return np.array(windows), np.array(targets)

In [None]:
# Download the price history and build the scaled train/test windows.
ticker = 'PETR4.SA'
data = retrieve_stock_data(ticker, '2018-01-01')

# Chronological 70/30 split on the RAW prices. shuffle=False keeps the time
# order, so no random_state is needed.
train_raw, test_raw = train_test_split(data.values, test_size=0.3, shuffle=False)

# Fit the scaler on the training portion only. Fitting on the full series
# would leak the test period's min/max into the training data.
scaler = MinMaxScaler(feature_range=(0, 1))
train_data = scaler.fit_transform(train_raw)
test_data = scaler.transform(test_raw)  # may fall outside [0, 1]; that is expected

# Whole series under the train-fitted scaler; consumed by the
# cross-validation cells further down.
scaled_data = scaler.transform(data.values)

# Number of past time steps fed to the model for each prediction.
lookback = 10
X_train, y_train = format_timeseries_dataset(train_data, lookback)
X_test, y_test = format_timeseries_dataset(test_data, lookback)

In [None]:
# Sanity check of the split: train size, test size and the test fraction.
n_train, n_test = len(train_data), len(test_data)
print(n_train, n_test, n_test / (n_train + n_test))

In [None]:
# Features and targets must have the same number of samples in each split.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(len(features), len(targets))

In [None]:
def create_model(input: Input):
    """Build and compile the stacked-LSTM regression network.

    The layers are added to a ``Sequential`` model in this order: the given
    input, an LSTM with 50 units that returns the full sequence, a Dense layer
    with one unit, a second LSTM with 50 units that returns only its last
    output, and a final Dense layer with one unit. The model is compiled with
    the Adam optimizer and mean-squared-error loss.

    Args:
        input: Input specification added as the first element of the model.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): the Dense(1) between the two LSTMs squeezes every time
    # step down to a single value before the second LSTM - confirm this
    # bottleneck is intentional.
    layer_stack = (
        input,
        LSTM(units=50, return_sequences=True),
        Dense(units=1),
        LSTM(units=50, return_sequences=False),
        Dense(units=1),
    )

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# Each sample is a window of `lookback` time steps with a single feature, so
# the network input must be (lookback, 1) rather than (1, 1). The windows from
# format_timeseries_dataset are 2-D (samples, lookback); a trailing feature
# axis is added to give the (samples, timesteps, features) layout LSTM expects.
model = create_model(Input((lookback, 1)))
model_history = model.fit(
    X_train[..., np.newaxis],
    y_train,
    epochs=300,
    batch_size=32,
    validation_data=(X_test[..., np.newaxis], y_test),
)

In [None]:
# Training vs. validation loss per epoch. The curves are labelled and the
# axes titled so the two lines can be told apart without reading the code.
fig, ax = plt.subplots()
ax.plot(model_history.history['loss'], label='loss')
ax.plot(model_history.history['val_loss'], label='val_loss')
ax.set_title('Perda por Época')
ax.set_xlabel('Época')
ax.set_ylabel('Perda (MSE)')
ax.legend()
plt.show()

In [12]:
def plot_model_predictions(model, scaler, data_x, data_y, title):
    """Plot the true targets against the model's predictions.

    Both series are passed through ``scaler.inverse_transform`` (as single
    columns) before plotting, then drawn on one 14x7 figure: the targets in
    blue and the predictions in red.

    Args:
        model: Object whose ``predict(data_x)`` output is plotted as the forecast.
        scaler: Object whose ``inverse_transform`` is applied to both series.
        data_x: Inputs handed to ``model.predict``.
        data_y: Target values matching ``data_x``.
        title: Title placed on the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    def _to_original_scale(values):
        # The scaler is applied to one column at a time, hence the (n, 1) reshape.
        return scaler.inverse_transform(values.reshape(-1, 1))

    raw_forecast = model.predict(data_x)
    actual = _to_original_scale(data_y)
    forecast = _to_original_scale(raw_forecast)

    plt.figure(figsize=(14, 7))
    for series, colour, legend_label in (
        (actual, 'blue', 'Preço Real'),
        (forecast, 'red', 'Preço Previsto'),
    ):
        plt.plot(series, color=colour, label=legend_label)
    plt.title(title)
    plt.xlabel('Data')
    plt.ylabel('Preço de Fechamento (R$)')
    plt.legend()
    plt.show()

In [None]:
# Fit on the training windows: predicted vs. actual prices.
plot_model_predictions(
    model=model,
    scaler=scaler,
    data_x=X_train,
    data_y=y_train,
    title='Previsão de Preços de Ações (Dados de Treino)',
)

In [None]:
# Same comparison on the held-out test windows.
plot_model_predictions(
    model=model,
    scaler=scaler,
    data_x=X_test,
    data_y=y_test,
    title='Previsão de Preços de Ações (Dados de Teste)',
)

## Metrics

In [None]:
# Mean squared error on the test windows, computed on the scaled values
# (y_test and the predictions are not passed through the scaler here).
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error (MSE): {mse}')

In [16]:
# Cross-validation setup: a 5-split TimeSeriesSplit plus two empty lists that
# the cross-validation loop fills with one entry per fold.
data_splits = 5
tscv = TimeSeriesSplit(n_splits=data_splits)
mses, losses = [], []

In [None]:
# Time-series cross-validation: for every TimeSeriesSplit fold a fresh model
# is trained on the fold's training rows and scored on its test rows.
#
# The accumulators are reset here so that re-running this cell does not append
# a second set of fold results to the lists from a previous run.
mses = []
losses = []

# NOTE(review): scaled_data was produced by a scaler fitted outside this loop,
# so the folds do not rescale on their own training rows - confirm whether
# per-fold scaling is wanted.
for train_idx, test_idx in tscv.split(scaled_data):

    train_data, test_data = scaled_data[train_idx], scaled_data[test_idx]

    X_train, y_train = format_timeseries_dataset(train_data, lookback)
    X_test, y_test = format_timeseries_dataset(test_data, lookback)

    # The windows are 2-D (samples, lookback). Add the trailing feature axis
    # so they match the (lookback, 1) input the model is built with below.
    X_train_seq = X_train[..., np.newaxis]
    X_test_seq = X_test[..., np.newaxis]

    # Input shape is (timesteps, features) = (lookback, 1), not (1, 1).
    model = create_model(Input((lookback, 1)))
    model.fit(X_train_seq, y_train, epochs=300, batch_size=32, validation_data=(X_test_seq, y_test))

    y_pred = model.predict(X_test_seq)
    loss = model.evaluate(X_test_seq, y_test)
    losses.append(loss)

    mse = mean_squared_error(y_test, y_pred)
    mses.append(mse)

print(f'Mean Cross-Validation MSE: {np.mean(mses)}')

In [None]:
# Per-fold MSE values appended by the cross-validation loop.
mses

In [None]:
# Per-fold `model.evaluate` results appended by the cross-validation loop.
losses

In [None]:
# Averages across folds: mean sklearn MSE, then mean Keras evaluation loss.
print(*(np.mean(fold_values) for fold_values in (mses, losses)))