# Import Required Libraries
Import the necessary libraries, including NumPy, Pandas, Matplotlib, TensorFlow, and Scikit-learn.

In [51]:
# Import Required Libraries

# Importing the necessary libraries for data manipulation, visualization, and machine learning
import numpy as np  # Library for numerical operations
import pandas as pd  # Library for data manipulation and analysis
import matplotlib  # Library for creating static, animated, and interactive visualizations
import matplotlib.pyplot as plt  # Library for creating static, animated, and interactive visualizations
import tensorflow as tf  # Library for machine learning and deep learning
import sklearn  # Library for machine learning
from tensorflow.keras.models import Sequential # Importing the Sequential model from Keras
from tensorflow.keras.layers import Dense, Dropout, LSTM # Importing the Dense, Dropout, and LSTM layers from Keras
from sklearn.preprocessing import MinMaxScaler # Importing the MinMaxScaler from scikit-learn

# Load and Preprocess Training Data
Load the training dataset from a CSV file and handle missing values.

In [52]:
# Load and Preprocess Training Data

# Read the training CSV and discard every row that contains a NaN, in one chained step
base = pd.read_csv('data/petr4_treinamento_ex.csv').dropna()

# Keep the column at position 1 as a 2-D array of shape (n_rows, 1).
# The notebook treats this column as 'Open' — TODO confirm against the CSV header.
base_treinamento = base.iloc[:, 1:2].to_numpy()

# Normalize Training Data
Normalize the training data using MinMaxScaler.

In [53]:
# Normalize Training Data

# Learn the min/max of the training values, then rescale them into [0, 1].
# The fitted scaler stays in `normalizador` because later cells reuse it to
# transform the test inputs and to invert the predictions.
normalizador = MinMaxScaler(feature_range=(0, 1))
normalizador.fit(base_treinamento)
base_treinamento_normalizada = normalizador.transform(base_treinamento)

# Create Data Structure for LSTM
Build the sliding-window training set for the LSTM: each sample uses the 90 preceding normalized values as input and the value that immediately follows as its single target.

In [54]:
# Create Data Structure for LSTM

# Number of past observations fed to the model for each prediction
n_timesteps = 90

# Use the real number of rows instead of a hard-coded upper bound, so the
# windows stay valid if the CSV (or the rows removed by dropna) changes size
n_observacoes = len(base_treinamento_normalizada)
if n_observacoes <= n_timesteps:
    raise ValueError(
        f'Need more than {n_timesteps} training rows to build a window, got {n_observacoes}'
    )

# Predictors: for every position i, the n_timesteps values that precede it
previsores = np.array([
    base_treinamento_normalizada[i - n_timesteps:i, 0]
    for i in range(n_timesteps, n_observacoes)
])

# Targets: the value at position i itself (one per window)
preco_real = base_treinamento_normalizada[n_timesteps:n_observacoes, 0].copy()

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1),
# the 3-D layout the LSTM input layer is declared with
previsores = np.reshape(previsores, (previsores.shape[0], previsores.shape[1], 1))

# Build LSTM Model
Build the LSTM model with multiple LSTM layers and Dropout regularization.

In [None]:
# Build LSTM Model

# This cell only defines the architecture. Compilation and training are left to
# the "Compile and Train the Model" cell that follows: doing them here as well
# made a Restart & Run All train the network twice (200 epochs in total).

# Initialize the LSTM model as a plain stack of layers
regressor = Sequential()

# First LSTM layer; return_sequences=True hands the full sequence to the next LSTM.
# The input is (timesteps, 1), with timesteps taken from the predictors array.
regressor.add(LSTM(units=100, return_sequences=True, input_shape=(previsores.shape[1], 1)))
regressor.add(Dropout(0.3))

# Second LSTM layer with Dropout regularization
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))

# Third LSTM layer with Dropout regularization
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))

# Fourth LSTM layer returns only its final output, which feeds the Dense head
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

# Output layer: a single unit with linear activation (one regression value per window)
regressor.add(Dense(units=1, activation='linear'))

# Compile and Train the Model
Compile the LSTM model with the RMSprop optimizer and mean squared error loss function, then train the model.

In [None]:
# Compile and Train the Model

# Configure training: RMSprop optimizer, MSE as the loss, MAE reported as an extra metric
regressor.compile(
    loss='mean_squared_error',
    optimizer='rmsprop',
    metrics=['mean_absolute_error'],
)

# Fit on the windowed training data for 100 epochs in mini-batches of 32
regressor.fit(
    x=previsores,
    y=preco_real,
    batch_size=32,
    epochs=100,
)

# Load and Preprocess Test Data
Load the test dataset from a CSV file and preprocess it for prediction.

In [None]:
# Load and Preprocess Test Data

# Window length; must equal the number of timesteps the model was built with
n_timesteps = 90

# Load the test dataset from a CSV file
base_teste = pd.read_csv('data/petr4_teste_ex.csv')

# Column at position 1 as an (n_rows, 1) array: the real prices used for comparison
preco_real_teste = base_teste.iloc[:, 1:2].values

# Stack the training and test 'Open' series so the first test rows have history
base_completa = pd.concat((base['Open'], base_teste['Open']), axis=0)

# Keep the test rows plus the n_timesteps rows that precede them.
# .iloc makes the positional slice explicit (the concatenated index has duplicates).
entradas = base_completa.iloc[len(base_completa) - len(base_teste) - n_timesteps:].values
entradas = entradas.reshape(-1, 1)
# Reuse the scaler fitted on the training data; it must not be re-fitted here
entradas = normalizador.transform(entradas)

# One window per test row. The upper bound comes from the data instead of a
# hard-coded 109, so X_teste always has as many samples as the test set has rows.
X_teste = np.array([
    entradas[i - n_timesteps:i, 0]
    for i in range(n_timesteps, len(entradas))
])

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1)
X_teste = np.reshape(X_teste, (X_teste.shape[0], X_teste.shape[1], 1))

# Prepare Test Data for Prediction
Prepare the test data by creating the required data structure for the LSTM model.

In [None]:
# Prepare Test Data for Prediction

# Window length; must equal the number of timesteps the model was built with
n_timesteps = 90

# Load the test dataset. The path now carries the same 'data/' prefix as the
# other CSV reads in this notebook; the bare filename pointed at a different location.
base_teste = pd.read_csv('data/petr4_teste_ex.csv')

# Column at position 1 as an (n_rows, 1) array: the real prices used for comparison
preco_real_teste = base_teste.iloc[:, 1:2].values

# Stack the training and test 'Open' series so the first test rows have history
base_completa = pd.concat((base['Open'], base_teste['Open']), axis=0)

# Keep the test rows plus the n_timesteps rows that precede them.
# .iloc makes the positional slice explicit (the concatenated index has duplicates).
entradas = base_completa.iloc[len(base_completa) - len(base_teste) - n_timesteps:].values
entradas = entradas.reshape(-1, 1)
# Reuse the scaler fitted on the training data; it must not be re-fitted here
entradas = normalizador.transform(entradas)

# One window per test row. The upper bound comes from the data instead of a
# hard-coded 109, so X_teste always has as many samples as the test set has rows.
X_teste = np.array([
    entradas[i - n_timesteps:i, 0]
    for i in range(n_timesteps, len(entradas))
])

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1)
X_teste = np.reshape(X_teste, (X_teste.shape[0], X_teste.shape[1], 1))

# Make Predictions
Make predictions using the trained LSTM model and inverse transform the normalized predictions.

In [None]:
# Make Predictions

# Use the trained LSTM model to make predictions on the test windows
previsoes = regressor.predict(X_teste)

# Undo the MinMax scaling so the predictions are on the original price scale
previsoes = normalizador.inverse_transform(previsoes)

# A bare expression is only rendered when it is the last line of a cell, so the
# two means used to be computed and discarded; print them explicitly instead
print('Média das previsões:', previsoes.mean())
print('Média do preço real:', preco_real_teste.mean())

# Plot the real stock prices and the predicted stock prices
plt.plot(preco_real_teste, color='red', label='Preço real')
plt.plot(previsoes, color='blue', label='Previsões')
plt.title('Previsão preço das ações')
plt.xlabel('Tempo')
plt.ylabel('Valor Yahoo')
plt.legend()
plt.show()

# Visualize Predictions
Visualize the real stock prices and the predicted stock prices using Matplotlib.

In [None]:
# Visualize Predictions

# Draw both series on one explicit Axes: real prices in red, predictions in blue
fig, ax = plt.subplots()
ax.plot(preco_real_teste, color='red', label='Preço real')
ax.plot(previsoes, color='blue', label='Previsões')

# Title and axis labels so the figure can be read on its own
ax.set_title('Previsão preço das ações')
ax.set_xlabel('Tempo')
ax.set_ylabel('Valor Yahoo')

ax.legend()
plt.show()