# Trabalho de Aplicação do Conhecimento (TAC)
# Aluna: MFSSM
# <font color='orangered'>Redes Neurais Profundas</font>

## Instalando e Carregando os Pacotes

In [None]:
# !pip install -q tensorflow==2.11.0

In [None]:
%env TF_CPP_MIN_LOG_LEVEL=3

In [None]:
# # https://www.tensorflow.org/
# !pip install -q tensorflow

In [None]:
# # https://pypi.org/project/yfinance/
# !pip install -q yfinance==0.2.22

In [None]:
# # https://pypi.org/project/ta/
# !pip install -q ta==0.10.2

In [None]:
# Imports
import ta
import sklearn
import pandas as pd
import numpy as np
import tensorflow
from tensorflow import keras
from keras import layers
from sklearn import metrics
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import yfinance as yf
import matplotlib.pyplot as plt
from matplotlib import cycler
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers

#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense, LSTM, Dropout


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, make_scorer, mean_squared_error, explained_variance_score
import matplotlib.dates as mdates
# Apply the style sheets first and set the figure size last, so that a
# figure.figsize defined by a style sheet cannot override the value chosen here.
plt.style.use('fivethirtyeight')
# Newer Matplotlib releases ship the seaborn style as 'seaborn-v0_8' and no
# longer accept the old 'seaborn' name; use whichever this installation has.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
plt.rcParams['figure.figsize'] = (10, 5)

In [None]:
import warnings
warnings.filterwarnings('ignore')

## Carregando os dados em tempo real

In [None]:
# Download the price history of a ticker straight from Yahoo! Finance
def extrai_dados(ticker, start = "2010-01-01", end = "2024-08-31"):
    """Download the price history of ``ticker`` with ``yf.download``.

    Args:
        ticker: Symbol passed to ``yf.download`` (e.g. ``"AMZN"``).
        start: Start date passed to ``yf.download`` (``YYYY-MM-DD`` string).
        end: End date passed to ``yf.download`` (``YYYY-MM-DD`` string).

    Returns:
        DataFrame whose index is named ``date`` and whose columns are, in this
        order, ``open``, ``high``, ``low``, ``close``, ``adj close``, ``volume``.

    Raises:
        KeyError: If any of the six expected columns is missing from the
            downloaded data.
    """
    # auto_adjust=False asks for both "Close" and "Adj Close".
    # NOTE(review): newer yfinance releases appear to default to
    # auto_adjust=True and to return MultiIndex columns — confirm against the
    # installed version.
    dados = yf.download(ticker, start = start, end = end, auto_adjust = False)

    # Keep only the field level when the columns come as (field, ticker) pairs.
    if isinstance(dados.columns, pd.MultiIndex):
        dados.columns = dados.columns.get_level_values(0)

    # Rename by label instead of by position, so a different column order or
    # count cannot silently mislabel the data.
    dados.columns = [str(coluna).lower() for coluna in dados.columns]

    # Enforce the column order the rest of the notebook was written against.
    dados = dados[["open", "high", "low", "close", "adj close", "volume"]]
    dados.index.name = "date"
    return dados

df = extrai_dados("AMZN")

In [None]:
df.head(1)

## Função Para Engenharia de Atributos de Dados Financeiros

In [None]:
def func_engenharia_atributos(df):
    """Add return, lagged price/volume and technical-indicator columns.

    Every derived column except ``retorno`` is shifted by one row, so each row
    carries the value computed up to the previous row.

    Args:
        df: DataFrame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns. It is not modified.

    Returns:
        A copy of ``df`` with the added columns ``retorno``, ``op``, ``hi``,
        ``lo``, ``clo``, ``vol``, ``SMA 25``, ``SMA 300``, ``MSD 25``,
        ``MSD 300``, ``VWAP`` and ``RSI``. Rows containing any NaN are dropped,
        which includes the warm-up rows of the rolling windows.
    """
    df_copy = df.copy()

    # One-row percentage change of the closing price
    df_copy["retorno"] = df_copy["close"].pct_change(1)

    # Previous-row values of the price columns and of the volume
    colunas_defasadas = (("op", "open"),
                         ("hi", "high"),
                         ("lo", "low"),
                         ("clo", "close"),
                         ("vol", "volume"))
    for nova_coluna, coluna_origem in colunas_defasadas:
        df_copy[nova_coluna] = df_copy[coluna_origem].shift(1)

    # Simple moving average (SMA) of the closing price
    for janela in (25, 300):
        df_copy[f"SMA {janela}"] = df_copy["close"].rolling(janela).mean().shift(1)

    # Moving standard deviation (MSD) of the return
    for janela in (25, 300):
        df_copy[f"MSD {janela}"] = df_copy["retorno"].rolling(janela).std().shift(1)

    # Volume Weighted Average Price (VWAP), read through the indicator's
    # public accessor rather than its internal attribute
    vwap = ta.volume.VolumeWeightedAveragePrice(high = df_copy["high"],
                                                low = df_copy["low"],
                                                close = df_copy["close"],
                                                volume = df_copy["volume"],
                                                window = 5)
    df_copy["VWAP"] = vwap.volume_weighted_average_price().shift(1)

    # Relative Strength Index (RSI)
    RSI = ta.momentum.RSIIndicator(df_copy["close"], window = 5, fillna = False)
    df_copy["RSI"] = RSI.rsi().shift(1)

    return df_copy.dropna()

In [None]:
# Engenharia de atributos
df = func_engenharia_atributos(df)

In [None]:
df.head(1)

In [None]:
# Closing price over the whole period present in df
plt.figure(figsize = (20,5))
# grid() without arguments toggles the current state, which switches the grid
# off when the active style already enables it; request it explicitly.
plt.grid(True)
plt.plot(df.index, df['close'], color = 'blue')
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval = 12))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))
plt.gcf().autofmt_xdate()
plt.legend(['Cotação'], loc='best')
# Take the year range from the data instead of hard-coding it in the title
plt.title(f'Cotação de ações entre {df.index.min().year}-{df.index.max().year}\n')
plt.ylabel('\nCotação das ações')
plt.xlabel('\nData')

In [None]:
# Plot each price column of the asset in its own subplot row
colunas_precos = ['open', 'high', 'low', 'close', 'adj close']

# One subplot row per plotted column, so no empty row is left at the bottom
fig = make_subplots(rows = len(colunas_precos), cols = 1)

# The index of df holds the dates, so it is used directly as the x axis
for linha, coluna in enumerate(colunas_precos, start = 1):
    fig.add_trace(go.Scatter(x = df.index,
                             y = df[coluna],
                             name = coluna), row = linha, col = 1
    )

fig.update_layout(height = 700, width = 900, title_text = "Valores ao Longo do Tempo")
fig.show()

In [None]:
# # Plot
# plt.figure(figsize = (25,10))
# ax = df.plot.area(fontsize = 10);
# ax.set_xlabel('date');
# ax.legend(fontsize=12);

### Pré-Processamento dos Dados

In [None]:
# Share of rows used for training (80%); the remaining rows form the test set
train_size = int(0.8 * len(df))
test_size = len(df) - train_size

In [None]:
# Chronological split: the first train_size rows train, the rest test.
# .copy() gives each split its own data, so the scaling assignments that follow
# do not write into a slice of df.
train, test = df.iloc[:train_size].copy(), df.iloc[train_size:].copy()

In [None]:
print('Tamanho da Amostra de Treino:', len(train))
print('Tamanho da Amostra de Teste:', len(test))

In [None]:
print(train.shape)
train.head()

In [None]:
# Variáveis preditoras
variaveis_preditoras = ["retorno", "volume", "RSI", "SMA 25", "MSD 25", "MSD 300"]

In [None]:
# Prepara o padronizador das variáveis preditoras
scaler_preditores = MinMaxScaler()

In [None]:
# Fit nos dados de treino
scaler_preditores = scaler_preditores.fit(train[variaveis_preditoras].to_numpy())

In [None]:
# Scale the predictor columns of the training set with the fitted scaler.
# The columns are replaced as a whole so they take the float dtype of the
# scaled values; writing through .loc[:, cols] sets values in place and is an
# incompatible-dtype assignment for any integer-typed predictor
# (presumably 'volume' — verify).
train[variaveis_preditoras] = scaler_preditores.transform(train[variaveis_preditoras].to_numpy())

In [None]:
train.head()

In [None]:
# Scale the predictor columns of the test set with the scaler fitted on the
# training set. The columns are replaced as a whole so they take the float
# dtype of the scaled values; writing through .loc[:, cols] sets values in
# place and is an incompatible-dtype assignment for any integer-typed predictor
# (presumably 'volume' — verify).
test[variaveis_preditoras] = scaler_preditores.transform(test[variaveis_preditoras].to_numpy())

In [None]:
test.head()

In [None]:
# Padronizador da variável alvo
scaler_target = MinMaxScaler()

In [None]:
# Fit nos dados de treino
scaler_target = scaler_target.fit(train[['close']])

In [None]:
# Scale the training target ('close') with the target scaler; the single
# output column is flattened to one value per row before being stored
train['close'] = scaler_target.transform(train[['close']]).ravel()

In [None]:
train['close'].head()

In [None]:
# Scale the test target ('close') with the target scaler fitted on the training
# set; the single output column is flattened to one value per row
test['close'] = scaler_target.transform(test[['close']]).ravel()

In [None]:
test['close'].head()

In [None]:
# Build the (window, next-value) samples consumed by the models
def createDataset(X, y, time_steps):
    """Slice ``X`` into overlapping windows and pair each with the next ``y``.

    Args:
        X: pandas object supporting ``.iloc`` row slicing.
        y: pandas Series positionally aligned with ``X``.
        time_steps: Number of consecutive rows in each window.

    Returns:
        Tuple ``(Xs, ys)`` of NumPy arrays. ``Xs[k]`` holds rows ``k`` to
        ``k + time_steps - 1`` of ``X`` and ``ys[k]`` is the value of ``y`` at
        position ``k + time_steps``.
    """
    inicios = range(len(X) - time_steps)
    janelas = [X.iloc[inicio:inicio + time_steps].values for inicio in inicios]
    alvos = [y.iloc[inicio + time_steps] for inicio in inicios]
    return np.array(janelas), np.array(alvos)

In [None]:
# Window length: if the series is quarterly, use the number of quarters; if
# monthly, the number of months; if daily, the number of days (e.g. 30, 60, 90).
# If this is not set, the series is analysed day by day (original format).
time_steps = 25

In [None]:
# Build the training samples: each one holds time_steps consecutive rows of
# `train` (all of its columns) and its target is the scaled 'close' of the
# following row.
# NOTE(review): only `variaveis_preditoras` and 'close' are scaled earlier in
# this notebook, so the other columns of `train` reach the model unscaled —
# confirm whether the window should be restricted to the scaled columns.
X_train, y_train = createDataset(train, train.close, time_steps)

In [None]:
# Build the test samples: each one holds time_steps consecutive rows of `test`
# (all of its columns) and its target is the scaled 'close' of the following row.
# NOTE(review): only `variaveis_preditoras` and 'close' are scaled earlier in
# this notebook, so the other columns of `test` reach the model unscaled —
# confirm whether the window should be restricted to the scaled columns.
X_test, y_test = createDataset(test, test.close, time_steps)

In [None]:
print(X_train.shape, y_train.shape)

In [None]:
print(X_test.shape, y_test.shape)

In [None]:
X_train_mlp = X_train[:, :, 0]

In [None]:
X_test_mlp = X_test[:, :, 0]

# LSTM

In [None]:
# Single-layer LSTM regressor: LSTM(128) -> Dropout(0.2) -> Dense(1).
# The input shape is (time steps, features), taken from the training windows.
modelo_lstm = tf.keras.Sequential([
    tf.keras.layers.LSTM(units = 128, input_shape = X_train.shape[1:]),
    tf.keras.layers.Dropout(rate = 0.2),
    tf.keras.layers.Dense(units = 1),
])

In [None]:
# Compila o modelo
modelo_lstm.compile(loss = 'mse', optimizer = 'adam')

In [None]:
modelo_lstm.summary()

### Treina o modelo

In [None]:
callbacks = [keras.callbacks.EarlyStopping(patience = 5, restore_best_weights = True)]

In [None]:
%%time
modelo_lstm_history = modelo_lstm.fit(X_train, 
                                      y_train,
                                      epochs = 100,
                                      batch_size = 32,
                                      validation_split = 0.1,
                                      callbacks = callbacks,
                                      shuffle = False)

In [None]:
# Training and validation loss per epoch for the first LSTM model
plt.plot(modelo_lstm_history.history['loss'], label = 'Treino')
plt.plot(modelo_lstm_history.history['val_loss'], label = 'Validação')
plt.title('Avaliação do Modelo')
plt.xlabel('Epoch')
plt.ylabel('Erro')
plt.legend(loc = 'upper right')
plt.show()

### Previsões com dados de teste

In [None]:
y_pred = modelo_lstm.predict(X_test)

In [None]:
# Undo the target scaling to compare actual and predicted values on the
# original scale of 'close'. scaler_target was fitted on a single column, so
# the values are passed as one column with one row per sample.
y_train_inv = scaler_target.inverse_transform(y_train.reshape(-1, 1))
y_test_inv = scaler_target.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv = scaler_target.inverse_transform(y_pred.reshape(-1, 1))

In [None]:
# Ajusta o shape
y_train_inv = y_train_inv.flatten()
y_test_inv = y_test_inv.flatten()
y_pred_inv = y_pred_inv.flatten()

In [None]:
# Training history followed by actual vs. predicted test values; the x axis is
# the sample position, with the test samples placed right after the training ones
plt.plot(np.arange(len(y_train)),
         y_train_inv,
         label = "Histórico",
         color = 'green')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_test_inv,
         label = "Valor Real",
         color = 'blue',
         marker = '.')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_pred_inv,
         label = "Previsão",
         color = 'red')

plt.xlabel('Data')
plt.ylabel('Cotação das ações')
plt.legend()
plt.show()

### Avaliação

In [None]:
# Mean absolute percentage error
def MAPE(y_true, y_pred):
    """Return the mean absolute percentage error between two sequences.

    Args:
        y_true: Actual values (any array-like accepted by ``np.array``).
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        Mean of ``|y_true - y_pred| / |y_true|`` multiplied by 100.
    """
    real = np.array(y_true)
    previsto = np.array(y_pred)
    erro_relativo = (real - previsto) / real
    return np.mean(np.abs(erro_relativo)) * 100

In [None]:
# Print the error metrics of the first LSTM model on the de-scaled test values
MSE_lstm = mean_squared_error(y_test_inv, y_pred_inv)
print('MSE:', MSE_lstm)

# RMSE is the square root of the MSE; the `squared = False` argument is
# deprecated in recent scikit-learn releases, so it is not used here.
RMSE_lstm = np.sqrt(MSE_lstm)
print('RMSE:', RMSE_lstm)

MAPE_lstm = MAPE(y_test_inv, y_pred_inv)
print('MAPE:', MAPE_lstm)

# LSTM 2

In [None]:
# Stacked LSTM regressor: four LSTM(120) layers, each followed by Dropout(0.2),
# then Dense(1). Only the last LSTM layer returns a single vector per sample.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(units = 120, return_sequences = True, input_shape = X_train.shape[1:]),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(units = 120, return_sequences = True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(units = 120, return_sequences = True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(units = 120, return_sequences = False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units = 1),
])

In [None]:
model.compile(optimizer = 'adam', loss = 'mse')

In [None]:
model.summary()

In [None]:
modelo_lstm_history2 = model.fit(X_train, 
                                 y_train,
                                 epochs = 100,
                                 batch_size = 32,
                                 validation_split = 0.1,
                                 callbacks = callbacks,
                                 shuffle = False)

In [None]:
# Training and validation loss per epoch for the stacked LSTM model
plt.plot(modelo_lstm_history2.history['loss'], label = 'Treino')
plt.plot(modelo_lstm_history2.history['val_loss'], label = 'Validação')
plt.title('Avaliação do Modelo')
plt.xlabel('Epoch')
plt.ylabel('Erro')
plt.legend(loc = 'upper right')
plt.show()

In [None]:
y_pred2 = model.predict(X_test)

In [None]:
# Undo the target scaling to compare actual and predicted values on the
# original scale of 'close'. scaler_target was fitted on a single column, so
# the values are passed as one column with one row per sample.
y_train_inv2 = scaler_target.inverse_transform(y_train.reshape(-1, 1))
y_test_inv2 = scaler_target.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv2 = scaler_target.inverse_transform(y_pred2.reshape(-1, 1))

In [None]:
# Ajusta o shape
y_train_inv2 = y_train_inv2.flatten()
y_test_inv2 = y_test_inv2.flatten()
y_pred_inv2 = y_pred_inv2.flatten()

In [None]:
# Training history followed by actual vs. predicted test values; the x axis is
# the sample position, with the test samples placed right after the training ones
plt.plot(np.arange(len(y_train)),
         y_train_inv2,
         label = "Histórico",
         color = 'green')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_test_inv2,
         label = "Valor Real",
         color = 'blue',
         marker = '.')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_pred_inv2,
         label = "Previsão",
         color = 'red')

plt.xlabel('Data')
plt.ylabel('Cotação das ações')
plt.legend()
plt.show()

In [None]:
# Print the error metrics of the stacked LSTM model on the de-scaled test values
MSE_lstm2 = mean_squared_error(y_test_inv2, y_pred_inv2)
print('MSE:', MSE_lstm2)

# RMSE is the square root of the MSE; the `squared = False` argument is
# deprecated in recent scikit-learn releases, so it is not used here.
RMSE_lstm2 = np.sqrt(MSE_lstm2)
print('RMSE:', RMSE_lstm2)

MAPE_lstm2 = MAPE(y_test_inv2, y_pred_inv2)
print('MAPE:', MAPE_lstm2)

# Modelagem com GRU

In [None]:
# Single-layer GRU regressor: GRU(128) -> Dropout(0.2) -> Dense(1).
# The input shape is (time steps, features), taken from the training windows.
modelo_gru = tf.keras.Sequential([
    tf.keras.layers.GRU(units = 128, input_shape = X_train.shape[1:]),
    tf.keras.layers.Dropout(rate = 0.2),
    tf.keras.layers.Dense(units = 1),
])

In [None]:
# Compila o modelo
modelo_gru.compile(loss = 'mean_squared_error', optimizer = 'adam')

In [None]:
modelo_gru.summary()

### Treina o modelo

In [None]:
%%time
modelo_gru_history = modelo_gru.fit(X_train, 
                                    y_train,
                                    epochs = 100,
                                    batch_size = 32,
                                    validation_split = 0.1,
                                    callbacks = callbacks,
                                    shuffle = False)

In [None]:
# Training and validation loss per epoch for the first GRU model
plt.plot(modelo_gru_history.history['loss'], label = 'Treino')
plt.plot(modelo_gru_history.history['val_loss'], label = 'Validação')
plt.title('Avaliação do Modelo')
plt.xlabel('Epoch')
plt.ylabel('Erro')
plt.legend(loc = 'upper right')
plt.show()

In [None]:
# Previsão
y_predg = modelo_gru.predict(X_test)

In [None]:
y_predg

In [None]:
# Undo the target scaling to compare actual and predicted values on the
# original scale of 'close'. scaler_target was fitted on a single column, so
# the values are passed as one column with one row per sample.
y_train_invg = scaler_target.inverse_transform(y_train.reshape(-1, 1))
y_test_invg = scaler_target.inverse_transform(y_test.reshape(-1, 1))
y_pred_invg = scaler_target.inverse_transform(y_predg.reshape(-1, 1))

In [None]:
# Ajusta o shape
y_train_invg = y_train_invg.flatten()
y_test_invg = y_test_invg.flatten()
y_pred_invg = y_pred_invg.flatten()

In [None]:
# Training history followed by actual vs. predicted test values; the x axis is
# the sample position, with the test samples placed right after the training ones
plt.plot(np.arange(len(y_train)),
         y_train_invg,
         label = "Histórico",
         color = 'green')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_test_invg,
         label = "Valor Real",
         color = 'blue',
         marker = '.')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_pred_invg,
         label = "Previsão",
         color = 'red')

plt.xlabel('Data')
plt.ylabel('Cotação das ações')
plt.legend()
plt.show()

In [None]:
# Print the error metrics of the first GRU model on the de-scaled test values
MSE_gru = mean_squared_error(y_test_invg, y_pred_invg)
print('MSE:', MSE_gru)

# RMSE is the square root of the MSE; the `squared = False` argument is
# deprecated in recent scikit-learn releases, so it is not used here.
RMSE_gru = np.sqrt(MSE_gru)
print('RMSE:', RMSE_gru)

MAPE_gru = MAPE(y_test_invg, y_pred_invg)
print('MAPE:', MAPE_gru)

# GRU 2

In [None]:
# Stacked GRU regressor: four GRU(120) layers, each followed by Dropout(0.2),
# then Dense(1). Only the last GRU layer returns a single vector per sample.
model_gru2 = tf.keras.Sequential([
    tf.keras.layers.GRU(units = 120, return_sequences = True, input_shape = X_train.shape[1:]),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GRU(units = 120, return_sequences = True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GRU(units = 120, return_sequences = True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GRU(units = 120, return_sequences = False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units = 1),
])

In [None]:
model_gru2.compile(loss = 'mean_squared_error', optimizer = 'adam')

In [None]:
model_gru2.summary()

In [None]:
modelo_gru_history2 = model_gru2.fit(X_train, 
                                     y_train,
                                     epochs = 100,
                                     batch_size = 32,
                                     validation_split = 0.1,
                                     callbacks = callbacks,
                                     shuffle = False)

In [None]:
# Training and validation loss per epoch for the stacked GRU model
plt.plot(modelo_gru_history2.history['loss'], label = 'Treino')
plt.plot(modelo_gru_history2.history['val_loss'], label = 'Validação')
plt.title('Avaliação do Modelo')
plt.xlabel('Epoch')
plt.ylabel('Erro')
plt.legend(loc = 'upper right')
plt.show()

In [None]:
y_pred_g2 = model_gru2.predict(X_test)

In [None]:
# Undo the target scaling to compare actual and predicted values on the
# original scale of 'close'. scaler_target was fitted on a single column, so
# the values are passed as one column with one row per sample.
y_train_inv_g2 = scaler_target.inverse_transform(y_train.reshape(-1, 1))
y_test_inv_g2 = scaler_target.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv_g2 = scaler_target.inverse_transform(y_pred_g2.reshape(-1, 1))

In [None]:
# Ajusta o shape
y_train_inv_g2 = y_train_inv_g2.flatten()
y_test_inv_g2 = y_test_inv_g2.flatten()
y_pred_inv_g2 = y_pred_inv_g2.flatten()

In [None]:
# Training history followed by actual vs. predicted test values; the x axis is
# the sample position, with the test samples placed right after the training ones
plt.plot(np.arange(len(y_train)),
         y_train_inv_g2,
         label = "Histórico",
         color = 'green')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_test_inv_g2,
         label = "Valor Real",
         color = 'blue',
         marker = '.')

plt.plot(len(y_train) + np.arange(len(y_test)),
         y_pred_inv_g2,
         label = "Previsão",
         color = 'red')

plt.xlabel('Data')
plt.ylabel('Cotação das ações')
plt.legend()
plt.show()

In [None]:
# Print the error metrics of the stacked GRU model on the de-scaled test values
MSE_gru2 = mean_squared_error(y_test_inv_g2, y_pred_inv_g2)
print('MSE:', MSE_gru2)

# RMSE is the square root of the MSE; the `squared = False` argument is
# deprecated in recent scikit-learn releases, so it is not used here.
RMSE_gru2 = np.sqrt(MSE_gru2)
print('RMSE:', RMSE_gru2)

MAPE_gru2 = MAPE(y_test_inv_g2, y_pred_inv_g2)
print('MAPE:', MAPE_gru2)