# Importação das Bibliotecas

In [37]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from scikeras.wrappers import KerasRegressor

from keras.models import Sequential
from keras.layers import Input, LSTM, Dropout, Dense
from keras.callbacks import EarlyStopping
from keras.metrics import RootMeanSquaredError

# Carregamento

In [38]:
# Read the BTC-USD table; the 'Date' column becomes the (parsed) index.
data = pd.read_csv(
    './data/results/BTC-USD.csv',
    parse_dates=True,
    index_col='Date',
)
# Preview the first five rows.
data.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume,closeLag_1,closeLag_2,closeLag_3,closeLag_4,closeLag_5,...,closeLag_21,closeLag_22,closeLag_23,closeLag_24,closeLag_25,closeLag_26,closeLag_27,closeLag_28,closeLag_29,closeLag_30
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,998.325012,1003.080017,958.698975,963.65802,147775008,963.742981,961.237976,973.497009,975.921021,933.197998,...,769.731018,774.650024,772.794006,770.809998,768.132019,764.223999,758.700012,773.872009,771.155029,777.94397
2017-01-02,1021.75,1031.390015,996.702026,998.617004,222184992,998.325012,963.742981,961.237976,973.497009,975.921021,...,780.086975,769.731018,774.650024,772.794006,770.809998,768.132019,764.223999,758.700012,773.872009,771.155029
2017-01-03,1043.839966,1044.079956,1021.599976,1021.599976,185168000,1021.75,998.325012,963.742981,961.237976,973.497009,...,780.55603,780.086975,769.731018,774.650024,772.794006,770.809998,768.132019,764.223999,758.700012,773.872009
2017-01-04,1154.72998,1159.420044,1044.400024,1044.400024,344945984,1043.839966,1021.75,998.325012,963.742981,961.237976,...,781.481018,780.55603,780.086975,769.731018,774.650024,772.794006,770.809998,768.132019,764.223999,758.700012
2017-01-05,1013.380005,1191.099976,910.416992,1156.72998,510199008,1154.72998,1043.839966,1021.75,998.325012,963.742981,...,778.088013,781.481018,780.55603,780.086975,769.731018,774.650024,772.794006,770.809998,768.132019,764.223999


# Predição Dia Seguinte

In [39]:
def create_sequences_uni(data=None, seq_len=7):
    """Cut a single series into sliding windows and their next-step targets.

    Window ``i`` holds ``data[i:i + seq_len]`` and its target is
    ``data[i + seq_len]``, so ``len(data) - seq_len`` pairs are produced
    (none when the series is not longer than ``seq_len``).

    Args:
        data: Indexable, sliceable sequence of observations. ``None``
            (the default) is treated as an empty sequence.
        seq_len: Number of consecutive observations per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked windows and the value
        that immediately follows each window.
    """
    # `None` replaces the former mutable `[]` default; both mean "no data".
    if data is None:
        data = []
    n_windows = len(data) - seq_len
    X = [data[start:start + seq_len] for start in range(n_windows)]
    y = [data[start + seq_len] for start in range(n_windows)]
    return np.array(X), np.array(y)

In [40]:
def create_sequences_mult(X, y, seq_len=14):
    """Pair sliding windows of feature rows with the target that follows them.

    Window ``i`` is ``X[i:i + seq_len]`` and its target is ``y[i + seq_len]``;
    ``len(X) - seq_len`` pairs are produced.

    Args:
        X: Sliceable sequence of feature rows.
        y: Indexable sequence of targets aligned with ``X``.
        seq_len: Number of consecutive rows per window.

    Returns:
        Tuple ``(Xs, ys)`` of NumPy arrays with the stacked windows and the
        corresponding targets.
    """
    window_starts = range(len(X) - seq_len)
    windows = [X[start:start + seq_len] for start in window_starts]
    targets = [y[start + seq_len] for start in window_starts]
    return np.array(windows), np.array(targets)

In [None]:
# Chronological 80/20 split: the first 80% of the rows train, the rest test.
train_size = int(len(data) * 0.8)
train_frame, test_frame = data.iloc[:train_size], data.iloc[train_size:]
train_data, test_data = train_frame.values, test_frame.values

# The scaler learns each column's min/max from the training rows only and is
# then re-used unchanged on the test rows, so test values may leave [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))

data_train_normalized = pd.DataFrame(
    scaler.fit_transform(train_data),
    index=train_frame.index,
    columns=data.columns,
)

data_test_normalized = pd.DataFrame(
    scaler.transform(test_data),
    index=test_frame.index,
    columns=data.columns,
)

In [42]:
# Result accumulators: the tuning loop appends one summary dict per
# configuration to `models_lstm`; `models_xgb` starts out empty as well.
models_lstm, models_xgb = [], []

# Look-back window lengths and TimeSeriesSplit fold counts to evaluate.
seq_len_options = [7, 14, 30]
split_options = [5, 10]

# LSTM (Long Short-Term Memory)

In [43]:
def create_lstm_model(seq_len=14, n_features=1, neurons=100, drop=0.2, optimizer='Adam'):
  """Build and compile a two-layer stacked LSTM regressor.

  Args:
    seq_len: Number of time steps in each input window.
    n_features: Number of features per time step.
    neurons: Units of the first LSTM layer; the second uses ``neurons // 2``.
    drop: Dropout rate applied after each LSTM layer.
    optimizer: Optimizer passed to ``compile``.

  Returns:
    A compiled ``Sequential`` model with a single-unit dense output, MSE loss
    and RMSE as the tracked metric.
  """
  stack = [
      Input(shape=(seq_len, n_features)),
      # First recurrent layer returns the full sequence for the layer below.
      LSTM(neurons, return_sequences=True),
      Dropout(drop),
      # Second recurrent layer returns only its final state.
      LSTM(neurons // 2, return_sequences=False),
      Dropout(drop),
      Dense(1),
  ]
  network = Sequential(stack)
  network.compile(loss='mse', optimizer=optimizer, metrics=[RootMeanSquaredError()])
  return network


# Hyper-parameter grid for GridSearchCV. Only a single combination is active;
# the wider search space that is currently switched off is:
#   model__neurons:   [16, 32, 64]
#   model__drop:      [0.2, 0.25, 0.3]
#   model__optimizer: ['Adam']
#   fit__batch_size:  [16, 32]
#   fit__epochs:      [50, 100]
param_grid = dict(
    model__neurons=[16],
    model__drop=[0.2],
    model__optimizer=['Adam'],
    fit__batch_size=[16],
    fit__epochs=[100],
)

###### 3.4.1.1 Modelo Univariado

In [57]:
# Tune the univariate LSTM for every look-back window / CV-fold combination.
# Results from an earlier run of this cell are dropped first (in place), so
# re-running the cell does not append duplicate entries to `models_lstm`.
models_lstm[:] = [
  entry for entry in models_lstm
  if not (entry.get("type") == "Univariado" and entry.get("algorithm") == "LSTM")
]

for seq_len in seq_len_options:
  print(f"===== JANELA DE {seq_len} =====")

  # Each row's lag columns become the time steps of one sample. closeLag_1
  # lands at step 0, so windows run from the most recent lag to the oldest.
  # NOTE(review): confirm this order is intended and matches how the test
  # windows are built.
  lag_cols = [f'closeLag_{i}' for i in range(1, seq_len + 1)]
  X_train_scaled = data_train_normalized[lag_cols].values
  # Reshape to (samples, time steps, 1 feature).
  X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
  y_train_scaled = data_train_normalized['Close'].values

  for split in split_options:
    print(f"_____ SPLIT DE {split} _____")

    model = KerasRegressor(model=create_lstm_model, verbose=0, seq_len=seq_len, n_features=1)

    # Expanding-window folds keep every validation fold after its training data.
    tscv = TimeSeriesSplit(n_splits=split)

    grid = GridSearchCV(
      estimator=model,
      param_grid=param_grid,
      cv=tscv,
      refit=True,
      scoring="neg_root_mean_squared_error",
      n_jobs=-1,
    )

    # `validation_split` makes Keras hold out a 20% slice of each training
    # fold, which provides the `val_loss` that EarlyStopping monitors.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    grid_result = grid.fit(X_train_scaled, y_train_scaled, callbacks=[early_stopping], validation_split=0.2)

    # The scorer is the negated RMSE; flip the sign to report a positive error.
    best_rmse = -grid_result.best_score_

    models_lstm.append({
      "params": grid_result.best_params_,
      "rmse": best_rmse,
      "n_splits": split,
      "seq_len": seq_len,
      "type": "Univariado",
      "algorithm": "LSTM"
    })

    print(f"Menor RMSE: {best_rmse:.3f} usando: {grid_result.best_params_}")

===== JANELA DE 7 =====
_____ SPLIT DE 5 _____
Menor RMSE: 0.100 usando: {'fit__batch_size': 16, 'fit__epochs': 100, 'model__drop': 0.2, 'model__neurons': 16, 'model__optimizer': 'Adam'}
_____ SPLIT DE 10 _____
Menor RMSE: 0.074 usando: {'fit__batch_size': 16, 'fit__epochs': 100, 'model__drop': 0.2, 'model__neurons': 16, 'model__optimizer': 'Adam'}
===== JANELA DE 14 =====
_____ SPLIT DE 5 _____
Menor RMSE: 0.177 usando: {'fit__batch_size': 16, 'fit__epochs': 100, 'model__drop': 0.2, 'model__neurons': 16, 'model__optimizer': 'Adam'}
_____ SPLIT DE 10 _____
Menor RMSE: 0.112 usando: {'fit__batch_size': 16, 'fit__epochs': 100, 'model__drop': 0.2, 'model__neurons': 16, 'model__optimizer': 'Adam'}
===== JANELA DE 30 =====
_____ SPLIT DE 5 _____
Menor RMSE: 0.181 usando: {'fit__batch_size': 16, 'fit__epochs': 100, 'model__drop': 0.2, 'model__neurons': 16, 'model__optimizer': 'Adam'}
_____ SPLIT DE 10 _____
Menor RMSE: 0.107 usando: {'fit__batch_size': 16, 'fit__epochs': 100, 'model__drop': 

###### 3.4.1.3 Resultados e Discussão

Nesta seção são apresentados e discutidos os resultados obtidos pelos modelos.
São comentados o desempenho de cada configuração de parâmetros e a interpretação das métricas.  
Também são apresentadas comparações gráficas entre valores reais e previstos.


In [None]:
# Rank every recorded configuration from the lowest to the highest RMSE,
# leaving `models_lstm` itself untouched.
ordered_models = list(models_lstm)
ordered_models.sort(key=lambda entry: entry["rmse"])
ordered_models

In [None]:
# Show the estimator refit by the most recent grid search. `grid_result` is
# reassigned on every loop iteration, so this is the last search that ran,
# not necessarily the configuration ranked first in `ordered_models`.
grid_result.best_estimator_

In [None]:
# NOTE(review): `X_test_scaled`, `y_test_scaled` and `scaler_y` are not created
# in any cell visible here; they must be defined (with windows matching the
# `seq_len` of the fitted `grid_result`) before this cell can run.
y_pred_scaled = grid_result.best_estimator_.predict(X_test_scaled)

# Error metrics in the normalised (scaled) space.
rmse = np.sqrt(mean_squared_error(y_test_scaled, y_pred_scaled))
mae = mean_absolute_error(y_test_scaled, y_pred_scaled)

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")

# scikit-learn scalers only accept 2-D input, while predictions and targets
# may be 1-D; force column vectors before undoing the scaling.
# Assumes `scaler_y` was fitted on the single target column — TODO confirm.
y_pred = scaler_y.inverse_transform(np.asarray(y_pred_scaled).reshape(-1, 1))
y_test = scaler_y.inverse_transform(np.asarray(y_test_scaled).reshape(-1, 1))


In [None]:
# Error metrics in the original (de-normalised) scale.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")

# Express MAE/RMSE as a percentage of the mean observed value. Dividing by
# len(y_test) (the sample count) does not yield a percentage of anything and
# merely shrinks as the test set grows.
mean_actual = np.mean(y_test)
mae_percent = (mae / mean_actual) * 100
rmse_percent = (rmse / mean_actual) * 100
# Mean absolute percentage error, computed point by point.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print(f"MAE(%): {mae_percent:.2f}%")
print(f"RMSE(%): {rmse_percent:.2f}%")
print(f"MAPE(%): {mape:.2f}%")

In [None]:
# Coefficient of determination between the observed and predicted values.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R²: {r2:.2f}")


In [None]:
# Predictions only cover the tail of the series: pad the front with NaN so the
# curve lines up with the full date index. np.full always yields a float
# buffer, so the NaN fill works whatever the dtype of data['Close'].
y_pred_plot = np.full(len(data), np.nan)
y_pred_plot[-len(y_pred):] = np.asarray(y_pred).reshape(-1)

fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(data.index, y_pred_plot, color="red", marker=",", label='Predicted Close')
ax.plot(data.index, np.array(data['Close']), color="black", marker=",", label='Total True Close')
ax.set_title('Close: total real vs predito')
# Axis labels so the figure can be read on its own.
ax.set_xlabel('Date')
ax.set_ylabel('Close')
ax.legend()
plt.show()

In [None]:

# Zoom on the evaluated period: the last len(y_pred) / len(y_test) dates of
# the index are used as the x-axis for predictions and ground truth.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(data.index[-len(y_pred):], y_pred, color="red", marker=",", label='Predicted Close')
ax.plot(data.index[-len(y_test):], y_test, color="black", marker=",", label='True Close')
ax.set_title('Close: real vs predito')
# Axis labels so the figure can be read on its own.
ax.set_xlabel('Date')
ax.set_ylabel('Close')
ax.legend()
ax.grid(True, linestyle="--")
plt.show()