# Import librairies

In [None]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras import optimizers
from keras.utils import plot_model
from keras.models import Sequential, Model
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, SimpleRNN, RepeatVector, TimeDistributed, Flatten
from keras import regularizers
from keras.layers import Dropout

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import chart_studio.plotly as py
import plotly.graph_objects as go  # ou plotly.graph_objs, dependendo da versão
from plotly.offline import init_notebook_mode, iplot

# Basic packages
import datetime # manipulating date formats
import seaborn as sns # for prettier plots


# TIME SERIES
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs


# settings
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline
warnings.filterwarnings("ignore")
init_notebook_mode(connected=True)

# Set seeds to make the experiment more reproducible.
from tensorflow.random import set_seed
from numpy.random import seed
set_seed(1)
seed(1)

# Pré-processamento dos dados

* Carregando os dados
* Padronizando os dados
* Dividindo em treino, teste e validação

In [None]:
data = pd.read_csv('station_rio.csv')

In [None]:
temp = data.filter(["TEMP"])
temp_values = temp.values

In [None]:
scaler = MinMaxScaler(feature_range = (0,1))
scaled_data = scaler.fit_transform(temp_values)

In [None]:
scaled_data = temp_values

In [None]:
scaled_data.shape

In [None]:
scaled_data[:5]

## Train test split

In [None]:
# Definindo a proporção para treino, validação e teste
train_size = 0.7
valid_size = 0.15
test_size = 0.15

In [None]:
# Total de dados
total_data_len = len(scaled_data)
train_data_len = int(np.floor(total_data_len * train_size))
valid_data_len = int(np.floor(total_data_len * valid_size))
print(total_data_len)
print(train_data_len)
print(valid_data_len)


In [None]:
# Criando conjuntos de treinamento
train_data = scaled_data[0:train_data_len, :]
X_train, Y_train = [], []
for i in range(60, len(train_data)):
    X_train.append(train_data[i-60:i, 0])
    Y_train.append(train_data[i, 0])

X_train, Y_train = np.array(X_train), np.array(Y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

print(len(X_train))
print(len(Y_train))
print(len(train_data))

In [None]:
# Criando o conjunto de validação
valid_data = scaled_data[train_data_len:train_data_len + valid_data_len, :]
X_valid, Y_valid = [], []

for i in range(60, len(valid_data)):
    X_valid.append(valid_data[i-60:i, 0])
    Y_valid.append(valid_data[i, 0])

X_valid, y_valid = np.array(X_valid), np.array(Y_valid)
X_valid = np.reshape(X_valid, (X_valid.shape[0], X_valid.shape[1], 1))
Y_valid = np.array(Y_valid)

print(len(X_valid))
print(len(Y_valid))
print(len(valid_data))

In [None]:
# Ajustando o conjunto de teste para garantir que o tamanho de X_test e Y_test sejam iguais
test_data = scaled_data[train_data_len + valid_data_len - 60:, :]  # Substitua a linha anterior
Y_test = temp_values[train_data_len + valid_data_len:, :]  # Permanece o mesmo

X_test = []
for i in range(60, len(test_data)):
    X_test.append(test_data[i-60:i, 0])

X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

print(len(X_test))  # Agora deve corresponder ao tamanho de Y_test
print(len(Y_test))
print(len(test_data))

In [None]:
print(f'X_train length: {len(X_train)}')
print(f'Y_train length: {len(Y_train)}')
print(f'X_valid length: {len(X_valid)}')
print(f'y_valid length: {len(y_valid)}')
print(f'X_test length: {len(X_test)}')
print(f'Y_test length: {len(Y_test)}')

# Decomposição da Série

In [None]:
# Additive model
res = sm.tsa.seasonal_decompose(df_rio_intp,period=12,model="additive")
#plt.figure(figsize=(16,12))
fig = res.plot()
#fig.show()

In [None]:
# Stationarity tests
def test_stationarity(df_rio_intp):
    
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(df_rio_intp, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)

test_stationarity(df_rio_intp)

# MLP for Time Series Forecasting

* First we will use a Multilayer Perceptron model or MLP model, here our model will have input features equal to the window size.
* The thing with MLP models is that the model don't take the input as sequenced data, so for the model, it is just receiving inputs and don't treat them as sequenced data, that may be a problem since the model won't see the data with the sequence patter that it has.
* Input shape **[samples, timesteps]**.

In [None]:
epochs = 10
batch = 32
lr = 0.001
adam = optimizers.Adam(lr)

In [None]:
model_mlp = Sequential()
model_mlp.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model_mlp.add(Dense(32))
model_mlp.add(Dense(1))
model_mlp.compile(loss='mean_squared_error', optimizer=adam)
model_mlp.summary()

In [None]:
mlp_history = model_mlp.fit(X_train, Y_train, 
                            validation_data=(X_valid, Y_valid), 
                            epochs=epochs, 
                            verbose=2)

In [None]:
model_reg = Sequential()
model_reg.add(Dense(64, activation='relu', input_dim=X_train.shape[1], 
                     kernel_regularizer=regularizers.l2(0.01)))  # Regularização L2
model_reg.add(Dense(32, activation='relu', 
                     kernel_regularizer=regularizers.l2(0.01)))  # Regularização L2
model_reg.add(Dense(1))  # Saída
model_reg.compile(loss='mean_squared_error', optimizer=optimizers.Adam())
model_reg.summary()

In [None]:
mlp_reg_history = model_reg.fit(X_train, Y_train, 
                            validation_data=(X_valid, Y_valid), 
                            epochs=epochs, 
                            verbose=2)

In [None]:
model_dropout = Sequential()
model_dropout.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model_dropout.add(Dropout(0.5))  # 50% de dropout
model_dropout.add(Dense(32, activation='relu'))
model_dropout.add(Dropout(0.5))  # 50% de dropout
model_dropout.add(Dense(1))  # Saída
model_dropout.compile(loss='mean_squared_error', optimizer=optimizers.Adam())
model_dropout.summary()



In [None]:
mlp_drop_history = model_dropout.fit(X_train, Y_train, 
                            validation_data=(X_valid, Y_valid), 
                            epochs=epochs, 
                            verbose=2)

## Comparando os 3 modelos

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Função para calcular e exibir as métricas de cada modelo
def evaluate_model(model, X_test, Y_test, model_name="Modelo"):
    # Realiza as previsões no conjunto de teste
    Y_pred = model.predict(X_test)
    
    # Calcula as métricas
    mae = mean_absolute_error(Y_test, Y_pred)
    mse = mean_squared_error(Y_test, Y_pred)
    rmse = np.sqrt(mse)
    
    # Exibe os resultados
    print(f"\n{model_name}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    
    return mae, mse, rmse

# Avaliação de cada modelo no conjunto de teste
mae_standard, mse_standard, rmse_standard = evaluate_model(model_mlp, X_test, Y_test, "Modelo Padrão")
mae_reg, mse_reg, rmse_reg = evaluate_model(model_reg, X_test, Y_test, "Modelo com Regularização")
mae_dropout, mse_dropout, rmse_dropout = evaluate_model(model_dropout, X_test, Y_test, "Modelo com Dropout")

# Comparação dos resultados (opcional)
print("\nComparação de resultados:")
print(f"Modelo Padrão - MAE: {mae_standard:.4f}, RMSE: {rmse_standard:.4f}")
print(f"Modelo com Regularização - MAE: {mae_reg:.4f}, RMSE: {rmse_reg:.4f}")
print(f"Modelo com Dropout - MAE: {mae_dropout:.4f}, RMSE: {rmse_dropout:.4f}")


In [None]:
# Gráficos de ajuste combinado no conjunto de treinamento e validação e previsão no conjunto de teste
plt.figure(figsize=(14, 10))

# Modelo Simples
plt.subplot(2, 2, 1)
plt.plot(mlp_history.history['loss'], label='Treinamento')
plt.plot(mlp_history.history['val_loss'], label='Validação')
plt.title('Modelo Simples: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

# Modelo com Regularização
plt.subplot(2, 2, 2)
plt.plot(mlp_reg_history.history['loss'], label='Treinamento')
plt.plot(mlp_reg_history.history['val_loss'], label='Validação')
plt.title('Modelo com Regularização: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

# Modelo com Dropout
plt.subplot(2, 2, 3)
plt.plot(mlp_drop_history.history['loss'], label='Treinamento')
plt.plot(mlp_drop_history.history['val_loss'], label='Validação')
plt.title('Modelo com Dropout: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

# # Modelo com Regularização e Dropout
# plt.subplot(2, 2, 4)
# plt.plot(cnn_regdrop_history.history['loss'], label='Treinamento')
# plt.plot(cnn_regdrop_history.history['val_loss'], label='Validação')
# plt.title('Modelo com Regularização e Dropout: Perda durante o Treinamento')
# plt.xlabel('Épocas')
# plt.ylabel('Perda')
# plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Combine os dados de treino e validação em uma série para comparação visual com o conjunto de teste e previsões.
train_valid_data = np.concatenate([Y_train, Y_valid])  # Combina Y_train e Y_valid em uma série contínua

def plot_fit_with_predictions(train_valid_data, y_test, y_pred, model_name):
    plt.figure(figsize=(14, 7))
    
    # Plot dos dados de treino e validação
    plt.plot(range(len(train_valid_data)), train_valid_data, label="Dados de Treinamento e Validação", color="blue")
    
    # Plot dos dados de teste
    plt.plot(range(len(train_valid_data), len(train_valid_data) + len(y_test)), y_test, label="Dados de Teste", color="green")
    
    # Plot das previsões
    plt.plot(range(len(train_valid_data), len(train_valid_data) + len(y_test)), y_pred, label=f"Previsão {model_name}", color="red", linestyle="--")
    
    plt.title(f"Ajuste do Modelo com Previsões: {model_name}")
    plt.xlabel("Tempo")
    plt.ylabel("Valor")
    plt.legend()
    plt.show()

# Plot do ajuste e previsão para cada modelo
plot_fit_with_predictions(train_valid_data, Y_test, y_pred_mlp, "MLP Padrão")
plot_fit_with_predictions(train_valid_data, Y_test, y_pred_reg, "MLP com Regularização")
plot_fit_with_predictions(train_valid_data, Y_test, y_pred_drop, "MLP com Dropout")

# CNN for Time Series Forecasting

* For the CNN model we will use one convolutional hidden layer followed by a max pooling layer. The filter maps are then flattened before being interpreted by a Dense layer and outputting a prediction.
* The convolutional layer should be able to identify patterns between the timesteps.
* Input shape **[samples, timesteps, features]**.

#### Data preprocess
* Reshape from [samples, timesteps] into [samples, timesteps, features].
* This same reshaped data will be used on the CNN and the LSTM model.

In [None]:
#Hiperparametros
epochs = 20
batch = 32
lr = 0.001
#adam = optimizers.Adam(lr)


In [None]:
X_train_series = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_valid_series = X_valid.reshape((X_valid.shape[0], X_valid.shape[1], 1))
X_test_series = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

print('Train set shape', X_train_series.shape)
print('Validation set shape', X_valid_series.shape)
print('Test set shape', X_test_series.shape)

## Aplicando Modelo CNN

In [None]:
model_cnn = Sequential()
model_cnn.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_cnn.add(MaxPooling1D(pool_size=2))
model_cnn.add(Flatten())
model_cnn.add(Dense(50, activation='relu'))
model_cnn.add(Dense(1))
model_cnn.compile(loss='mse', optimizer=optimizers.Adam())
model_cnn.summary()

In [None]:
cnn_history = model_cnn.fit(X_train_series, Y_train, 
                            validation_data=(X_valid_series, Y_valid), 
                            epochs=epochs, 
                            verbose=2)

## Aplicando Regularização

In [None]:
model_cnn_reg = Sequential()
model_cnn_reg.add(Conv1D(filters=64, kernel_size=2, activation='relu', 
                         kernel_regularizer=regularizers.l2(0.01),  # Regularização L2
                         input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_cnn_reg.add(MaxPooling1D(pool_size=2))
model_cnn_reg.add(Flatten())
model_cnn_reg.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model_cnn_reg.add(Dense(1))
model_cnn_reg.compile(loss='mse', optimizer=optimizers.Adam())
model_cnn_reg.summary()


In [None]:
cnn_reg_history = model_cnn_reg.fit(X_train_series, Y_train, 
                            validation_data=(X_valid_series, Y_valid), 
                            epochs=epochs, 
                            verbose=2)

## Aplicando Dropout

In [None]:
model_cnn_drop = Sequential()
model_cnn_drop.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_cnn_drop.add(MaxPooling1D(pool_size=2))
model_cnn_drop.add(Dropout(0.5))  # Dropout após camada de pooling
model_cnn_drop.add(Flatten())
model_cnn_drop.add(Dense(50, activation='relu'))
model_cnn_drop.add(Dropout(0.5))  # Dropout após camada densa
model_cnn_drop.add(Dense(1))
model_cnn_drop.compile(loss='mse', optimizer=optimizers.Adam())
model_cnn_drop.summary()

In [None]:
cnn_drop_history = model_cnn_drop.fit(X_train_series, Y_train, 
                                       validation_data=(X_valid_series, Y_valid), 
                                       epochs=epochs, 
                                       verbose=2)


## Aplicando Regularização e Dropout combinados

In [None]:
model_cnn_regdrop = Sequential()
model_cnn_regdrop.add(Conv1D(filters=64, kernel_size=2, activation='relu', 
                             kernel_regularizer=regularizers.l2(0.01),
                             input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_cnn_regdrop.add(MaxPooling1D(pool_size=2))
model_cnn_regdrop.add(Dropout(0.5))
model_cnn_regdrop.add(Flatten())
model_cnn_regdrop.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model_cnn_regdrop.add(Dropout(0.5))
model_cnn_regdrop.add(Dense(1))
model_cnn_regdrop.compile(loss='mse', optimizer=optimizers.Adam())
model_cnn_regdrop.summary()

In [None]:
cnn_regdrop_history = model_cnn_regdrop.fit(X_train_series, Y_train, 
                                             validation_data=(X_valid_series, Y_valid), 
                                             epochs=epochs, 
                                             verbose=2)


## Preivsao dos Modelos

In [None]:
# Previsões
predictions_simple = model_cnn.predict(X_test)
predictions_reg = model_cnn_reg.predict(X_test)
predictions_drop = model_cnn_drop.predict(X_test)
predictions_regdrop = model_cnn_regdrop.predict(X_test)


In [None]:
# Função para calcular MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Função para calcular MSE
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

# Função para calcular RMSE
def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# Cálculo das métricas para cada modelo
metrics = {
    "Modelo Simples": {
        "MAPE": mean_absolute_percentage_error(Y_test, predictions_simple),
        "MSE": mean_squared_error(Y_test, predictions_simple),
        "RMSE": root_mean_squared_error(Y_test, predictions_simple)
    },
    "Modelo com Regularização": {
        "MAPE": mean_absolute_percentage_error(Y_test, predictions_reg),
        "MSE": mean_squared_error(Y_test, predictions_reg),
        "RMSE": root_mean_squared_error(Y_test, predictions_reg)
    },
    "Modelo com Dropout": {
        "MAPE": mean_absolute_percentage_error(Y_test, predictions_drop),
        "MSE": mean_squared_error(Y_test, predictions_drop),
        "RMSE": root_mean_squared_error(Y_test, predictions_drop)
    },
    "Modelo com Regularização e Dropout": {
        "MAPE": mean_absolute_percentage_error(Y_test, predictions_regdrop),
        "MSE": mean_squared_error(Y_test, predictions_regdrop),
        "RMSE": root_mean_squared_error(Y_test, predictions_regdrop)
    }
}

# Exibir as métricas
for model, values in metrics.items():
    print(f"{model}: MAPE={values['MAPE']:.2f}%, MSE={values['MSE']:.4f}, RMSE={values['RMSE']:.4f}")


In [None]:
# Gráficos de perda durante o treinamento
plt.figure(figsize=(14, 10))

# Modelo Simples
plt.subplot(2, 2, 1)
plt.plot(cnn_history.history['loss'], label='Treinamento')
plt.plot(cnn_history.history['val_loss'], label='Validação')
plt.title('Modelo Simples: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

# Modelo com Regularização
plt.subplot(2, 2, 2)
plt.plot(cnn_reg_history.history['loss'], label='Treinamento')
plt.plot(cnn_reg_history.history['val_loss'], label='Validação')
plt.title('Modelo com Regularização: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

# Modelo com Dropout
plt.subplot(2, 2, 3)
plt.plot(cnn_drop_history.history['loss'], label='Treinamento')
plt.plot(cnn_drop_history.history['val_loss'], label='Validação')
plt.title('Modelo com Dropout: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

# Modelo com Regularização e Dropout
plt.subplot(2, 2, 4)
plt.plot(cnn_regdrop_history.history['loss'], label='Treinamento')
plt.plot(cnn_regdrop_history.history['val_loss'], label='Validação')
plt.title('Modelo com Regularização e Dropout: Perda durante o Treinamento')
plt.xlabel('Épocas')
plt.ylabel('Perda')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Gráficos de predição
plt.figure(figsize=(14, 10))

# Modelo Simples
plt.subplot(2, 2, 1)
plt.plot(Y_test, label='Real', color='blue')
plt.plot(predictions_simple, label='Predito (Simples)', color='red')
plt.title('Modelo Simples')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

# Modelo com Regularização
plt.subplot(2, 2, 2)
plt.plot(Y_test, label='Real', color='blue')
plt.plot(predictions_reg, label='Predito (Reg)', color='red')
plt.title('Modelo com Regularização')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

# Modelo com Dropout
plt.subplot(2, 2, 3)
plt.plot(Y_test, label='Real', color='blue')
plt.plot(predictions_drop, label='Predito (Drop)', color='red')
plt.title('Modelo com Dropout')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

# Modelo com Regularização e Dropout
plt.subplot(2, 2, 4)
plt.plot(Y_test, label='Real', color='blue')
plt.plot(predictions_regdrop, label='Predito (Reg + Drop)', color='red')
plt.title('Modelo com Regularização e Dropout')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Gráficos de ajuste no conjunto de treinamento e previsão no conjunto de teste
plt.figure(figsize=(14, 10))

# Modelo Simples
plt.subplot(2, 2, 1)
plt.plot(Y_train, label='Real (Treino)', color='blue')  # Ajuste no conjunto de treino
plt.plot(np.arange(len(Y_train), len(Y_train) + len(predictions_simple)), predictions_simple, label='Predito (Teste)', color='red')  # Previsão no conjunto de teste
plt.title('Modelo Simples: Ajuste (Treino) e Previsão (Teste)')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

# Modelo com Regularização
plt.subplot(2, 2, 2)
plt.plot(Y_train, label='Real (Treino)', color='blue')  # Ajuste no conjunto de treino
plt.plot(np.arange(len(Y_train), len(Y_train) + len(predictions_reg)), predictions_reg, label='Predito (Teste)', color='red')  # Previsão no conjunto de teste
plt.title('Modelo com Regularização: Ajuste (Treino) e Previsão (Teste)')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

# Modelo com Dropout
plt.subplot(2, 2, 3)
plt.plot(Y_train, label='Real (Treino)', color='blue')  # Ajuste no conjunto de treino
plt.plot(np.arange(len(Y_train), len(Y_train) + len(predictions_drop)), predictions_drop, label='Predito (Teste)', color='red')  # Previsão no conjunto de teste
plt.title('Modelo com Dropout: Ajuste (Treino) e Previsão (Teste)')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

# Modelo com Regularização e Dropout
plt.subplot(2, 2, 4)
plt.plot(Y_train, label='Real (Treino)', color='blue')  # Ajuste no conjunto de treino
plt.plot(np.arange(len(Y_train), len(Y_train) + len(predictions_regdrop)), predictions_regdrop, label='Predito (Teste)', color='red')  # Previsão no conjunto de teste
plt.title('Modelo com Regularização e Dropout: Ajuste (Treino) e Previsão (Teste)')
plt.xlabel('Tempo')
plt.ylabel('Valor')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Combine os dados de treino e validação em uma série para comparação visual com o conjunto de teste e previsões.
train_valid_data = np.concatenate([Y_train, Y_valid])  # Combina Y_train e Y_valid em uma série contínua

def plot_fit_with_predictions(train_valid_data, y_test, y_pred, model_name):
    plt.figure(figsize=(14, 7))
    
    # Plot dos dados de treino e validação
    plt.plot(range(len(train_valid_data)), train_valid_data, label="Dados de Treinamento e Validação", color="blue")
    
    # Plot dos dados de teste
    plt.plot(range(len(train_valid_data), len(train_valid_data) + len(y_test)), y_test, label="Dados de Teste", color="green")
    
    # Plot das previsões
    plt.plot(range(len(train_valid_data), len(train_valid_data) + len(y_test)), y_pred, label=f"Previsão {model_name}", color="red", linestyle="--")
    
    plt.title(f"Ajuste do Modelo com Previsões: {model_name}")
    plt.xlabel("Tempo")
    plt.ylabel("Valor")
    plt.legend()
    plt.show()

# Plot do ajuste e previsão para cada modelo
plot_fit_with_predictions(train_valid_data, Y_test, predictions_simple, "CNN Padrão")
plot_fit_with_predictions(train_valid_data, Y_test, predictions_reg, "CNN com Regularização")
plot_fit_with_predictions(train_valid_data, Y_test, predictions_drop, "CNN com Dropout")
plot_fit_with_predictions(train_valid_data, Y_test, predictions_regdrop, "CNN com Regularização e Dropout")

# LSTM for Time Series Forecasting

* Now the LSTM model actually sees the input data as a sequence, so it's able to learn patterns from sequenced data (assuming it exists) better than the other ones, especially patterns from long sequences.
* Input shape **[samples, timesteps, features]**.

In [65]:
model_lstm = Sequential()
model_lstm.add(LSTM(50, activation='tanh', input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_lstm.add(Dense(1))
model_lstm.compile(loss='mse', optimizer=optimizers.Adam())
model_lstm.summary()

In [66]:
lstm_history = model_lstm.fit(X_train_series, Y_train, validation_data=(X_valid_series, Y_valid), epochs=15, verbose=2)

Epoch 1/15
11/11 - 3s - 276ms/step - loss: 612.1807 - val_loss: 579.4474
Epoch 2/15
11/11 - 0s - 30ms/step - loss: 565.6359 - val_loss: 538.9734
Epoch 3/15
11/11 - 0s - 40ms/step - loss: 525.3078 - val_loss: 497.5480
Epoch 4/15
11/11 - 1s - 46ms/step - loss: 482.0794 - val_loss: 451.4311
Epoch 5/15
11/11 - 0s - 31ms/step - loss: 433.5675 - val_loss: 401.7878
Epoch 6/15
11/11 - 0s - 31ms/step - loss: 386.3129 - val_loss: 358.1352
Epoch 7/15
11/11 - 0s - 21ms/step - loss: 344.1666 - val_loss: 319.0901
Epoch 8/15
11/11 - 0s - 28ms/step - loss: 307.2392 - val_loss: 285.3972
Epoch 9/15
11/11 - 0s - 24ms/step - loss: 273.3243 - val_loss: 249.5125
Epoch 10/15
11/11 - 0s - 23ms/step - loss: 235.9312 - val_loss: 213.0459
Epoch 11/15
11/11 - 0s - 29ms/step - loss: 200.9870 - val_loss: 180.3166
Epoch 12/15
11/11 - 0s - 33ms/step - loss: 170.4737 - val_loss: 154.6104
Epoch 13/15
11/11 - 0s - 24ms/step - loss: 148.7137 - val_loss: 136.6457
Epoch 14/15
11/11 - 0s - 24ms/step - loss: 130.3198 - val_l

# CNN-LSTM for Time Series Forecasting
* Input shape **[samples, subsequences, timesteps, features]**.

#### Model explanation from the [article](https://machinelearningmastery.com/how-to-get-started-with-deep-learning-for-time-series-forecasting-7-day-mini-course/)
> "The benefit of this model is that the model can support very long input sequences that can be read as blocks or subsequences by the CNN model, then pieced together by the LSTM model."
>
> "When using a hybrid CNN-LSTM model, we will further divide each sample into further subsequences. The CNN model will interpret each sub-sequence and the LSTM will piece together the interpretations from the subsequences. As such, we will split each sample into 2 subsequences of 2 times per subsequence."
>
> "The CNN will be defined to expect 2 timesteps per subsequence with one feature. The entire CNN model is then wrapped in TimeDistributed wrapper layers so that it can be applied to each subsequence in the sample. The results are then interpreted by the LSTM layer before the model outputs a prediction."

#### Data preprocess
* Reshape from [samples, timesteps, features] into [samples, subsequences, timesteps, features].

In [67]:
subsequences = 2
timesteps = X_train_series.shape[1]//subsequences
X_train_series_sub = X_train_series.reshape((X_train_series.shape[0], subsequences, timesteps, 1))
X_valid_series_sub = X_valid_series.reshape((X_valid_series.shape[0], subsequences, timesteps, 1))
print('Train set shape', X_train_series_sub.shape)
print('Validation set shape', X_valid_series_sub.shape)

Train set shape (334, 2, 30, 1)
Validation set shape (24, 2, 30, 1)


In [70]:
model_cnn_lstm = Sequential()
model_cnn_lstm.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None, X_train_series_sub.shape[2], X_train_series_sub.shape[3])))
model_cnn_lstm.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model_cnn_lstm.add(TimeDistributed(Flatten()))
model_cnn_lstm.add(LSTM(50, activation='relu'))
model_cnn_lstm.add(Dense(1))
model_cnn_lstm.compile(loss='mse', optimizer=optimizers.Adam())

In [71]:
cnn_lstm_history = model_cnn_lstm.fit(X_train_series_sub, Y_train, validation_data=(X_valid_series_sub, Y_valid), epochs=epochs, verbose=2)

Epoch 1/20
11/11 - 4s - 359ms/step - loss: 98.5148 - val_loss: 4.9884
Epoch 2/20
11/11 - 0s - 13ms/step - loss: 6.5289 - val_loss: 6.2736
Epoch 3/20
11/11 - 0s - 15ms/step - loss: 6.4010 - val_loss: 5.0431
Epoch 4/20
11/11 - 0s - 14ms/step - loss: 5.4654 - val_loss: 5.2121
Epoch 5/20
11/11 - 0s - 13ms/step - loss: 4.6125 - val_loss: 4.3710
Epoch 6/20
11/11 - 0s - 15ms/step - loss: 4.2339 - val_loss: 3.8592
Epoch 7/20
11/11 - 0s - 13ms/step - loss: 3.9177 - val_loss: 3.4922
Epoch 8/20
11/11 - 0s - 16ms/step - loss: 3.5111 - val_loss: 3.3184
Epoch 9/20
11/11 - 0s - 14ms/step - loss: 2.9520 - val_loss: 2.5441
Epoch 10/20
11/11 - 0s - 12ms/step - loss: 2.2775 - val_loss: 1.6939
Epoch 11/20
11/11 - 0s - 14ms/step - loss: 1.8716 - val_loss: 1.3770
Epoch 12/20
11/11 - 0s - 14ms/step - loss: 1.7615 - val_loss: 1.2637
Epoch 13/20
11/11 - 0s - 13ms/step - loss: 1.7299 - val_loss: 1.2524
Epoch 14/20
11/11 - 0s - 14ms/step - loss: 1.7083 - val_loss: 1.2522
Epoch 15/20
11/11 - 0s - 15ms/step - loss