In [90]:
import numpy as np
import pandas as pd
import matplotlib
import tensorflow as tf
import sklearn
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [91]:
base = pd.read_csv('dataset/poluicao.csv')
base = base.dropna()

In [92]:
#colunas irrelevantes para a previsão
base = base.drop(['No', 'year', 'month', 'day', 'hour', 'cbwd'], axis=1)

In [93]:
base.shape

(41757, 7)

In [94]:
previsores = base.drop(columns=['pm2.5']).values
alvo = base[['pm2.5']].values

In [95]:
normalizador_previsores = MinMaxScaler(feature_range=(0, 1))
previsores_normalizados = normalizador_previsores.fit_transform(previsores)

In [96]:
normalizador_alvo = MinMaxScaler(feature_range=(0, 1))
alvo_normalizado = normalizador_alvo.fit_transform(alvo)

In [97]:
X = []
y = []
for i in range(120, len(previsores_normalizados)):
    X.append(previsores_normalizados[i-120:i])
    y.append(alvo_normalizado[i, 0])

X, y = np.array(X), np.array(y)

In [98]:
previsores_normalizados

array([[0.35294118, 0.24590164, 0.52727273, 0.00237151, 0.        ,
        0.        ],
       [0.36764706, 0.24590164, 0.52727273, 0.00394662, 0.        ,
        0.        ],
       [0.42647059, 0.2295082 , 0.54545455, 0.00552173, 0.        ,
        0.        ],
       ...,
       [0.26470588, 0.26229508, 0.78181818, 0.42873071, 0.        ,
        0.        ],
       [0.26470588, 0.24590164, 0.78181818, 0.43584525, 0.        ,
        0.        ],
       [0.27941176, 0.26229508, 0.78181818, 0.44138468, 0.        ,
        0.        ]])

In [99]:
alvo_normalizado

array([[0.12977867],
       [0.14889336],
       [0.15995976],
       ...,
       [0.01006036],
       [0.00804829],
       [0.01207243]])

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

In [101]:
print("Shape de X_train:", X_train.shape)
print("Shape de X_test:", X_test.shape)

Shape de X_train: (29145, 120, 6)
Shape de X_test: (12492, 120, 6)


In [102]:
regressor = Sequential()
regressor.add(LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

regressor.add(Dense(units=1, activation='linear'))

  super().__init__(**kwargs)


In [103]:
regressor.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [104]:
es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

In [105]:
regressor.fit(X_train, y_train, validation_data=(X_test, y_test), 
                        epochs=10, batch_size=32, callbacks=[es, reduce_lr])

Epoch 1/10
[1m911/911[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 81ms/step - loss: 0.0067 - mean_absolute_error: 0.0597 - val_loss: 0.0052 - val_mean_absolute_error: 0.0471 - learning_rate: 0.0010
Epoch 2/10
[1m911/911[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 78ms/step - loss: 0.0051 - mean_absolute_error: 0.0506 - val_loss: 0.0053 - val_mean_absolute_error: 0.0511 - learning_rate: 0.0010
Epoch 3/10
[1m911/911[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 79ms/step - loss: 0.0048 - mean_absolute_error: 0.0487 - val_loss: 0.0055 - val_mean_absolute_error: 0.0543 - learning_rate: 0.0010
Epoch 4/10
[1m911/911[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 78ms/step - loss: 0.0045 - mean_absolute_error: 0.0474 - val_loss: 0.0040 - val_mean_absolute_error: 0.0456 - learning_rate: 0.0010
Epoch 5/10
[1m911/911[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 78ms/step - loss: 0.0044 - mean_absolute_error: 0.0463 - val_loss: 0.0040 - val_me

<keras.src.callbacks.history.History at 0x2bc60b8c7d0>

In [106]:
previsoes = regressor.predict(X_test)

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 22ms/step


In [112]:
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, previsoes)
print(f'MSE no conjunto de teste: {mse}')

MSE no conjunto de teste: 0.0037908096312061442
