Air Pollution Forecasting - One To One - RNN LSTM

We use this data to frame a forecasting problem: given the pollution
level at prior hours, we forecast the pollution level at the next hour.

In [None]:
import warnings

# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

In [None]:
# Colab-only step: attach Google Drive under /content/gdrive so files stored
# there can be read through ordinary file paths.
from google.colab import drive
# NOTE(review): force_remount=True presumably re-mounts even when the drive is
# already attached — confirm against the google.colab documentation.
drive.mount("/content/gdrive", force_remount=True)

In [None]:
# Change the working directory so the relative CSV path used later resolves.
# NOTE(review): 'My Drive/../' collapses to '/content/gdrive' — confirm this is
# the folder that actually holds the data file.
%cd '/content/gdrive/My Drive/../'

In [None]:
# LSTM for air pollution problem with regression framing
import math

import matplotlib.pyplot as plt
import numpy
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1, include_last=False):
  """Turn a series into supervised (window, next value) pairs.

  Only column 0 of `dataset` is read. Sample ``i`` pairs the window
  ``dataset[i:i+look_back, 0]`` with the target ``dataset[i+look_back, 0]``.

  Args:
    dataset: 2-D array indexable as ``dataset[rows, 0]``.
    look_back: number of consecutive values in each input window.
    include_last: the historical loop bound (``len - look_back - 1``) stops one
      step early and silently discards the final valid pair. That behaviour is
      kept as the default because code that positions predictions on a plot
      may rely on the resulting sample count; pass True to keep the final pair.

  Returns:
    ``(dataX, dataY)``: arrays of shape ``(n, look_back)`` and ``(n,)``.
    When the series is too short to form any pair, the arrays are empty but
    ``dataX`` is still 2-D, so downstream ``.shape[1]`` look-ups keep working.
  """
  n_windows = len(dataset) - look_back
  if not include_last:
    n_windows -= 1
  n_windows = max(n_windows, 0)

  # inputs: the look_back values starting at t
  dataX = [dataset[i:(i + look_back), 0] for i in range(n_windows)]
  # targets: the value immediately after each window
  dataY = [dataset[i + look_back, 0] for i in range(n_windows)]

  if not dataX:
    return numpy.empty((0, look_back)), numpy.empty((0,))
  return numpy.array(dataX), numpy.array(dataY)

In [None]:
# fix random seeds for reproducibility
# Seeding NumPy alone does not make Keras training repeatable: weight
# initialisation and shuffling draw from the Python / TensorFlow generators.
# set_random_seed seeds Python's `random`, NumPy and TensorFlow in one call
# (available in TensorFlow >= 2.7).
numpy.random.seed(7)
tf.keras.utils.set_random_seed(7)

In [None]:
# read the pollution CSV, using its first column as the row index
dataframe = pd.read_csv(filepath_or_buffer='pollution_new.csv', index_col=0)
dataframe.head(n=5)

In [None]:
# summary statistics of the loaded data
# (the frame is already in memory from the previous cell, so the CSV is not
# read from disk a second time)
dataframe.describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# normalize the dataset
# Build `dataset` from the loaded frame here: it was never defined before this
# point, and deriving it inside this cell also makes the cell safe to re-run
# (the scaler is always fitted on raw values, never on already-scaled ones).
# It must be a 2-D float array with exactly one column, because the scaler is
# later applied to (n, 1) prediction arrays and to the single value [[8]].
# NOTE(review): assumes the pollution series is the first data column of
# pollution_new.csv — confirm, and select the column by name if it is not.
dataset = dataframe.iloc[:, [0]].values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

In [None]:
# chronological split into train and test sets
# the data is hourly, so 4 * 365 * 24 rows = the first 4 years, used for training
# (alternative: a proportional split, int(len(dataset) * 0.67))
train_size = 4 * 365 * 24
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

In [None]:
# build supervised pairs for each split: X = value(s) at t, Y = value at t+1
look_back = 1
trainX, trainY = create_dataset(dataset=train, look_back=look_back)
testX, testY = create_dataset(dataset=test, look_back=look_back)

In [None]:
# first five inputs and targets of each split, one array per line
print(trainX[:5], trainY[:5], testX[:5], testY[:5], sep='\n')

In [None]:
# the LSTM layer takes 3-D input: [samples, time steps, features]
# each window becomes one time step carrying the window's values as features
trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
testX = testX.reshape(testX.shape[0], 1, testX.shape[1])

In [None]:
# define the network: one LSTM layer feeding a single-output dense layer
model = Sequential([
    LSTM(32, input_shape=(1, look_back)),  # 64, ...
    Dense(1),
])

In [None]:
# print the layer-by-layer architecture and parameter counts
model.summary()

Checking the parameter counts reported by `model.summary()`

lstm (LSTM) = 4 x [(num_units + input_dim + 1) x num_units] = 4 x [(32 + 1 + 1) x 32] = 4 x 1088 = 4352 (four gates, each with recurrent weights, input weights and one bias per unit)

dense = (units in this layer x units in the previous layer) + biases = (1 x 32) + 1 = 33

In [None]:
# Early Stopping callback
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    min_delta=0.0001,            # smallest change that counts as an improvement
    patience=20,                 # epochs to wait without improvement before stopping
    restore_best_weights=True,
)

In [None]:
# train with mean-squared-error loss and the Adam optimizer;
# the test split is passed as validation data so its loss is logged every epoch
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(
    x=trainX,
    y=trainY,
    batch_size=128,  # 32, 64, 128
    epochs=50,  # 100, 200
    validation_data=(testX, testY),
    # callbacks=[early_stopping],
)

In [None]:
# training loss vs. validation loss, one point per epoch
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.legend()
plt.show()

In [None]:
# run the fitted model on both splits (outputs are still in scaled units)
trainPredict = model.predict(x=trainX)
testPredict = model.predict(x=testX)

In [None]:
# map the scaled train predictions and targets back to original units
trainPredict = scaler.inverse_transform(trainPredict)
# the targets are passed as a single row, so the result has shape (1, n)
trainY = scaler.inverse_transform(trainY[numpy.newaxis, :])

In [None]:
# map the scaled test predictions and targets back to original units
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# show the first five de-normalised targets of each split; the cell used to
# evaluate trainY twice and never displayed the freshly inverted testY
trainY[0][:5], testY[0][:5]

In [None]:
# root mean squared error of each split, in original units
train_rmse = math.sqrt(mean_squared_error(y_true=trainY[0], y_pred=trainPredict[:, 0]))
test_rmse = math.sqrt(mean_squared_error(y_true=testY[0], y_pred=testPredict[:, 0]))
print('Train RMSE: %.2f RMSE' % train_rmse)
print('Test RMSE: %.2f RMSE' % test_rmse)

In [None]:
# mean absolute error of each split, in original units
train_mae = mean_absolute_error(y_true=trainY[0], y_pred=trainPredict[:, 0])
test_mae = mean_absolute_error(y_true=testY[0], y_pred=testPredict[:, 0])
print('Train MAE: %.2f MAE' % train_mae)
print('test MAE: %.2f MAE' % test_mae)

In [None]:
# Place the predictions on the full-length time axis for plotting; positions
# without a prediction stay NaN so matplotlib leaves gaps there.
# Offsets are derived from the actual array lengths instead of hand-tuned
# constants, so they stay correct if the number of windows per split changes.

# train: sample i forecasts the value at index i + look_back of the series
trainPredictPlot = numpy.full_like(dataset, numpy.nan)
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict

# test: the same rule, offset by the length of the train split
testPredictPlot = numpy.full_like(dataset, numpy.nan)
test_start = len(train) + look_back
testPredictPlot[test_start:test_start + len(testPredict), :] = testPredict

In [None]:
# actual series (back in original units) overlaid with train and test predictions
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(scaler.inverse_transform(dataset), label='Real Data')
ax.plot(trainPredictPlot, label='trainPredict')
ax.plot(testPredictPlot, label='testPredict')
ax.legend(title="Notes")
plt.show()

In [None]:
# collect every layer's weight arrays, keyed by layer name
ws = {layer.name: layer.get_weights() for layer in model.layers}
ws

In [None]:
# x_new = 8 => next time = ?
# scale the raw reading with the fitted scaler, then shape it as
# [samples, time steps, features] = (1, 1, 1)
x_new = scaler.transform([[8]]).reshape(1, 1, 1)

In [None]:
# x_new = 8 => next time = ?
# answer the question: forecast the next hour from the input prepared in the
# previous cell, then convert the scaled output back to original units
y_new = model.predict(x_new)
scaler.inverse_transform(y_new)