In [None]:
# Forecasting use LSTM For AQI (AIR QUALITY INDEX) Based on PM10

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

%matplotlib inline

In [37]:
# Import Dataset
dataset = pd.read_csv('ispu_data.csv')

# Select column
cols = list(dataset)[1:4]

# Extracting
datelist = list(dataset['tanggal'])
datelist = [dt.datetime.strptime(date, '%Y-%m-%d').date() for date in datelist]

print('Training set shape = {}'.format(dataset.shape))
print('All timestamp = {}'.format(len(datelist)))
print('Columns Selected : {}'.format(cols))

Training set shape = (1076, 10)
All timestamp = 1076
Columns Selected : ['pm10', 'so2', 'co']


In [38]:
# DATA PREPROCESSING
dataset = dataset[cols].astype(str)
for i in cols:
    for j in range(0, len(dataset)):
        dataset[i][j] = dataset[i][j].replace(',', '')

dataset = dataset.astype(float)
# Using feature columns
training_set = dataset.values

print('Shape of training set = {}.'.format(training_set.shape))
training_set

Shape of training set = (1076, 3).


array([[29., 15.,  7.],
       [24., 17.,  6.],
       [23., 16.,  6.],
       ...,
       [61., 54., 15.],
       [60., 53., 17.],
       [64., 52., 44.]])

In [39]:
# Feature Scale
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
training_set_scaled = scaler.fit_transform(training_set)

scaler_predict = StandardScaler()
scaler_predict.fit_transform(training_set[:, 0:1])

array([[-2.1921057 ],
       [-2.52555766],
       [-2.59224805],
       ...,
       [-0.0580132 ],
       [-0.12470359],
       [ 0.14205797]])

In [40]:
# Create data structure 60 timestamps and 1 output
x_train = []
y_train = []

n_future = 30
n_past = 60

for i in range(n_past, len(training_set_scaled) - n_future + 1):
    x_train.append(training_set_scaled[i - n_past:i, 0:dataset.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

x_train, y_train = np.array(x_train), np.array(y_train)

print('x_train shape = {}.'.format(x_train.shape))
print('y_train shape = {}.'.format(y_train.shape))

x_train shape = (987, 60, 2).
y_train shape = (987, 1).


In [41]:
# LSTM Model
model = Sequential()
# 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset.shape[1]-1)))
# 2nd LSTM layer
model.add(LSTM(units=10, return_sequences=False))
# Dropout
model.add(Dropout(0.25))
# Output layer
model.add(Dense(units=1, activation='linear'))

# Compiling the Neural Network
model.compile(optimizer = Adam(learning_rate=0.01), loss='mean_squared_error')

In [44]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 64)            17152     
                                                                 
 lstm_1 (LSTM)               (None, 10)                3000      
                                                                 
 dropout (Dropout)           (None, 10)                0         
                                                                 
 dense (Dense)               (None, 1)                 11        
                                                                 
Total params: 20,163
Trainable params: 20,163
Non-trainable params: 0
_________________________________________________________________


In [46]:
# Training Model

es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(x_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb], validation_split=0.2, verbose=1, batch_size=256)

Epoch 1/30
Epoch 1: val_loss improved from inf to 1.09016, saving model to weights.h5
Epoch 2/30
Epoch 2: val_loss did not improve from 1.09016
Epoch 3/30
Epoch 3: val_loss did not improve from 1.09016
Epoch 4/30
Epoch 4: val_loss did not improve from 1.09016
Epoch 5/30
Epoch 5: val_loss improved from 1.09016 to 1.07002, saving model to weights.h5
Epoch 6/30
Epoch 6: val_loss improved from 1.07002 to 1.06747, saving model to weights.h5
Epoch 7/30
Epoch 7: val_loss did not improve from 1.06747
Epoch 8/30
Epoch 8: val_loss did not improve from 1.06747
Epoch 9/30
Epoch 9: val_loss did not improve from 1.06747
Epoch 10/30
Epoch 10: val_loss did not improve from 1.06747
Epoch 11/30
Epoch 11: val_loss did not improve from 1.06747
Epoch 12/30
Epoch 12: val_loss did not improve from 1.06747
Epoch 13/30
Epoch 13: val_loss did not improve from 1.06747
Epoch 14/30
Epoch 14: val_loss did not improve from 1.06747
Epoch 15/30
Epoch 15: val_loss did not improve from 1.06747
Epoch 16/30
Epoch 16: Redu

In [47]:
# Future Prediction
datelist_future = pd.date_range(datelist[-1], periods=n_future, freq='1d').tolist()
datelist_future_ = []
for present_timestamp in datelist_future:
    datelist_future_.append(present_timestamp.date())

In [None]:
# Predictions
predictions_future = model.predict(x_train[-n_future:])
predictions_train = model.predict(x_train[n_past:])