In [None]:
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

def create_samples(dataset, back_step = 1, test = 0):
    """Cut every row of ``dataset`` into sliding windows of length ``back_step``.

    Rows are handled one after another (row 0 first), and within a row the
    windows slide along axis 1 with stride 1.

    Parameters
    ----------
    dataset : 2-D numpy array
        Indexed as ``dataset[row, time]``.
    back_step : int
        Number of consecutive values in each input window.
    test : int
        ``0`` (default): also collect, for every window, the value that
        immediately follows it as the target. Any other value: collect
        windows only, including the last one that ends at the final column.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked windows and the stacked targets. The target array is
        empty when ``test`` is not ``0``.
    """
    with_targets = (test == 0)
    # Without targets one more window fits, because no value has to follow it.
    extra_window = 0 if with_targets else 1

    windows = [
        dataset[row, start:(start + back_step)]
        for row in range(dataset.shape[0])
        for start in range(dataset.shape[1] - back_step + extra_window)
    ]

    targets = []
    if with_targets:
        targets = [
            dataset[row, start + back_step]
            for row in range(dataset.shape[0])
            for start in range(dataset.shape[1] - back_step)
        ]

    return np.array(windows), np.array(targets)

def reshapepred(predictions, feature_num):
    """Rearrange a flat, series-major prediction vector into a (time, feature) matrix.

    ``predictions`` is read as ``feature_num`` consecutive runs of equal length
    ``time_num = len(predictions) // feature_num``: entry
    ``item * time_num + t`` ends up at ``pred_mat[t, item]``. Entries beyond
    ``feature_num * time_num`` (when the length is not a multiple of
    ``feature_num``) are ignored.

    Parameters
    ----------
    predictions : array-like
        Either a flat sequence of N values or an array of shape ``(N, 1)``.
    feature_num : int
        Number of columns of the returned matrix.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(time_num, feature_num)``.

    Raises
    ------
    ValueError
        If ``predictions`` carries more than one value per entry.
    """
    flat = np.asarray(predictions, dtype=float).reshape(-1)
    if flat.size != len(predictions):
        raise ValueError(
            'predictions must hold exactly one value per entry, got shape %s'
            % (np.shape(predictions),)
        )

    # Builtin integer division: the ``np.int`` alias no longer exists in
    # current NumPy releases.
    time_num = len(predictions) // feature_num

    # Each run of time_num values becomes one column, hence reshape + transpose.
    used = flat[:feature_num * time_num]
    return np.ascontiguousarray(used.reshape(feature_num, time_num).T)

def mean_absolute_percentage_error(y_true, y_pred, pos):
    """Mean absolute percentage error over the entries selected by ``pos``.

    ``pos`` is used to index both ``y_true`` and ``y_pred``; the selected
    ground-truth entries are used as the divisor, so they must be non-zero.
    The result is expressed in percent.
    """
    observed = y_true[pos]
    predicted = y_pred[pos]
    relative_error = (observed - predicted) / observed
    return np.mean(np.abs(relative_error)) * 100
def root_mean_squared_error(y_true, y_pred, pos):
    """Root mean squared error over the entries selected by ``pos``.

    ``pos`` is used to index both ``y_true`` and ``y_pred``; only the
    selected entries contribute to the error.
    """
    residual = y_true[pos] - y_pred[pos]
    mean_squared = np.mean(np.square(residual))
    return np.sqrt(mean_squared)

In [None]:
# ---------------------------------------------------------------------------
# Data loading and simulated missing values
# ---------------------------------------------------------------------------
# Entries whose random score is below this threshold are masked out.
missing_rate = 0.4
print('Missing rate = %f' % missing_rate)

# Fully observed data, stored under the 'tensor' key of the .mat file.
tensor = scipy.io.loadmat('../datasets/Shanghai-pollutant-dataset/NTS_tensor.mat')
dense_tensor = tensor['tensor']
dim1, dim2, dim3 = dense_tensor.shape

# --- Random missing (PM) scenario (active) ---------------------------------
# A pre-generated random tensor decides, entry by entry, what is dropped:
# the mask is 1 everywhere except where the random score is < missing_rate.
tensor = scipy.io.loadmat('../datasets/Shanghai-pollutant-dataset/NTS_random_tensor.mat')
random_tensor = tensor['tensor']
binary_tensor = np.ones((dim1, dim2, dim3))
binary_tensor[random_tensor < missing_rate] = 0

# --- Non random missing (CM) scenario (disabled) ---------------------------
# The block below is kept as an inert string; paste it in place of the PM
# block above to mask whole contiguous periods instead of single entries.
'''
missing_period = 6 #data missing in continuous [6, 12, 24, 48] hours
random_array_file = '../datasets/Shanghai-pollutant-dataset/NTS_random_array' + str(missing_period) + '.mat'
tensor = scipy.io.loadmat(random_array_file)
random_array = tensor['array'][0]
binary_reshape_tensor = np.ones_like(dense_tensor)
binary_reshape_tensor = binary_reshape_tensor.reshape(dim1,dim2,int(dim3 / missing_period),missing_period)
pos = np.where(random_array < missing_rate)
binary_reshape_tensor[:, :, pos, :] = 0
binary_tensor = binary_reshape_tensor.reshape(dim1, dim2, dim3)
sparse_tensor = np.multiply(dense_tensor, binary_tensor)
'''
# Masked-out entries become 0 in the sparse tensor.
sparse_tensor = dense_tensor * binary_tensor

# Keep only the time steps from index 5768 onward along the last axis.
dense_tensor, sparse_tensor = dense_tensor[:, :, 5768:], sparse_tensor[:, :, 5768:]

# Merge the first two axes so that each row is one series over time.
dim1, dim2, dim3 = dense_tensor.shape
dense_mat = np.reshape(dense_tensor, (dim1 * dim2, dim3))
sparse_mat = np.reshape(sparse_tensor, (dim1 * dim2, dim3))
print(sparse_mat.shape)


# Split and window configuration
test_rate = 0.082   # fraction of the time axis held out at the end for testing
look_back = 24      # number of past steps fed to the network per sample

# Chronological split along axis 1: the first train_len columns are used for
# training, the remaining test_len columns for testing.
train_len = int((1 - test_rate) * sparse_mat.shape[1])
test_len = sparse_mat.shape[1] - train_len
# A training span not longer than look_back would make `train_len - look_back`
# non-positive and the test slice below would silently start at the wrong place.
assert train_len > look_back, 'training span must be longer than look_back'

# Scale data
# MinMaxScaler scales column-wise, so the matrix is transposed to scale every
# row of sparse_mat independently. The scaler is fitted on the training
# columns only, so that minima/maxima of the test period do not leak into
# training; the fitted transform is then applied to the whole matrix.
# Test-period values may therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(sparse_mat[:, :train_len].T)
sc_data_mat = scaler.transform(sparse_mat.T).T

training_set = sc_data_mat[:, :train_len]
# The test slice starts look_back columns early so that the first test
# target already has a complete input window.
test_set = sc_data_mat[:, train_len - look_back:]
print('The size of training set is:')
print(training_set.shape)
print()
print('The size of test set is:')
print(test_set.shape)

# Create samples
trainX, trainY = create_samples(training_set, look_back)
testX, testY = create_samples(test_set, look_back)

# reshape input to be [samples, time steps, features]
keras_trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
keras_testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

# Build the network: one LSTM layer with 60 units reading windows of
# look_back steps with a single feature each, followed by a one-unit dense
# output layer.
model = Sequential([
    LSTM(60, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on the windowed training samples.
model.fit(
    keras_trainX,
    trainY,
    batch_size=200,
    epochs=30,
    verbose=1,
)

# Make prediction on test set
testPredict = model.predict(keras_testX)

# The test samples were generated row by row, so the flat prediction vector
# is folded back into a (time, series) matrix. The number of columns must
# equal the number of rows of dense_mat / sparse_mat, which is also the
# number of features the scaler was fitted with.
n_series = dense_mat.shape[0]
test_pred_mat = reshapepred(testPredict, n_series)
test_mat_pred = scaler.inverse_transform(test_pred_mat)

# Error calculation
test_ground_truth = dense_mat[:, train_len:].T
# The metrics index both matrices with the same positions; a shape mismatch
# would compare unrelated entries or fail with an obscure indexing error.
assert test_mat_pred.shape == test_ground_truth.shape, (
    'prediction shape %s does not match ground truth shape %s'
    % (test_mat_pred.shape, test_ground_truth.shape)
)
# Only entries whose ground truth is non-zero are scored (this also keeps the
# MAPE divisor non-zero). NOTE(review): presumably zeros mark unobserved
# values in the dense data -- confirm.
pos = np.where(test_ground_truth != 0)
testPred_rmse = root_mean_squared_error(test_ground_truth, test_mat_pred, pos)
print('Test prediction RMSE: %.2f RMSE' % (testPred_rmse))
testPred_mape = mean_absolute_percentage_error(test_ground_truth, test_mat_pred, pos)
print('Test prediction MAPE: %.2f%% MAPE' % (testPred_mape))