In [7]:
from pandas import DataFrame
import pandas

# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	n_vars = 1 if type(data) is list else data.shape[1]
	df = DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = pandas.concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg

In [8]:
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

dataset = read_csv('data/pm2.5_non_normalised.csv', header=0, index_col=0)

In [9]:
from math import sqrt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import numpy as np
from sklearn.metrics import mean_squared_error

def grid_search(dataset, lookback, nodes, layers, split_percentage):
  scores = list()
  
  for lookback_period in lookback:
    values = series_to_supervised(dataset, lookback_period, 1).values
    
    n_train_hours = (int)(split_percentage * len(values))
    train = values[:n_train_hours, :]
    test = values[n_train_hours:, :]
    
    train_X, train_y = train[:, :-1], train[:, -1]
    test_X, test_y = test[:, :-1], test[:, -1]
    train_X = train_X.reshape((train_X.shape[0], lookback_period))
    test_X = test_X.reshape((test_X.shape[0], lookback_period))
    
    for number_of_nodes in nodes:            
        for number_of_layers in layers:
            model = Sequential()
            for i in range(number_of_layers):
                model.add(Dense(number_of_nodes, activation='relu', input_dim=lookback_period))

            model.add(Dense(1))
            model.compile(loss='mae', optimizer='adam')

            name = str(lookback_period)+'_'+str(number_of_nodes)+'_'+str(number_of_layers)
            print("Starting training for : ", name)

            history = model.fit(train_X, train_y, epochs=100, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
            yhat = model.predict(test_X)

            np.save('MLP_runtime/history/' + name, history)
            np.save('MLP_runtime/test_y/' + name, test_y)
            np.save('MLP_runtime/yhat/' + name, yhat)
            model.save('MLP_runtime/model/' + name + '.h5')
            

            rmse = mean_squared_error(yhat, test_y)
            print("RMSE for ", name, " = " , rmse)

            scores.append([rmse, lookback_period, number_of_nodes, number_of_layers])
            np.save('MLP_scores', np.array(scores))

    return scores          

In [10]:
layers = [2,4,16,32]
lookback = [1,3,5,7]
nodes = [25,50,75]
split_percentage = 0.8

scores = grid_search(dataset, lookback, nodes, layers, split_percentage)
print(scores)
np.save('MLP_scores', np.array(scores))

Starting training for :  1_25_2
Train on 35038 samples, validate on 8760 samples
Epoch 1/100
 - 1s - loss: 21.3750 - val_loss: 12.5006
Epoch 2/100
 - 1s - loss: 13.4750 - val_loss: 12.5019
Epoch 3/100
 - 1s - loss: 13.4725 - val_loss: 12.4904
Epoch 4/100
 - 1s - loss: 13.4750 - val_loss: 12.4885
Epoch 5/100
 - 1s - loss: 13.4743 - val_loss: 12.4934
Epoch 6/100
 - 1s - loss: 13.4744 - val_loss: 12.4931
Epoch 7/100
 - 1s - loss: 13.4747 - val_loss: 12.4944
Epoch 8/100
 - 1s - loss: 13.4757 - val_loss: 12.4965
Epoch 9/100
 - 1s - loss: 13.4737 - val_loss: 12.4944
Epoch 10/100
 - 1s - loss: 13.4763 - val_loss: 12.4925
Epoch 11/100
 - 1s - loss: 13.4746 - val_loss: 12.4986
Epoch 12/100
 - 1s - loss: 13.4756 - val_loss: 12.4937
Epoch 13/100
 - 1s - loss: 13.4757 - val_loss: 12.5067
Epoch 14/100
 - 1s - loss: 13.4780 - val_loss: 12.4957
Epoch 15/100
 - 1s - loss: 13.4747 - val_loss: 12.4963
Epoch 16/100
 - 1s - loss: 13.4771 - val_loss: 12.4923
Epoch 17/100
 - 1s - loss: 13.4776 - val_loss: 1

Epoch 47/100
 - 1s - loss: 13.4928 - val_loss: 12.5727
Epoch 48/100
 - 1s - loss: 13.4968 - val_loss: 12.5469
Epoch 49/100
 - 1s - loss: 13.4898 - val_loss: 12.5940
Epoch 50/100
 - 1s - loss: 13.4917 - val_loss: 12.5607
Epoch 51/100
 - 1s - loss: 13.4894 - val_loss: 12.5882
Epoch 52/100
 - 1s - loss: 13.4886 - val_loss: 12.6155
Epoch 53/100
 - 1s - loss: 13.4948 - val_loss: 12.5198
Epoch 54/100
 - 1s - loss: 13.4874 - val_loss: 12.5525
Epoch 55/100
 - 1s - loss: 13.4872 - val_loss: 12.5187
Epoch 56/100
 - 1s - loss: 13.4859 - val_loss: 12.5577
Epoch 57/100
 - 1s - loss: 13.4724 - val_loss: 12.6148
Epoch 58/100
 - 1s - loss: 13.4944 - val_loss: 12.5538
Epoch 59/100
 - 1s - loss: 13.4890 - val_loss: 12.5426
Epoch 60/100
 - 1s - loss: 13.4862 - val_loss: 12.5750
Epoch 61/100
 - 1s - loss: 13.4856 - val_loss: 12.5691
Epoch 62/100
 - 1s - loss: 13.4862 - val_loss: 12.5507
Epoch 63/100
 - 1s - loss: 13.4853 - val_loss: 12.5474
Epoch 64/100
 - 1s - loss: 13.4931 - val_loss: 12.5416
Epoch 65/1

Epoch 94/100
 - 1s - loss: 13.9655 - val_loss: 12.5374
Epoch 95/100
 - 1s - loss: 14.0865 - val_loss: 12.7356
Epoch 96/100
 - 1s - loss: 13.9441 - val_loss: 12.5558
Epoch 97/100
 - 1s - loss: 14.0438 - val_loss: 12.5832
Epoch 98/100
 - 1s - loss: 13.9020 - val_loss: 12.5581
Epoch 99/100
 - 1s - loss: 13.9568 - val_loss: 12.5591
Epoch 100/100
 - 1s - loss: 14.0689 - val_loss: 12.6290
RMSE for  1_25_16  =  600.6961177274407
Starting training for :  1_25_32
Train on 35038 samples, validate on 8760 samples
Epoch 1/100
 - 4s - loss: 20.5783 - val_loss: 17.6773
Epoch 2/100
 - 2s - loss: 15.6521 - val_loss: 13.2582
Epoch 3/100
 - 2s - loss: 14.6570 - val_loss: 13.5174
Epoch 4/100
 - 2s - loss: 14.5810 - val_loss: 13.2668
Epoch 5/100
 - 2s - loss: 14.6496 - val_loss: 13.4020
Epoch 6/100
 - 2s - loss: 14.7644 - val_loss: 13.3651
Epoch 7/100
 - 2s - loss: 14.7549 - val_loss: 13.8210
Epoch 8/100
 - 2s - loss: 14.2884 - val_loss: 13.4610
Epoch 9/100
 - 2s - loss: 14.5022 - val_loss: 13.0615
Epoch 

Epoch 39/100
 - 1s - loss: 13.4767 - val_loss: 12.5127
Epoch 40/100
 - 1s - loss: 13.4799 - val_loss: 12.5120
Epoch 41/100
 - 1s - loss: 13.4766 - val_loss: 12.5084
Epoch 42/100
 - 1s - loss: 13.4786 - val_loss: 12.5086
Epoch 43/100
 - 1s - loss: 13.4751 - val_loss: 12.5083
Epoch 44/100
 - 1s - loss: 13.4813 - val_loss: 12.5071
Epoch 45/100
 - 1s - loss: 13.4840 - val_loss: 12.5053
Epoch 46/100
 - 1s - loss: 13.4699 - val_loss: 12.5141
Epoch 47/100
 - 1s - loss: 13.4769 - val_loss: 12.5141
Epoch 48/100
 - 1s - loss: 13.4773 - val_loss: 12.5102
Epoch 49/100
 - 1s - loss: 13.4801 - val_loss: 12.5091
Epoch 50/100
 - 1s - loss: 13.4768 - val_loss: 12.5023
Epoch 51/100
 - 1s - loss: 13.4725 - val_loss: 12.5305
Epoch 52/100
 - 1s - loss: 13.4739 - val_loss: 12.5128
Epoch 53/100
 - 1s - loss: 13.4706 - val_loss: 12.5049
Epoch 54/100
 - 1s - loss: 13.4769 - val_loss: 12.5107
Epoch 55/100
 - 1s - loss: 13.4681 - val_loss: 12.5047
Epoch 56/100
 - 1s - loss: 13.4752 - val_loss: 12.5030
Epoch 57/1

Epoch 86/100
 - 1s - loss: 13.5196 - val_loss: 12.6109
Epoch 87/100
 - 1s - loss: 13.5340 - val_loss: 12.5709
Epoch 88/100
 - 1s - loss: 13.5329 - val_loss: 12.5809
Epoch 89/100
 - 1s - loss: 13.5280 - val_loss: 12.5686
Epoch 90/100
 - 1s - loss: 13.5116 - val_loss: 12.5871
Epoch 91/100
 - 1s - loss: 13.5246 - val_loss: 12.5727
Epoch 92/100
 - 1s - loss: 13.5179 - val_loss: 12.5993
Epoch 93/100
 - 1s - loss: 13.5318 - val_loss: 12.5824
Epoch 94/100
 - 1s - loss: 13.5172 - val_loss: 12.5737
Epoch 95/100
 - 1s - loss: 13.5106 - val_loss: 12.5915
Epoch 96/100
 - 1s - loss: 13.5124 - val_loss: 12.6793
Epoch 97/100
 - 1s - loss: 13.5215 - val_loss: 12.5859
Epoch 98/100
 - 1s - loss: 13.5405 - val_loss: 12.6050
Epoch 99/100
 - 1s - loss: 13.5169 - val_loss: 12.6839
Epoch 100/100
 - 1s - loss: 13.5163 - val_loss: 12.5914
RMSE for  1_50_4  =  601.1876953216556
Starting training for :  1_50_16
Train on 35038 samples, validate on 8760 samples
Epoch 1/100
 - 3s - loss: 18.2174 - val_loss: 12.6041

Epoch 31/100
 - 2s - loss: 15.9686 - val_loss: 16.0467
Epoch 32/100
 - 2s - loss: 15.1939 - val_loss: 13.0242
Epoch 33/100
 - 2s - loss: 19.7993 - val_loss: 14.4665
Epoch 34/100
 - 2s - loss: 14.6680 - val_loss: 12.6961
Epoch 35/100
 - 2s - loss: 14.9579 - val_loss: 16.1462
Epoch 36/100
 - 2s - loss: 15.3755 - val_loss: 14.9347
Epoch 37/100
 - 2s - loss: 17.5857 - val_loss: 15.6634
Epoch 38/100
 - 2s - loss: 16.3676 - val_loss: 14.4934
Epoch 39/100
 - 2s - loss: 14.6919 - val_loss: 14.9885
Epoch 40/100
 - 2s - loss: 14.5679 - val_loss: 13.3534
Epoch 41/100
 - 2s - loss: 14.4593 - val_loss: 13.1771
Epoch 42/100
 - 2s - loss: 15.3851 - val_loss: 13.6532
Epoch 43/100
 - 2s - loss: 15.2307 - val_loss: 14.7429
Epoch 44/100
 - 2s - loss: 15.7107 - val_loss: 15.3834
Epoch 45/100
 - 2s - loss: 14.6852 - val_loss: 13.5469
Epoch 46/100
 - 2s - loss: 17.9764 - val_loss: 14.6492
Epoch 47/100
 - 2s - loss: 14.4797 - val_loss: 14.8207
Epoch 48/100
 - 2s - loss: 14.5969 - val_loss: 13.5626
Epoch 49/1

Epoch 78/100
 - 1s - loss: 13.4740 - val_loss: 12.5353
Epoch 79/100
 - 1s - loss: 13.4820 - val_loss: 12.5280
Epoch 80/100
 - 1s - loss: 13.4798 - val_loss: 12.5423
Epoch 81/100
 - 1s - loss: 13.4758 - val_loss: 12.5331
Epoch 82/100
 - 1s - loss: 13.4810 - val_loss: 12.5330
Epoch 83/100
 - 1s - loss: 13.4775 - val_loss: 12.5286
Epoch 84/100
 - 1s - loss: 13.4793 - val_loss: 12.5339
Epoch 85/100
 - 1s - loss: 13.4765 - val_loss: 12.5306
Epoch 86/100
 - 1s - loss: 13.4723 - val_loss: 12.5462
Epoch 87/100
 - 1s - loss: 13.4777 - val_loss: 12.5352
Epoch 88/100
 - 1s - loss: 13.4806 - val_loss: 12.5323
Epoch 89/100
 - 1s - loss: 13.4766 - val_loss: 12.5421
Epoch 90/100
 - 1s - loss: 13.4762 - val_loss: 12.5293
Epoch 91/100
 - 1s - loss: 13.4773 - val_loss: 12.5293
Epoch 92/100
 - 1s - loss: 13.4780 - val_loss: 12.5202
Epoch 93/100
 - 1s - loss: 13.4738 - val_loss: 12.5252
Epoch 94/100
 - 1s - loss: 13.4801 - val_loss: 12.5261
Epoch 95/100
 - 1s - loss: 13.4790 - val_loss: 12.5309
Epoch 96/1

Epoch 23/100
 - 2s - loss: 14.3432 - val_loss: 13.1583
Epoch 24/100
 - 2s - loss: 14.1940 - val_loss: 12.9162
Epoch 25/100
 - 2s - loss: 14.3330 - val_loss: 12.4850
Epoch 26/100
 - 2s - loss: 15.3069 - val_loss: 12.5771
Epoch 27/100
 - 2s - loss: 14.6662 - val_loss: 12.5649
Epoch 28/100
 - 2s - loss: 14.2727 - val_loss: 13.1354
Epoch 29/100
 - 2s - loss: 14.4356 - val_loss: 12.8619
Epoch 30/100
 - 2s - loss: 14.7246 - val_loss: 12.9054
Epoch 31/100
 - 2s - loss: 13.9609 - val_loss: 12.5017
Epoch 32/100
 - 2s - loss: 14.0206 - val_loss: 13.7221
Epoch 33/100
 - 2s - loss: 14.0627 - val_loss: 13.4297
Epoch 34/100
 - 2s - loss: 14.3128 - val_loss: 13.3750
Epoch 35/100
 - 2s - loss: 14.1328 - val_loss: 12.5770
Epoch 36/100
 - 2s - loss: 14.5775 - val_loss: 12.5218
Epoch 37/100
 - 2s - loss: 13.9098 - val_loss: 13.0195
Epoch 38/100
 - 2s - loss: 14.0201 - val_loss: 13.2220
Epoch 39/100
 - 2s - loss: 14.2044 - val_loss: 13.0326
Epoch 40/100
 - 2s - loss: 14.5691 - val_loss: 12.7819
Epoch 41/1

Epoch 70/100
 - 3s - loss: 15.4470 - val_loss: 12.6955
Epoch 71/100
 - 3s - loss: 15.8213 - val_loss: 13.0583
Epoch 72/100
 - 3s - loss: 15.9595 - val_loss: 13.1243
Epoch 73/100
 - 3s - loss: 15.5053 - val_loss: 12.7540
Epoch 74/100
 - 3s - loss: 16.4009 - val_loss: 13.7432
Epoch 75/100
 - 3s - loss: 15.5623 - val_loss: 13.2036
Epoch 76/100
 - 3s - loss: 15.5570 - val_loss: 13.4791
Epoch 77/100
 - 3s - loss: 15.5624 - val_loss: 14.1722
Epoch 78/100
 - 3s - loss: 14.7266 - val_loss: 12.7983
Epoch 79/100
 - 3s - loss: 15.3188 - val_loss: 12.9790
Epoch 80/100
 - 3s - loss: 15.4414 - val_loss: 13.3830
Epoch 81/100
 - 3s - loss: 14.6120 - val_loss: 12.7079
Epoch 82/100
 - 3s - loss: 14.3704 - val_loss: 12.7984
Epoch 83/100
 - 3s - loss: 14.8689 - val_loss: 12.9679
Epoch 84/100
 - 3s - loss: 14.7756 - val_loss: 13.1704
Epoch 85/100
 - 3s - loss: 14.6537 - val_loss: 12.7912
Epoch 86/100
 - 3s - loss: 15.3712 - val_loss: 13.0197
Epoch 87/100
 - 3s - loss: 14.2864 - val_loss: 12.8501
Epoch 88/1

In [11]:
loaded_scores = np.load('MLP_scores.npy')
sorted_scores = loaded_scores[loaded_scores[:, 0].argsort()]
sorted_scores

array([[600.34910145,   1.        ,  25.        ,   4.        ],
       [600.69611773,   1.        ,  25.        ,  16.        ],
       [600.77791459,   1.        ,  75.        ,   2.        ],
       [601.17527511,   1.        ,  25.        ,  32.        ],
       [601.18769532,   1.        ,  50.        ,   4.        ],
       [601.19319916,   1.        ,  75.        ,   4.        ],
       [601.87557738,   1.        ,  50.        ,  32.        ],
       [602.40114443,   1.        ,  50.        ,  16.        ],
       [603.89578074,   1.        ,  50.        ,   2.        ],
       [604.11093481,   1.        ,  25.        ,   2.        ],
       [608.40216947,   1.        ,  75.        ,  16.        ],
       [646.84199841,   1.        ,  75.        ,  32.        ]])