In [1]:
# Mount Google Drive so files under /content/drive (where the dataset CSV lives) are readable.
from google.colab import drive
drive.mount('/content/drive')
# Colab magic selecting the TensorFlow 1.x runtime; it is issued before the import below
# so that the 1.x build is the one that gets loaded (the recorded output shows 1.15.2).
%tensorflow_version 1.x
import tensorflow as tf
# Print the loaded version as a sanity check that the 1.x runtime is active.
print(tf.__version__)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
TensorFlow 1.x selected.
1.15.2


In [2]:
import pandas as pd
import numpy as np
import datetime
import time
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import math
import random

# read data from csv file and process X, y
def data_loader(data_path):
    """Load the PM2.5 CSV and split it into features and target.

    Processing steps:
      1. Discard the first 24 rows (their pm2.5 value is NaN and there is no
         earlier observation to fill from), then forward-fill the remaining
         gaps so the hourly ordering is preserved.
      2. Merge 'year', 'month', 'day' and 'hour' into a datetime index named
         'time'; the running row counter 'No' is dropped.
      3. One-hot encode the categorical wind direction column 'cbwd'.

    Args:
        data_path: path (or any source accepted by ``pd.read_csv``) of the
            raw CSV file.

    Returns:
        (X, y): ``X`` is a float32 DataFrame with every column except
        'pm2.5'; ``y`` is a one-column float32 DataFrame holding 'pm2.5'.
        Both are indexed by 'time'.
    """
    raw = pd.read_csv(data_path)

    # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
    filled = raw.iloc[24:].ffill()

    # Vectorised assembly of the timestamp (no slow row-wise apply).
    timestamps = pd.to_datetime(filled[['year', 'month', 'day', 'hour']]).rename('time')
    frame = (
        filled
        .drop(columns=['year', 'month', 'day', 'hour', 'No'])
        .set_index(timestamps)
    )

    # Replace 'cbwd' by one indicator column per wind direction; the new
    # columns are appended after the numeric ones.
    wind_direction = pd.get_dummies(frame['cbwd'])
    frame = frame.drop(columns=['cbwd']).join(wind_direction)

    # Select the target by name rather than by position so the target can
    # never leak into the features if the CSV column order changes.
    X = frame.drop(columns=['pm2.5']).astype(np.float32)
    y = frame[['pm2.5']].astype(np.float32)

    return X, y
  
# Z-score normalisation of the features; log(1 + y) transform of the targets
def data_normalisation(train, test):
    """Standardise the features and log-transform the targets.

    The feature mean and standard deviation are estimated on the training
    split only and then applied to both splits, so no information from the
    test split leaks into the transformation. The targets are mapped to
    ``log(1 + y)`` instead of being standardised.

    Args:
        train: sequence whose first two items are (X_train, y_train).
        test: sequence whose first two items are (X_test, y_test).

    Returns:
        ((X_train, y_train), (X_test, y_test)) after transformation. The
        targets are returned as NumPy arrays.
    """
    X_train, y_train = train[0], train[1]
    X_test, y_test = test[0], test[1]

    mean_x = X_train.mean(axis=0)
    std_x = np.asarray(X_train.std(axis=0))
    # A constant feature has zero spread; dividing by it would yield NaN/inf.
    # Use 1 instead so such a column is simply centred to zero.
    std_x = np.where(std_x == 0, np.ones_like(std_x), std_x)

    X_train = (X_train - mean_x) / std_x
    X_test = (X_test - mean_x) / std_x

    # log1p(y) == log(1 + y), computed accurately for small y.
    y_train = np.log1p(np.asarray(y_train))
    y_test = np.log1p(np.asarray(y_test))

    return (X_train, y_train), (X_test, y_test)

# Location of the raw dataset on the mounted Google Drive.
data_path = "/content/drive/My Drive/Colab Notebooks/PM2dot5.csv"
X, y = data_loader(data_path)

# Fixed seed so the shuffled 90/10 split is identical on every
# "Restart & Run All"; without it each run trains and tests on different rows.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X.values, y.values, test_size=0.1, shuffle=True, random_state=SPLIT_SEED)

# Statistics are estimated on the training split only (see data_normalisation).
(X_train, y_train), (X_test, y_test) = data_normalisation((X_train, y_train), (X_test, y_test))

# Displayed as the cell output: shapes of the four resulting arrays.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((39420, 10), (39420, 1), (4380, 10), (4380, 1))

In [3]:
# Layer widths shared by the three network builders defined in this cell.
n_input = 10      # number of feature columns fed to the network
n_hidden1 = 20    # width of the first hidden layer
n_hidden2 = 10    # width of the second hidden layer
n_hidden3 = 5     # width of the third hidden layer
n_output = 1      # a single regression output

# Graph inputs: a batch of feature rows and the matching batch of targets.
# The leading dimension is None so any batch size can be fed.
X_p = tf.placeholder("float", shape=[None, n_input], name='X_p')
Y_p = tf.placeholder("float", shape=[None, n_output], name='Y_p')

# forward
def multilayer_perceptron1(input_d):
    """Build the forward pass of an MLP with one hidden layer.

    Architecture: n_input -> n_hidden1 (leaky ReLU, alpha=0.01) -> n_output
    (linear). Every call creates a fresh set of variables whose initial
    values are drawn with ``tf.random_normal``.

    Args:
        input_d: tensor of input rows with ``n_input`` columns.

    Returns:
        Tensor holding the linear output layer (``n_output`` columns).
    """
    widths = [n_input, n_hidden1, n_output]

    # All biases are created first, then all weights.
    biases = [tf.Variable(tf.random_normal([width])) for width in widths[1:]]
    weights = [
        tf.Variable(tf.random_normal([fan_in, fan_out]))
        for fan_in, fan_out in zip(widths[:-1], widths[1:])
    ]

    # Hidden layer(s): affine transform followed by leaky ReLU.
    hidden = input_d
    for weight, bias in zip(weights[:-1], biases[:-1]):
        hidden = tf.nn.leaky_relu(tf.add(tf.matmul(hidden, weight), bias), alpha=0.01)

    # Output layer: affine transform only, no activation.
    return tf.add(tf.matmul(hidden, weights[-1]), biases[-1])
def multilayer_perceptron2(input_d):
    """Build the forward pass of an MLP with two hidden layers.

    Architecture: n_input -> n_hidden1 -> n_hidden2 -> n_output, with leaky
    ReLU (alpha=0.01) after each hidden layer and a linear output. Every
    call creates a fresh set of variables whose initial values are drawn
    with ``tf.random_normal``.

    Args:
        input_d: tensor of input rows with ``n_input`` columns.

    Returns:
        Tensor holding the linear output layer (``n_output`` columns).
    """
    widths = [n_input, n_hidden1, n_hidden2, n_output]

    # All biases are created first, then all weights.
    biases = [tf.Variable(tf.random_normal([width])) for width in widths[1:]]
    weights = [
        tf.Variable(tf.random_normal([fan_in, fan_out]))
        for fan_in, fan_out in zip(widths[:-1], widths[1:])
    ]

    # Hidden layers: affine transform followed by leaky ReLU.
    hidden = input_d
    for weight, bias in zip(weights[:-1], biases[:-1]):
        hidden = tf.nn.leaky_relu(tf.add(tf.matmul(hidden, weight), bias), alpha=0.01)

    # Output layer: affine transform only, no activation.
    return tf.add(tf.matmul(hidden, weights[-1]), biases[-1])
def multilayer_perceptron3(input_d):
    """Build the forward pass of an MLP with three hidden layers.

    Architecture: n_input -> n_hidden1 -> n_hidden2 -> n_hidden3 -> n_output,
    with leaky ReLU (alpha=0.01) after each hidden layer and a linear
    output. Every call creates a fresh set of variables whose initial values
    are drawn with ``tf.random_normal``.

    Args:
        input_d: tensor of input rows with ``n_input`` columns.

    Returns:
        Tensor holding the linear output layer (``n_output`` columns).
    """
    widths = [n_input, n_hidden1, n_hidden2, n_hidden3, n_output]

    # All biases are created first, then all weights.
    biases = [tf.Variable(tf.random_normal([width])) for width in widths[1:]]
    weights = [
        tf.Variable(tf.random_normal([fan_in, fan_out]))
        for fan_in, fan_out in zip(widths[:-1], widths[1:])
    ]

    # Hidden layers: affine transform followed by leaky ReLU.
    hidden = input_d
    for weight, bias in zip(weights[:-1], biases[:-1]):
        hidden = tf.nn.leaky_relu(tf.add(tf.matmul(hidden, weight), bias), alpha=0.01)

    # Output layer: affine transform only, no activation.
    return tf.add(tf.matmul(hidden, weights[-1]), biases[-1])


In [4]:
import itertools

from sklearn.model_selection import KFold

# ---------------------------------------------------------------------------
# Search configuration
# ---------------------------------------------------------------------------
kf = KFold(n_splits=10)

# NOTE: an "epoch" in this cell is ONE mini-batch update, not a full pass over
# the training data; the historical name is kept.
number_epochs = 20000
select_epoch_from = 1000   # losses are recorded for the plot from this step on
record_every = 100         # spacing (in steps) of the recorded loss points
report_every = 1000        # spacing (in steps) of the progress print-outs
momentum = 0.9             # tf.train.MomentumOptimizer requires a momentum value

# Full grid: 4 * 3 * 3 * 4 * 3 = 432 combinations, each trained once per fold.
hyperParameters = {'lr':[0.001,0.003,0.005,0.007],'batch_size':[64,128,256],'type_loss_func':["MSE","MAE","MSLE"],'type_optimizer':["GradientDescent","RMSProp","Momentum","Adam"],'neural_network_structure':["multilayer_perceptron1","multilayer_perceptron2","multilayer_perceptron3"]}


def _build_network(neural_network_structure, input_d):
    """Return the output tensor of the network named by `neural_network_structure`."""
    builders = {
        "multilayer_perceptron1": multilayer_perceptron1,
        "multilayer_perceptron2": multilayer_perceptron2,
        "multilayer_perceptron3": multilayer_perceptron3,
    }
    if neural_network_structure not in builders:
        raise ValueError("Unknown neural_network_structure: %s" % neural_network_structure)
    return builders[neural_network_structure](input_d)


def _build_loss(type_loss_func, y_true, y_pred):
    """Return the scalar loss tensor for the loss named by `type_loss_func`."""
    loss_classes = {
        "MSE": tf.keras.losses.MeanSquaredError,
        "MAE": tf.keras.losses.MeanAbsoluteError,
        "MSLE": tf.keras.losses.MeanSquaredLogarithmicError,
    }
    if type_loss_func not in loss_classes:
        raise ValueError("Unknown type_loss_func: %s" % type_loss_func)
    # Keras losses are called as loss(y_true, y_pred).
    return loss_classes[type_loss_func]()(y_true, y_pred)


def _build_optimizer(type_optimizer, lr):
    """Return the TF1 optimizer named by `type_optimizer` with learning rate `lr`."""
    if type_optimizer == "GradientDescent":
        return tf.train.GradientDescentOptimizer(lr)
    if type_optimizer == "RMSProp":
        return tf.train.RMSPropOptimizer(lr)
    if type_optimizer == "Momentum":
        return tf.train.MomentumOptimizer(lr, momentum)
    if type_optimizer == "Adam":
        return tf.train.AdamOptimizer(lr)
    raise ValueError("Unknown type_optimizer: %s" % type_optimizer)


def _train_and_evaluate(lr, batch_size, type_loss_func, type_optimizer,
                        neural_network_structure, fit_x, fit_y, held_x, held_y,
                        held_name="Validation"):
    """Train one model from scratch and evaluate it on a held-out set.

    The model lives in its own graph and session, so nothing accumulates in
    the default graph and no variables are shared between runs.

    Args:
        lr, batch_size, type_loss_func, type_optimizer,
        neural_network_structure: one hyper-parameter combination.
        fit_x, fit_y: arrays the model is trained on.
        held_x, held_y: arrays used only for monitoring and evaluation.
        held_name: label of the held-out set used in print-outs and plots.

    Returns:
        (fit_rmse, held_rmse, held_predictions).
    """
    # Number of mini-batches needed to cover fit_x once (last one may be short).
    n_batches = math.ceil(len(fit_x) / batch_size)

    graph = tf.Graph()
    with graph.as_default():
        x_in = tf.placeholder("float", [None, n_input])
        y_in = tf.placeholder("float", [None, n_output])
        prediction = _build_network(neural_network_structure, x_in)
        loss_func = _build_loss(type_loss_func, y_in, prediction)
        # RMSE is always reported, whatever loss is being optimised.
        eval_RMSE = tf.sqrt(tf.keras.losses.MeanSquaredError()(y_in, prediction))
        train_step = _build_optimizer(type_optimizer, lr).minimize(loss_func)
        init = tf.global_variables_initializer()

    train_losses = []
    held_losses = []
    epochs = []
    train_loss = held_loss = float('nan')

    with tf.Session(graph=graph) as sess:
        sess.run(init)

        for epoch in range(1, number_epochs + 1):
            # Cycle through the mini-batches; slicing never exceeds the data
            # and never yields an empty or whole-dataset batch.
            batch_no = epoch % n_batches
            start = batch_no * batch_size
            stop = min(start + batch_size, len(fit_x))
            batch_X = np.array(fit_x[start:stop])
            batch_y = np.array(fit_y[start:stop])

            _, train_loss = sess.run([train_step, loss_func], feed_dict={x_in: batch_X, y_in: batch_y})

            record = epoch >= select_epoch_from and epoch % record_every == 0
            report = epoch % report_every == 0
            # The held-out loss is a full pass over held_x, so it is only
            # computed when it is actually recorded, printed, or final.
            if record or report or epoch == number_epochs:
                held_loss = sess.run(loss_func, feed_dict={x_in: held_x, y_in: held_y})

            if record:
                train_losses.append(train_loss)
                held_losses.append(held_loss)
                epochs.append(epoch)

            if report:
                print("Epoch: %d, "
                      "Training loss: %s, "
                      "%s loss: %s, " % (epoch, train_loss, held_name, held_loss))

        fit_rmse = sess.run(eval_RMSE, feed_dict={x_in: fit_x, y_in: fit_y})
        held_rmse = sess.run(eval_RMSE, feed_dict={x_in: held_x, y_in: held_y})
        held_predictions = sess.run(prediction, feed_dict={x_in: held_x})

    fig, ax = pyplot.subplots()
    ax.plot(epochs, train_losses, 'r', label='Training loss', linewidth=1, linestyle='-')
    ax.plot(epochs, held_losses, 'b', label='%s loss' % held_name, linewidth=3, linestyle='-.')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Training and %s loss' % held_name.lower())
    ax.legend()
    pyplot.show()
    pyplot.close(fig)  # many figures are produced; release each one

    print("Final Training loss: %s, "
          "Final %s loss: %s, "
          "Training RMSE: %s, "
          "%s RMSE: %s" % (train_loss, held_name, held_loss, fit_rmse, held_name, held_rmse))
    print("-------------------------------------------------------------------------------------\n\n")

    return fit_rmse, held_rmse, held_predictions


# ---------------------------------------------------------------------------
# Grid search with k-fold cross-validation
# ---------------------------------------------------------------------------
best_lr = 0
best_batchsize = 0
best_loss_func = ""
best_type_optimizer = ""
best_neural_network_structure = ""
best_validation_rmse = float('inf')  # any finite average beats this

n_folds = kf.get_n_splits()

# itertools.product iterates with the right-most list varying fastest, i.e.
# in the same order as five nested for-loops over these lists.
search_space = itertools.product(hyperParameters['lr'],
                                 hyperParameters['batch_size'],
                                 hyperParameters['type_loss_func'],
                                 hyperParameters['type_optimizer'],
                                 hyperParameters['neural_network_structure'])

start_time = time.perf_counter()
for lr, batch_size, type_loss_func, type_optimizer, neural_network_structure in search_space:
    # Per-combination RMSE records; reset so averages cover this combination only.
    kfold_train_losses = []
    kfold_val_losses = []

    for fold, (train_index, val_index) in enumerate(kf.split(X_train, y_train), start=1):
        print("Fold %s, lr: %s, batch_size: %s, type_loss_func: %s, type_optimizer: %s, neural_network_structure: %s" % (fold,lr,batch_size,type_loss_func,type_optimizer,neural_network_structure))

        kfold_train_RMSE, kfold_val_RMSE, _ = _train_and_evaluate(
            lr, batch_size, type_loss_func, type_optimizer, neural_network_structure,
            X_train[train_index], y_train[train_index],
            X_train[val_index], y_train[val_index])

        kfold_train_losses.append(kfold_train_RMSE)
        kfold_val_losses.append(kfold_val_RMSE)

    average_train_rmse_score_10fold = np.mean(kfold_train_losses)
    average_val_rmse_score_10fold = np.mean(kfold_val_losses)
    print("For the model with learning rate: %s, batch size: %s, type_loss_func: %s, type_optimizer: %s, neural_network_structure: %s \n" % (lr,batch_size,type_loss_func,type_optimizer,neural_network_structure))
    print("Average Training RMSE for the %s folds: %s \n" % (n_folds, str(average_train_rmse_score_10fold)))
    print("Average Validation RMSE for the %s folds: %s \n\n" % (n_folds, str(average_val_rmse_score_10fold)))
    print("-------------------------------------------------------------------------------------\n\n")

    # A NaN average (diverged training) compares False and is never selected.
    if average_val_rmse_score_10fold < best_validation_rmse:
        best_validation_rmse = average_val_rmse_score_10fold
        best_lr = lr
        best_batchsize = batch_size
        best_loss_func = type_loss_func
        best_type_optimizer = type_optimizer
        best_neural_network_structure = neural_network_structure

if not best_neural_network_structure:
    raise RuntimeError("No hyper-parameter combination produced a finite validation RMSE.")

print("The best hyperparameters:\n")
print("learning rate: %s, batch size: %s, type_loss_func: %s, type_optimizer: %s, neural_network_structure: %s, RMSE: %s\n" % (best_lr,best_batchsize,best_loss_func,best_type_optimizer,best_neural_network_structure,best_validation_rmse))
end_time = time.perf_counter()
print("The total training time is: " + str(end_time-start_time))

# ---------------------------------------------------------------------------
# Test model
# ---------------------------------------------------------------------------
# Retrain the selected configuration on the whole training set and evaluate
# it on the untouched test set. The test loss printed while fitting is for
# monitoring only; it does not influence training or model selection.
final_train_rmse, rmse_score, output = _train_and_evaluate(
    best_lr, best_batchsize, best_loss_func, best_type_optimizer,
    best_neural_network_structure,
    X_train, y_train, X_test, y_test, held_name="Test")

# plot prediction and labels
pyplot.plot(y_test[0:500], 'ro', label='Testing labels')
pyplot.plot(output[0:500], 'bo', label='Predictions')
pyplot.xlabel('Instances')
pyplot.ylabel('Average_log(PM2.5 + 1)')
pyplot.title('Partial Predictions and labels')
pyplot.legend()
pyplot.show()

# final RMSE
print("Average Test RMSE: " + str(rmse_score))


Fold 1, lr: 0.001, batch_size: 64, type_loss_func: MSE, type_optimizer: GradientDescent, neural_network_structure: multilayer_perceptron1
Epoch: 1000, Training loss: 0.8382342, Validation loss: 1.234572, 
Epoch: 2000, Training loss: 1.0282356, Validation loss: 0.9523863, 
Epoch: 3000, Training loss: 1.0211468, Validation loss: 0.8487048, 
Epoch: 4000, Training loss: 0.6997118, Validation loss: 0.7942722, 
Epoch: 5000, Training loss: 0.8730383, Validation loss: 0.7565574, 
Epoch: 6000, Training loss: 0.72345614, Validation loss: 0.7358815, 


KeyboardInterrupt: ignored