### Import packages

In [1]:
import numpy as np
import pandas as pd

np.random.seed(1337)  # for reproducibility
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
# Metrics are imported from the public ``sklearn.metrics`` namespace. The
# ``sklearn.metrics.classification`` / ``sklearn.metrics.regression`` module
# paths were deprecated in scikit-learn 0.22 and removed in 0.24, so importing
# from them fails on current releases.
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

from dbn.tensorflow import SupervisedDBNRegression

### Defining Road and Year for dataset

In [2]:
# Road and year that select which CSV files this notebook reads and writes
ROAD, YEAR = "Taft Ave.", "2015"
# Extension appended to every CSV path built from ROAD and YEAR
EXT = ".csv"

### Defining hyperparameters

In [3]:
# Network shape and mini-batch settings
HIDDEN_LAYER_STRUCT = [20, 50, 100]
ACTIVE_FUNC = 'relu'
BATCH_SIZE = 16

# RBM settings (passed as n_epochs_rbm / learning_rate_rbm)
RBM_EPOCHS, RBM_LEARNING_RATE = 5, 0.01

# Backprop settings (passed as n_iter_backprop / learning_rate)
DBN_EPOCHS, DBN_LEARNING_RATE = 150, 0.01

### Preparing the PM1 and PM2 output datasets for training

In [15]:
# Traffic CSV for the selected road/year; its `dt` column is used later when
# the results are exported.
traffic_data = pd.read_csv(
    "data/mmda/eng_win2_mmda_{}_{}{}".format(ROAD, YEAR, EXT),
    skipinitialspace=True,
)

In [4]:
# Saved outputs of the PM1 and PM2 models for the same road/year
pm1_results = pd.read_csv(
    "output/pm1_output_{}_{}{}".format(ROAD, YEAR, EXT),
    skipinitialspace=True,
)
pm2_results = pd.read_csv(
    "output/pm2_output_{}_{}{}".format(ROAD, YEAR, EXT),
    skipinitialspace=True,
)

In [5]:
# Two-column feature matrix: the PM1 prediction and the PM2 prediction per row.
# Going through a DataFrame keeps the index alignment between the two Series.
fusion_dataset = np.array(pd.DataFrame(data={
    'PM1-Output': pm1_results['Predicted'],
    'PM2-Output': pm2_results['Predicted'],
}))

# Target values are taken from the "Actual" column of the PM1 output file
actual_dataset = pm1_results['Actual']

In [6]:
# Target: the actual values, rounded to 5 decimal places
Y = actual_dataset.round(5)

# Features: the PM1/PM2 prediction matrix
X = fusion_dataset

# NOTE(review): no hold-out split is made here. The "test" arrays are
# independent copies of the full training arrays, so any metric computed on
# them measures fit on data the model has already seen.
X_train, X_test = np.array(X), np.array(X)
Y_train, Y_test = np.array(Y), np.array(Y)

# Fit the scaler on the training features only; X_test stays unscaled here and
# is transformed with this same scaler in the testing cell.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training the Fusion Center

In [7]:
# Build the fusion-center regressor from the hyperparameter constants
fc = SupervisedDBNRegression(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    activation_function=ACTIVE_FUNC,
    batch_size=BATCH_SIZE,
    # RBM pre-training settings
    n_epochs_rbm=RBM_EPOCHS,
    learning_rate_rbm=RBM_LEARNING_RATE,
    # backprop fine-tuning settings
    n_iter_backprop=DBN_EPOCHS,
    learning_rate=DBN_LEARNING_RATE,
)
# Fit on the scaled features; left as the cell's last expression so its return
# value is displayed.
fc.fit(X_train, Y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.048787
>> Epoch 2 finished 	RBM Reconstruction error 0.044702
>> Epoch 3 finished 	RBM Reconstruction error 0.040769
>> Epoch 4 finished 	RBM Reconstruction error 0.036798
>> Epoch 5 finished 	RBM Reconstruction error 0.036308
>> Epoch 1 finished 	RBM Reconstruction error 0.017163
>> Epoch 2 finished 	RBM Reconstruction error 0.016946
>> Epoch 3 finished 	RBM Reconstruction error 0.016783
>> Epoch 4 finished 	RBM Reconstruction error 0.016863
>> Epoch 5 finished 	RBM Reconstruction error 0.016335
>> Epoch 1 finished 	RBM Reconstruction error 0.001140
>> Epoch 2 finished 	RBM Reconstruction error 0.001227
>> Epoch 3 finished 	RBM Reconstruction error 0.001123
>> Epoch 4 finished 	RBM Reconstruction error 0.001132
>> Epoch 5 finished 	RBM Reconstruction error 0.001182
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 0 finished 	ANN training loss 0.035447
>> Epoch 1 finished 	ANN training loss 0.035206
>>

>> Epoch 149 finished 	ANN training loss 0.000006
[END] Fine tuning step


SupervisedDBNRegression(batch_size=16, dropout_p=0, l2_regularization=1.0,
            learning_rate=0.01, n_iter_backprop=150, verbose=True)

### Testing the Fusion Center

In [8]:
# Test
# Scale into a new variable instead of rebinding X_test. Overwriting X_test
# made this cell non-idempotent: running it a second time applied the scaler to
# already-scaled data and silently changed the predictions and metrics.
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = fc.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

Done.
R-squared: 0.999823
MSE: 0.000006
MAE: 0.001904


### Exporting predicted and actual results

In [27]:
# Timestamps for the predictions are taken from the tail of traffic_data
# (this assumes the predictions correspond to its last len(Y_pred) rows).
# The offset is derived from Y_pred: fc_results is only created at the end of
# this cell, so reading fc_results.shape here raised NameError on a fresh kernel.
startIndex = traffic_data.shape[0] - len(Y_pred)
assert startIndex >= 0, "more predictions than rows in traffic_data"
# Explicit positional slice (replaces the tuple-style `[startIndex:,]` index)
dt = traffic_data['dt'].iloc[startIndex:]

# Keep the first element of each prediction row
predicted = [row[0] for row in Y_pred]

# The frame takes its row index from `dt`; the column order is pinned so the
# result does not depend on how the pandas version orders dict keys.
fc_results = pd.DataFrame(
    data={'Predicted': predicted, 'Actual': Y_test, 'dt': dt},
    columns=['Actual', 'Predicted', 'dt'],
)

In [28]:
# Show a bounded preview rather than the whole results frame
fc_results.head(10)

Unnamed: 0,Actual,Predicted,dt
11563,0.34164,0.342362,01/05/2015 10:45
11564,0.33944,0.340204,01/05/2015 11:00
11565,0.33737,0.338181,01/05/2015 11:15
11566,0.33543,0.336288,01/05/2015 11:30
11567,0.33362,0.334522,01/05/2015 11:45
11568,0.33193,0.332879,01/05/2015 12:00
11569,0.33037,0.331339,01/05/2015 12:15
11570,0.32892,0.329909,01/05/2015 12:30
11571,0.32758,0.328574,01/05/2015 12:45
11572,0.32636,0.327350,01/05/2015 13:00


In [None]:
# Write the fusion-center results as UTF-8 CSV, without the row index
fc_results.to_csv(
    "output/fc_output_{}_{}{}".format(ROAD, YEAR, EXT),
    index=False,
    encoding='utf-8',
)

### Saving the model

In [None]:
# Persist the trained fusion center under models/, named by road and year
fc.save('models/fc_{}_{}.pkl'.format(ROAD, YEAR))