## Import packages

In [1]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG before the modelling libraries are imported so that
# NumPy-driven randomness is repeatable between runs.
# NOTE(review): this does not obviously seed TensorFlow, which the DBN backend
# is imported from - confirm whether a TensorFlow seed is also required.
np.random.seed(1337)  # for reproducibility
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Metrics are imported from the public `sklearn.metrics` namespace. The
# `sklearn.metrics.classification` and `sklearn.metrics.regression` submodules
# are deprecated private modules that newer scikit-learn releases no longer
# provide, while the public namespace exposes the same functions.
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from dbn.tensorflow import SupervisedDBNRegression

### Define HyperParameters

In [2]:
# Hyperparameters shared by every SupervisedDBNRegression built in this
# notebook (PM1, PM2 and the fusion center all use these same values).

# Passed as `n_epochs_rbm`.
RBM_EPOCHS = 5
# Passed as `n_iter_backprop`.
DBN_EPOCHS = 150
# Passed as `learning_rate_rbm`.
RBM_LEARNING_RATE = 0.01
# Passed as `learning_rate`.
DBN_LEARNING_RATE = 0.01
# Passed as `hidden_layers_structure`.
HIDDEN_LAYER_STRUCT = [20, 50, 100]
# Passed as `activation_function`.
ACTIVE_FUNC = 'relu'
# Passed as `batch_size`.
BATCH_SIZE = 16

### Define Road and Year of Dataset

In [3]:
# Identifiers used to build the input, output and model file names below.
# ROAD and YEAR are concatenated into every path; EXT is the data file suffix.
ROAD = "Quirino"
YEAR = "2015"
EXT = ".csv"

### Import Traffic Dataset

In [4]:
TRAFFIC_WINDOWSIZE = 4
TRAFFIC_FILENAME = "eng_win" + str(TRAFFIC_WINDOWSIZE) + "_mmda_" + ROAD + "_" + YEAR
#TRAFFIC_FILENAME = "noeng_mmda_" + ROAD + "_" + YEAR +"_transformed"

# Load the traffic CSV and replace missing values with 0.
traffic_raw_data = pd.read_csv("data/mmda/" + TRAFFIC_FILENAME + EXT, skipinitialspace=True)
traffic_raw_data = traffic_raw_data.fillna(0)

# Positional indices of the columns to drop.
#   0-2 : dt, lineName, stationName (date/time and identifiers)
#   3-4 : statusN, statusS (kept)
# NOTE(review): an earlier comment described 5-14 as original weather variables
# and 15-46 as engineered traffic, which does not match the index ranges used
# below - confirm the layout against the CSV header.
cols_to_remove = [0, 1, 2]

# window 1
statusN = list(range(5, 9))
statusS = list(range(12, 16))

cols_to_remove += statusN + statusS

# window >= 2
statusN2 = list(range(9, 12))
statusS2 = list(range(16, 19))

cols_to_remove += statusN2 + statusS2

#cols_to_remove += [3, 4] #statusN , statusS

# Index `columns` with the flat list of positions: a nested list
# (`columns[[cols_to_remove]]`) is treated as 2-D indexing by current
# NumPy/pandas and fails. Dropping into a new frame (instead of aliasing the raw
# frame and dropping in place) leaves `traffic_raw_data` intact.
traffic_dataset = traffic_raw_data.drop(traffic_raw_data.columns[cols_to_remove], axis=1)
traffic_dataset.head()

Unnamed: 0,statusN,statusS
0,0.0,0.0
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0


### Import Weather Dataset

In [5]:
WEATHER_WINDOWSIZE = 9
WEATHER_FILENAME = "eng_win" + str(WEATHER_WINDOWSIZE) + "_wwo_" + YEAR

# Load the weather CSV and replace missing values with 0.
weather_dataset = pd.read_csv("data/wwo/" + WEATHER_FILENAME + EXT, skipinitialspace=True)
weather_dataset = weather_dataset.fillna(0)

# Positional indices of the columns to drop; the first three are always removed.
cols_to_remove = [0, 1, 2]

# Index ranges of the engineered columns, one pair per weather variable.
# The un-suffixed range is the group labelled "Window = 1" / expanding window
# below; the "2"-suffixed range is the group labelled "Window >= 2" / rolling
# window.
# NOTE(review): these positions depend on the CSV column order - confirm them
# against the file header.
temp = list(range(13, 17))
temp2 = list(range(17, 20))
windspeedkmph = list(range(20, 24))
windspeedkmph2 = list(range(24, 27))
cond = list(range(27, 31))
cond2 = list(range(31, 34))
precip = list(range(34, 38))
precip2 = list(range(38, 41))
humid = list(range(41, 45))
humid2 = list(range(45, 48))
visibility = list(range(48, 52))
visibility2 = list(range(52, 55))
pressure = list(range(55, 59))
pressure2 = list(range(59, 62))
cloudcover = list(range(62, 66))
cloudcover2 = list(range(66, 69))
dewpoint = list(range(69, 73))
dewpoint2 = list(range(73, 76))
windgustkmph = list(range(76, 80))
windgustkmph2 = list(range(80, 83))

# Expanding window: drop all expanding-window columns (Window = 1).
cols_to_remove += temp + windspeedkmph + cond + precip + humid + visibility + pressure +  cloudcover + dewpoint + windgustkmph

# Window >= 2: drop these columns for every variable except temp and
# windspeedkmph, whose "2" ranges are therefore kept.
cols_to_remove += cond2 + precip2 + humid2 + visibility2 + pressure2 + cloudcover2 + dewpoint2 + windgustkmph2
# All rolling window columns
#cols_to_remove += temp2 + windspeedkmph2 + cond2 + precip2 + humid2 + visibility2 + pressure2 + cloudcover2 + dewpoint2 + windgustkmph2

#cols_to_remove += [3, 4, 5, 6, 7, 8, 9, 10, 11, 12] # All Original Weather Variables

# Index `columns` with the flat list of positions: a nested list
# (`columns[[cols_to_remove]]`) is treated as 2-D indexing by current
# NumPy/pandas and fails.
weather_dataset = weather_dataset.drop(weather_dataset.columns[cols_to_remove], axis=1)
weather_dataset.head()

Unnamed: 0,tempC,windspeedKmph,tempC_Rmean (window = 9),tempC_Rmin (window = 9),tempC_Rmax (window = 9),windspeedKmph_Rmean (window = 9),windspeedKmph_Rmin (window = 9),windspeedKmph_Rmax (window = 9)
0,0.2,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
1,0.1875,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
2,0.175,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
3,0.1625,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
4,0.15,0.295455,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
FLOOD_WINDOWSIZE = 2
FLOOD_FILENAME = "eng_win" + str(FLOOD_WINDOWSIZE) + "_flood_" + YEAR

# Load the flood CSV with missing values replaced by 0.
flood_raw_data = pd.read_csv(
    "data/flood/" + FLOOD_FILENAME + EXT,
    skipinitialspace=True,
).fillna(0)

# Positional indices to drop: column 0 plus the two groups of columns 2-5 and
# 6-8, so only column 1 is kept.
flood = list(range(2, 6))
flood2 = [6, 7, 8]
cols_to_remove = [0] + flood + flood2

# Drop by label into a new frame; the raw frame is left untouched.
flood_dataset = flood_raw_data.drop(flood_raw_data.columns[cols_to_remove], axis=1)
flood_dataset.head()

Unnamed: 0,WL [El.m]
0,0.814856
1,0.814856
2,0.814856
3,0.814856
4,0.814856


In [7]:
# Append the flood columns to the weather features (column-wise, aligned on the
# row index). Any flood column already present in `weather_dataset` is dropped
# first, so re-running this cell does not add a duplicate copy of it.
weather_dataset = pd.concat(
    [
        weather_dataset.drop(flood_dataset.columns, axis=1, errors="ignore"),
        flood_dataset,
    ],
    axis=1,
)
weather_dataset.head()

Unnamed: 0,tempC,windspeedKmph,tempC_Rmean (window = 9),tempC_Rmin (window = 9),tempC_Rmax (window = 9),windspeedKmph_Rmean (window = 9),windspeedKmph_Rmin (window = 9),windspeedKmph_Rmax (window = 9),WL [El.m]
0,0.2,0.295455,0.0,0.0,0.0,0.0,0.0,0.0,0.814856
1,0.1875,0.295455,0.0,0.0,0.0,0.0,0.0,0.0,0.814856
2,0.175,0.295455,0.0,0.0,0.0,0.0,0.0,0.0,0.814856
3,0.1625,0.295455,0.0,0.0,0.0,0.0,0.0,0.0,0.814856
4,0.15,0.295455,0.0,0.0,0.0,0.0,0.0,0.0,0.814856


<br><br>
## Training PM1

### Preparing dataset for PM1

In [8]:
# Number of rows the target is moved ahead of the features: the target is built
# with `.shift(-shift)` and the last `shift` rows are trimmed from X and Y.
shift = 1

In [9]:
# Target variable: statusS moved `shift` rows earlier, so each row carries the
# value observed `shift` rows later. The gaps left at the end are filled with 0
# and the values are rounded to 5 decimals.
Y = (
    traffic_dataset.statusS
    .shift(-shift)
    .fillna(0)
    .round(5)
)
# Trim the trailing `shift` rows, which have no future value to predict.
Y = Y[:-shift]

In [10]:
# PM1 features: the traffic columns, trimmed by `shift` rows to match Y.
X = traffic_dataset[:-shift]

# Ordered (unshuffled) split with 67% of the rows held out for testing; every
# part is converted to a NumPy array.
X_train, X_test, Y_train, Y_test = [
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
]

# Fit the scaler on the training features only; X_test is left unscaled here.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training PM1

In [11]:
# Build PM1 from the shared hyperparameters and fit it on the scaled traffic
# training set.
pm1_params = dict(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    learning_rate_rbm=RBM_LEARNING_RATE,
    learning_rate=DBN_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    n_iter_backprop=DBN_EPOCHS,
    batch_size=BATCH_SIZE,
    activation_function=ACTIVE_FUNC,
)
pm1 = SupervisedDBNRegression(**pm1_params)
pm1.fit(X_train, Y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.083082
>> Epoch 2 finished 	RBM Reconstruction error 0.039274
>> Epoch 3 finished 	RBM Reconstruction error 0.032054
>> Epoch 4 finished 	RBM Reconstruction error 0.028211
>> Epoch 5 finished 	RBM Reconstruction error 0.028537
>> Epoch 1 finished 	RBM Reconstruction error 0.091676
>> Epoch 2 finished 	RBM Reconstruction error 0.050343
>> Epoch 3 finished 	RBM Reconstruction error 0.024477
>> Epoch 4 finished 	RBM Reconstruction error 0.017136
>> Epoch 5 finished 	RBM Reconstruction error 0.013503
>> Epoch 1 finished 	RBM Reconstruction error 0.078696
>> Epoch 2 finished 	RBM Reconstruction error 0.051939
>> Epoch 3 finished 	RBM Reconstruction error 0.025084
>> Epoch 4 finished 	RBM Reconstruction error 0.013016
>> Epoch 5 finished 	RBM Reconstruction error 0.011979
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 0 finished 	ANN training loss 0.018365
>> Epoch 1 finished 	ANN training loss 0.013482
>>

>> Epoch 149 finished 	ANN training loss 0.010548
[END] Fine tuning step


SupervisedDBNRegression(batch_size=16, dropout_p=0, l2_regularization=1.0,
            learning_rate=0.01, n_iter_backprop=150, verbose=True)

### Testing PM1

In [12]:
# Test
# Scale into a separate variable instead of overwriting X_test: reassigning
# X_test here would scale already-scaled data if this cell were run again.
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = pm1.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

Done.
R-squared: 0.924729
MSE: 0.006026
MAE: 0.017608


In [13]:
# Compile results: take the first element of every prediction row and pair it
# with the corresponding actual value.
pm1_predicted = [row[0] for row in Y_pred]
pm1_results = pd.DataFrame(data={'Predicted': pm1_predicted, 'Actual': Y_test})

In [14]:
# Write the PM1 predicted/actual table to CSV without the row index.
pm1_output_path = "output/pm1_output_" + ROAD + "_" + YEAR + EXT
pm1_results.to_csv(pm1_output_path, encoding='utf-8', index=False)

In [15]:
# Persist the trained PM1 model under a road- and year-specific file name.
pm1_model_path = 'models/pm1_' + ROAD + '_' + YEAR + '.pkl'
pm1.save(pm1_model_path)

<br><br>
## Training PM2

### Preparing dataset for PM2

In [16]:
# PM2 features: the weather (plus flood) columns, trimmed by `shift` rows to
# match the target Y prepared for PM1.
X = weather_dataset[:-shift]

# Ordered (unshuffled) split with 67% of the rows held out for testing; every
# part is converted to a NumPy array.
X_train, X_test, Y_train, Y_test = [
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
]

# Fit a fresh scaler on the training features only; X_test is left unscaled here.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training PM2

In [17]:
# Build PM2 from the shared hyperparameters and fit it on the scaled weather
# training set.
pm2_params = dict(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    learning_rate_rbm=RBM_LEARNING_RATE,
    learning_rate=DBN_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    n_iter_backprop=DBN_EPOCHS,
    batch_size=BATCH_SIZE,
    activation_function=ACTIVE_FUNC,
)
pm2 = SupervisedDBNRegression(**pm2_params)
pm2.fit(X_train, Y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.187569
>> Epoch 2 finished 	RBM Reconstruction error 0.116009
>> Epoch 3 finished 	RBM Reconstruction error 0.084389
>> Epoch 4 finished 	RBM Reconstruction error 0.081148
>> Epoch 5 finished 	RBM Reconstruction error 0.079729
>> Epoch 1 finished 	RBM Reconstruction error 0.110599
>> Epoch 2 finished 	RBM Reconstruction error 0.089639
>> Epoch 3 finished 	RBM Reconstruction error 0.073596
>> Epoch 4 finished 	RBM Reconstruction error 0.061132
>> Epoch 5 finished 	RBM Reconstruction error 0.052825
>> Epoch 1 finished 	RBM Reconstruction error 0.053468
>> Epoch 2 finished 	RBM Reconstruction error 0.050684
>> Epoch 3 finished 	RBM Reconstruction error 0.047410
>> Epoch 4 finished 	RBM Reconstruction error 0.045793
>> Epoch 5 finished 	RBM Reconstruction error 0.044788
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 0 finished 	ANN training loss 0.123025
>> Epoch 1 finished 	ANN training loss 0.120624
>>

>> Epoch 149 finished 	ANN training loss 0.090450
[END] Fine tuning step


SupervisedDBNRegression(batch_size=16, dropout_p=0, l2_regularization=1.0,
            learning_rate=0.01, n_iter_backprop=150, verbose=True)

### Testing PM2

In [18]:
# Test
# Scale into a separate variable instead of overwriting X_test: reassigning
# X_test here would scale already-scaled data if this cell were run again.
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = pm2.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

Done.
R-squared: -0.089768
MSE: 0.087244
MAE: 0.231066


In [19]:
# Compile results: take the first element of every prediction row and pair it
# with the corresponding actual value.
pm2_predicted = [row[0] for row in Y_pred]
pm2_results = pd.DataFrame(data={'Predicted': pm2_predicted, 'Actual': Y_test})

In [20]:
# Write the PM2 predicted/actual table to CSV without the row index, then
# persist the trained PM2 model.
pm2_output_path = "output/pm2_output_" + ROAD + "_" + YEAR + EXT
pm2_results.to_csv(pm2_output_path, encoding='utf-8', index=False)

pm2_model_path = 'models/pm2_' + ROAD + '_' + YEAR + '.pkl'
pm2.save(pm2_model_path)

<br><br>
## Fusion Center
### Preparing Training Dataset for Fusion Center

In [21]:
# Reload the exported PM1 and PM2 prediction tables from disk.
pm1_results_path = "output/pm1_output_" + ROAD + "_" + YEAR + EXT
pm2_results_path = "output/pm2_output_" + ROAD + "_" + YEAR + EXT

pm1_results = pd.read_csv(pm1_results_path, skipinitialspace=True)
pm2_results = pd.read_csv(pm2_results_path, skipinitialspace=True)

In [22]:
# Fusion inputs: the PM1 and PM2 predictions side by side as a two-column array.
fusion_dataset = np.array(
    pd.DataFrame(
        data={
            'PM1-Output': pm1_results["Predicted"],
            'PM2-Output': pm2_results["Predicted"],
        }
    )
)
# Fusion target: the actual values stored alongside the PM1 predictions.
actual_dataset = pm1_results["Actual"]

In [23]:
# Target: the actual values, rounded to 5 decimals.
Y = actual_dataset.round(5)

# Features: the stacked PM1/PM2 predictions.
X = fusion_dataset

# Ordered (unshuffled) split with 67% of the rows held out for testing; every
# part is converted to a NumPy array.
X_train, X_test, Y_train, Y_test = [
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
]

# Fit a fresh scaler on the training features only; X_test is left unscaled here.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training the Fusion Center

In [24]:
# Training: build the fusion center from the shared hyperparameters and fit it
# on the scaled PM1/PM2 predictions.
fc_params = dict(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    learning_rate_rbm=RBM_LEARNING_RATE,
    learning_rate=DBN_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    n_iter_backprop=DBN_EPOCHS,
    batch_size=BATCH_SIZE,
    activation_function=ACTIVE_FUNC,
)
fc = SupervisedDBNRegression(**fc_params)
fc.fit(X_train, Y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.080204
>> Epoch 2 finished 	RBM Reconstruction error 0.070645
>> Epoch 3 finished 	RBM Reconstruction error 0.062924
>> Epoch 4 finished 	RBM Reconstruction error 0.057098
>> Epoch 5 finished 	RBM Reconstruction error 0.051241
>> Epoch 1 finished 	RBM Reconstruction error 0.035660
>> Epoch 2 finished 	RBM Reconstruction error 0.035179
>> Epoch 3 finished 	RBM Reconstruction error 0.034521
>> Epoch 4 finished 	RBM Reconstruction error 0.033969
>> Epoch 5 finished 	RBM Reconstruction error 0.033293
>> Epoch 1 finished 	RBM Reconstruction error 0.003001
>> Epoch 2 finished 	RBM Reconstruction error 0.002977
>> Epoch 3 finished 	RBM Reconstruction error 0.002981
>> Epoch 4 finished 	RBM Reconstruction error 0.002967
>> Epoch 5 finished 	RBM Reconstruction error 0.002954
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 0 finished 	ANN training loss 0.067234
>> Epoch 1 finished 	ANN training loss 0.067041
>>

>> Epoch 149 finished 	ANN training loss 0.003834
[END] Fine tuning step


SupervisedDBNRegression(batch_size=16, dropout_p=0, l2_regularization=1.0,
            learning_rate=0.01, n_iter_backprop=150, verbose=True)

### Testing the Fusion Center

In [25]:
# Test
# Scale into a separate variable instead of overwriting X_test: reassigning
# X_test here would scale already-scaled data if this cell were run again.
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = fc.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

Done.
R-squared: 0.917420
MSE: 0.007064
MAE: 0.021589


In [26]:
# Compile results: take the first element of every prediction row and pair it
# with the corresponding actual value.
fc_predicted = [row[0] for row in Y_pred]
fc_results = pd.DataFrame(data={'Predicted': fc_predicted, 'Actual': Y_test})

In [27]:
# Write the fusion center predicted/actual table to CSV without the row index.
fc_output_path = "output/fc_output_" + ROAD + "_" + YEAR + EXT
fc_results.to_csv(fc_output_path, encoding='utf-8', index=False)

<br><br>
## Saving the Fusion Center model

In [28]:
# Persist the trained fusion center model under a road- and year-specific
# file name.
fc_model_path = 'models/fc_' + ROAD + '_' + YEAR + '.pkl'
fc.save(fc_model_path)