## Import packages

In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.regression import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

from dbn.tensorflow import SupervisedDBNRegression

## Import training dataset for pm1 & pm2

### Define HyperParameters

In [2]:
# Unsupervised RBM pre-training: epochs and learning rate
# (passed as n_epochs_rbm / learning_rate_rbm when the models are built).
RBM_EPOCHS, RBM_LEARNING_RATE = 5, 0.01

# Supervised fine-tuning: backprop iterations and learning rate
# (passed as n_iter_backprop / learning_rate when the models are built).
DBN_EPOCHS, DBN_LEARNING_RATE = 150, 0.01

# Settings shared by every model trained in this notebook.
HIDDEN_LAYER_STRUCT = [20, 50, 100]  # units per hidden layer
ACTIVE_FUNC = 'relu'
BATCH_SIZE = 16

In [3]:
# Identifiers used to build every input/output file name in this notebook:
# the road being modelled, the year of the data, and the file extension.
ROAD, YEAR, EXT = "Taft Ave.", "2015", ".csv"

In [6]:
# Load the engineered traffic file for the configured road/year and keep only
# the columns that are fed to PM1.
TRAFFIC_WINDOWSIZE = 4
TRAFFIC_FILENAME = "eng_win" + str(TRAFFIC_WINDOWSIZE) + "_mmda_" + ROAD + "_" + YEAR
#TRAFFIC_FILENAME = "noeng_mmda_" + ROAD + "_" + YEAR +"_transformed"

traffic_raw_data = pd.read_csv("data/mmda/" + TRAFFIC_FILENAME + EXT, skipinitialspace=True)
traffic_raw_data = traffic_raw_data.fillna(0)  # missing values are treated as 0

# Positional indices of the columns to drop.
# 0-2   : dt + lineName + stationName
# 3-4   : statusN, statusS (kept)
# 5-18  : engineered window features for statusN / statusS
#         (NOTE(review): layout inferred from the ranges below - confirm against the CSV header)
cols_to_remove = [0, 1, 2]

# window 1
statusN = list(range(5, 9))
statusS = list(range(12, 16))

cols_to_remove += statusN + statusS

# window >= 2
statusN2 = list(range(9, 12))
statusS2 = list(range(16, 19))

cols_to_remove += statusN2 + statusS2

#cols_to_remove += [3, 4] #statusN , statusS

# Index the columns with the flat list of positions. The previous
# `columns[[cols_to_remove]]` wrapped the list in another list, i.e. a 2-D
# indexer, which pandas deprecated and newer releases reject.
# The drop returns a new frame instead of mutating in place, so
# traffic_raw_data keeps every column and this step can be re-run safely.
traffic_dataset = traffic_raw_data.drop(traffic_raw_data.columns[cols_to_remove], axis=1)
traffic_dataset.head()

Unnamed: 0,statusN,statusS
0,0.5,0.5
1,0.5,0.5
2,0.5,0.5
3,0.5,0.5
4,0.5,0.5


In [10]:
# Load the engineered weather file for the configured year and keep only the
# columns that are fed to PM2.
WEATHER_WINDOWSIZE = 4
WEATHER_FILENAME = "eng_win" + str(WEATHER_WINDOWSIZE) + "_wwo_" + YEAR
#WEATHER_FILENAME = "noeng_wwo_" + YEAR
weather_raw_data = pd.read_csv("data/wwo/" + WEATHER_FILENAME + EXT, skipinitialspace=True)
weather_raw_data = weather_raw_data.fillna(0)  # missing values are treated as 0

# Positional indices of the columns to drop.
# 0-2  : dt + lineName + stationName
# 3-12 : original weather variables (3 and 4 are kept, 5-12 are dropped below)
# 13+  : engineered features, 7 columns per variable
#        (4 "window = 1" columns followed by 3 "window >= 2" columns)

cols_to_remove = [0, 1, 2]

temp = list(range(13, 17))
temp2 = list(range(17, 20))
windspeedkmph = list(range(20, 24))
windspeedkmph2 = list(range(24, 27))
cond = list(range(27, 31))
cond2 = list(range(31, 34))
precip = list(range(34, 38))
precip2 = list(range(38, 41))
humid = list(range(41, 45))
humid2 = list(range(45, 48))
visibility = list(range(48, 52))
visibility2 = list(range(52, 55))
pressure = list(range(55, 59))
pressure2 = list(range(59, 62))
cloudcover = list(range(62, 66))
cloudcover2 = list(range(66, 69))
dewpoint = list(range(69, 73))
dewpoint2 = list(range(73, 76))
windgustkmph = list(range(76, 80))
windgustkmph2 = list(range(80, 83))

#Window = 1
#cols_to_remove += temp + visibility + pressure +  cloudcover + dewpoint + windgustkmph #Window = 1
cols_to_remove += temp + windspeedkmph + cond + precip + humid + visibility + pressure +  cloudcover + dewpoint + windgustkmph #Window = 1

#Window  >=2
# temp2 and pressure2 are deliberately absent here, so those engineered
# columns survive the drop.
cols_to_remove += windspeedkmph2 + precip2 + cond2 + humid2 + visibility2 + cloudcover2 + dewpoint2 + windgustkmph2
#cols_to_remove += temp2 + windspeedkmph2 + cond2 + precip2 + humid2 + visibility2 + pressure2 + cloudcover2 + dewpoint2 + windgustkmph2

# Original weather variables at positions 5-12 (positions 3 and 4 are kept).
cols_to_remove += [5, 6, 7, 8, 9, 10, 11, 12] #Original Weather Variables

# Index the columns with the flat list of positions. The previous
# `columns[[cols_to_remove]]` wrapped the list in another list, i.e. a 2-D
# indexer, which pandas deprecated and newer releases reject.
weather_dataset = weather_raw_data.drop(weather_raw_data.columns[cols_to_remove], axis=1)
weather_dataset.head()

Unnamed: 0,tempC,windspeedKmph,tempC_Rmean (window = 4),tempC_Rmin (window = 4),tempC_Rmax (window = 4),pressure_Rmean (window = 4),pressure_Rmin (window = 4),pressure_Rmax (window = 4)
0,0.2,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
1,0.1875,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
2,0.175,0.295455,0.0,0.0,0.0,0.0,0.0,0.0
3,0.1625,0.295455,0.05,0.0,0.2,0.189655,0.0,0.758621
4,0.15,0.295455,0.096875,0.0,0.3875,0.37931,0.0,1.517241


In [6]:
# Load the engineered flood file for the configured year.
FLOOD_WINDOWSIZE = 2
FLOOD_FILENAME = "eng_win" + str(FLOOD_WINDOWSIZE) + "_flood_" + YEAR
flood_raw_data = pd.read_csv(
    "data/flood/" + FLOOD_FILENAME + EXT,
    skipinitialspace=True,
).fillna(0)  # missing values are treated as 0

# Positional indices to drop: column 0 plus the two groups 2-5 and 6-8,
# which leaves only the column at position 1.
flood = list(range(2, 6))
flood2 = list(range(6, 9))
cols_to_remove = [0] + flood + flood2

flood_dataset = flood_raw_data.drop(flood_raw_data.columns[cols_to_remove], axis=1)
flood_dataset.head()

Unnamed: 0,WL [El.m]
0,0.814856
1,0.814856
2,0.814856
3,0.814856
4,0.814856


In [7]:
# Append the flood features to the weather features column-wise.
# Only flood columns that are not already present are added: the result is
# stored back into weather_dataset, so without this guard every re-run of the
# cell would append another copy of the flood columns.
new_flood_cols = [col for col in flood_dataset.columns if col not in weather_dataset.columns]
weather_dataset = pd.concat([weather_dataset, flood_dataset[new_flood_cols]], axis=1)
weather_dataset.head()

Unnamed: 0,humidity,humidity_Rmean (window = 9),humidity_Rmin (window = 9),humidity_Rmax (window = 9),WL [El.m]
0,0.84058,0.0,0.0,0.0,0.814856
1,0.84058,0.0,0.0,0.0,0.814856
2,0.84058,0.0,0.0,0.0,0.814856
3,0.84058,0.0,0.0,0.0,0.814856
4,0.84058,0.0,0.0,0.0,0.814856


<br><br>
## Training PM1

### Preparing dataset for PM1

In [8]:
# Number of rows the target is shifted ahead of the features: the target series
# is moved up by `shift` rows and the last `shift` rows of X and Y are dropped.
shift = 1

In [9]:
# Target series: statusS moved `shift` rows earlier, so that row t holds the
# value observed at row t + shift. The rows vacated at the end are filled with
# 0, values are rounded to 5 decimals, and the last `shift` rows are cut off.
Y = (
    traffic_dataset.statusS
    .shift(-shift)
    .fillna(0)
    .round(5)
    [:-shift]
)

In [10]:
# Features for PM1: the traffic columns, trimmed by `shift` rows so that they
# line up with the shifted target Y.
X = traffic_dataset[:-shift]

# Ordered (unshuffled) split: test_size=0.67 puts the first 33% of rows in the
# training set and the remaining 67% in the test set. Each part is converted
# to a NumPy array.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
)

# Fit the scaler on the training features only; X_test is transformed later,
# in the testing cell.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training PM1

In [11]:
# Build PM1 from the notebook-level hyperparameters, then train it on the
# scaled traffic features.
pm1_params = dict(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    learning_rate_rbm=RBM_LEARNING_RATE,
    learning_rate=DBN_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    n_iter_backprop=DBN_EPOCHS,
    batch_size=BATCH_SIZE,
    activation_function=ACTIVE_FUNC,
)
pm1 = SupervisedDBNRegression(**pm1_params)
pm1.fit(X_train, Y_train)

[START] Pre-training step:


InternalError: Blas GEMM launch failed : a.shape=(20, 8), b.shape=(8, 16), m=20, n=16, k=8
	 [[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Variable/read, transpose)]]

Caused by op 'MatMul', defined at:
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
    self.io_loop.start()
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2856, in run_ast_nodes
    if self.run_code(code, result):
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-7707ddeef497>", line 8, in <module>
    pm1.fit(X_train, Y_train)
  File "C:\Users\Ronnie Nieva\Documents\Dydy\school\THESIS\code\deep-belief-network-master\deep-belief-network-master\dbn\models.py", line 334, in fit
    self.pre_train(X)
  File "C:\Users\Ronnie Nieva\Documents\Dydy\school\THESIS\code\deep-belief-network-master\deep-belief-network-master\dbn\models.py", line 356, in pre_train
    self.unsupervised_dbn.fit(X)
  File "C:\Users\Ronnie Nieva\Documents\Dydy\school\THESIS\code\deep-belief-network-master\deep-belief-network-master\dbn\models.py", line 270, in fit
    rbm.fit(input_data)
  File "C:\Users\Ronnie Nieva\Documents\Dydy\school\THESIS\code\deep-belief-network-master\deep-belief-network-master\dbn\tensorflow\models.py", line 87, in fit
    self._build_model()
  File "C:\Users\Ronnie Nieva\Documents\Dydy\school\THESIS\code\deep-belief-network-master\deep-belief-network-master\dbn\tensorflow\models.py", line 145, in _build_model
    tf.transpose(tf.matmul(self.W, tf.transpose(self.visible_units_placeholder))) + self.c)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1891, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 2436, in _mat_mul
    name=name)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "c:\users\ronnie nieva\appdata\local\conda\conda\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(20, 8), b.shape=(8, 16), m=20, n=16, k=8
	 [[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Variable/read, transpose)]]


### Testing PM1

In [None]:
# Test
# The scaled features go into their own variable instead of overwriting X_test:
# re-running this cell would otherwise apply the scaler to already-scaled data
# and silently change the predictions.
# Relies on `min_max_scaler` still being the scaler fitted in the PM1
# preparation cell (the same name is reused later in the notebook).
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = pm1.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

In [None]:
# Compiling Results
# Take the first element of every prediction row and pair it with the actual
# target values in a two-column frame.
temp = [row[0] for row in Y_pred]
d = {'Predicted': temp, 'Actual': Y_test}
pm1_results = pd.DataFrame(data=d)

In [None]:
# Write the PM1 predicted/actual table to CSV (row index omitted).
pm1_results.to_csv(
    path_or_buf="output/pm1_output_" + ROAD + "_" + YEAR + EXT,
    index=False,
    encoding='utf-8',
)

In [None]:
# Persist the trained PM1 model right after evaluation.
# NOTE(review): the final "Saving the models" cell saves pm1 to this same path again.
pm1.save('models/pm1_' + ROAD + '_' + YEAR +'.pkl')

<br><br>
## Training PM2

### Preparing dataset for PM2

In [None]:
# Features for PM2: the combined weather/flood columns, trimmed by `shift` rows
# so that they line up with the target Y prepared for PM1.
X = weather_dataset[:-shift]

# Ordered (unshuffled) split: test_size=0.67 puts the first 33% of rows in the
# training set and the remaining 67% in the test set. Each part is converted
# to a NumPy array.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
)

# Fit a fresh scaler on the training features only; X_test is transformed
# later, in the testing cell.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training PM2

In [None]:
# Build PM2 from the notebook-level hyperparameters, then train it on the
# scaled weather/flood features.
pm2_params = dict(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    learning_rate_rbm=RBM_LEARNING_RATE,
    learning_rate=DBN_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    n_iter_backprop=DBN_EPOCHS,
    batch_size=BATCH_SIZE,
    activation_function=ACTIVE_FUNC,
)
pm2 = SupervisedDBNRegression(**pm2_params)
pm2.fit(X_train, Y_train)

### Testing PM2

In [None]:
# The scaled features go into their own variable instead of overwriting X_test:
# re-running this cell would otherwise apply the scaler to already-scaled data
# and silently change the predictions.
# Relies on `min_max_scaler` still being the scaler fitted in the PM2
# preparation cell (the same name is reused elsewhere in the notebook).
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = pm2.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (in percent) of y_pred against y_true.

    Both inputs are flattened to 1-D before comparison. Without this, a target
    of shape (n,) and predictions of shape (n, 1) broadcast to an (n, n) matrix
    and the result is averaged over every pair instead of matching rows.

    Entries whose true value is 0 are excluded, because their percentage error
    is undefined (division by zero).

    Parameters:
        y_true: array-like of actual values.
        y_pred: array-like of predicted values, same number of elements as y_true.

    Returns:
        The MAPE as a float percentage, or NaN when no non-zero true value exists.

    Raises:
        ValueError: if the two inputs do not hold the same number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same number of elements "
            "(got %d and %d)" % (y_true.size, y_pred.size)
        )

    nonzero = y_true != 0
    if not nonzero.any():
        return float('nan')
    return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100

# Report MAPE for the PM2 predictions (the original referenced the undefined
# name `Ypred`, which raised NameError; the predictions are stored in `Y_pred`).
print('\nMAPE: %f' % (mean_absolute_percentage_error(Y_test, Y_pred)))

In [None]:
# Compiling Results
# Take the first element of every prediction row and pair it with the actual
# target values in a two-column frame.
temp = [row[0] for row in Y_pred]
d = {'Predicted': temp, 'Actual': Y_test}
pm2_results = pd.DataFrame(data=d)

In [None]:
# Write the PM2 predicted/actual table to CSV (row index omitted).
pm2_results.to_csv(
    path_or_buf="output/pm2_output_" + ROAD + "_" + YEAR + EXT,
    index=False,
    encoding='utf-8',
)

<br><br>
## Fusion Center
### Preparing Training Dataset for Fusion Center

In [None]:
# Reload the exported PM1 / PM2 result tables from disk; they supply the
# inputs of the fusion center.
pm1_results = pd.read_csv(
    "output/pm1_output_" + ROAD + "_" + YEAR + EXT,
    skipinitialspace=True,
)
pm2_results = pd.read_csv(
    "output/pm2_output_" + ROAD + "_" + YEAR + EXT,
    skipinitialspace=True,
)

In [None]:
# Fusion-center inputs: the two models' predictions side by side, as an array.
d = {
    'PM1-Output': pm1_results.Predicted,
    'PM2-Output': pm2_results.Predicted,
}
fusion_dataset = np.array(pd.DataFrame(data=d))

# Fusion-center target: the actual values stored alongside the PM1 predictions.
actual_dataset = pm1_results.Actual

In [None]:
# Target for the fusion center: the actual values, rounded to 5 decimals.
Y = actual_dataset.round(5)

# Features for the fusion center: the stacked PM1 / PM2 predictions.
X = fusion_dataset

# Ordered (unshuffled) split: test_size=0.67 puts the first 33% of rows in the
# training set and the remaining 67% in the test set. Each part is converted
# to a NumPy array.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
)

# Fit a fresh scaler on the training features only; X_test is transformed
# later, in the testing cell.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training the Fusion Center

In [None]:
# Training
# Build the fusion center from the notebook-level hyperparameters, then train
# it on the scaled PM1 / PM2 predictions.
fc_params = dict(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    learning_rate_rbm=RBM_LEARNING_RATE,
    learning_rate=DBN_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    n_iter_backprop=DBN_EPOCHS,
    batch_size=BATCH_SIZE,
    activation_function=ACTIVE_FUNC,
)
fc = SupervisedDBNRegression(**fc_params)
fc.fit(X_train, Y_train)

### Testing the Fusion Center

In [None]:
# Test
# The scaled features go into their own variable instead of overwriting X_test:
# re-running this cell would otherwise apply the scaler to already-scaled data
# and silently change the predictions.
# Relies on `min_max_scaler` still being the scaler fitted in the fusion-center
# preparation cell (the same name is reused earlier in the notebook).
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = fc.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
print('MAE: %f' %(mean_absolute_error(Y_test, Y_pred)))

In [None]:
# Take the first element of every prediction row and pair it with the actual
# target values in a two-column frame.
temp = [row[0] for row in Y_pred]
d = {'Predicted': temp, 'Actual': Y_test}
fc_results = pd.DataFrame(data=d)

In [None]:
# Write the fusion-center predicted/actual table to CSV (row index omitted).
fc_results.to_csv(
    path_or_buf="output/fc_output_" + ROAD  + "_" + YEAR + EXT,
    index=False,
    encoding='utf-8',
)

<br><br>
## Saving the models

In [None]:
# Save the three trained models, in the order pm1, pm2, fc, each under its own
# path prefix followed by the road and year.
for model, prefix in ((pm1, 'models/pm1_'), (pm2, 'models/pm2_'), (fc, 'models/fc_')):
    model.save(prefix + ROAD + '_' + YEAR + '.pkl')