## Import packages

In [19]:
import numpy as np
import pandas as pd

np.random.seed(1337)  # for reproducibility
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.regression import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from dbn.tensorflow import SupervisedDBNRegression

## Import training dataset for PM1 & PM2

### Define Hyperparameters

In [20]:
# Settings forwarded to every SupervisedDBNRegression built in this notebook.
# The trailing note on each line names the constructor keyword that receives it.
RBM_EPOCHS, DBN_EPOCHS = 5, 10                  # n_epochs_rbm, n_iter_backprop
RBM_LEARNING_RATE = DBN_LEARNING_RATE = 0.01    # learning_rate_rbm, learning_rate
HIDDEN_LAYER_STRUCT = [25, 50, 100]             # hidden_layers_structure
ACTIVE_FUNC = 'relu'                            # activation_function
BATCH_SIZE = 16                                 # batch_size

In [21]:
# Build the path of the merged mmda/wwo CSV for the chosen road and year,
# then load it. ROAD, YEAR and EXT are reused later for the output file names.
ROAD = "Taft Ave."
YEAR = "2015"
EXT = ".csv"
FILENAME = "_".join(["merged_mmda_wwo", ROAD, YEAR])
dataset_path = "data/mmda-wwo/{}{}".format(FILENAME, EXT)
# skipinitialspace=True drops spaces that follow a delimiter.
original_dataset = pd.read_csv(dataset_path, skipinitialspace=True)

In [22]:
# Preparing Traffic Dataset
# Keep only the two status columns; drop date time and every weather column.
# Column layout by position:
#   0-2   -> dt, lineName, stationName
#   3-4   -> statusN, statusS
#   5-end -> weather variables
traffic_dataset = original_dataset
cols_to_remove = [0, 1, 2] + list(range(5, traffic_dataset.shape[1]))

# Index the column labels with the flat list of positions. Wrapping the list
# in a second pair of brackets (columns[[cols_to_remove]]) builds a 2-D
# indexer, which current NumPy/pandas reject.
# drop() returns a new frame, so original_dataset itself is not modified.
traffic_dataset = traffic_dataset.drop(traffic_dataset.columns[cols_to_remove], axis=1)
#traffic_dataset.head()

In [23]:
# Preparing Weather Dataset
# Drop date time, the status columns and the redundant weather columns.
# Column layout by position:
#   0-2   -> dt, lineName, stationName
#   3-4   -> statusN, statusS
#   5-end -> weather variables
weather_dataset = original_dataset
cols_to_remove = [0, 1, 2] + [3, 4]

# Remove Redundant Variables
# Variables = tempC, WindspeedKmph, cond, precipMM, humidity, visibility,
#             pressure, cloudcover, dewPointC, windGustKmph
# NOTE(review): unclear whether the list above names the columns that are kept
# or the ones dropped by the positions below -- confirm against the CSV header.
redundant_variables = [6, 7, 9, 16, 17, 19, 20, 21, 22, 24, 25]
cols_to_remove += redundant_variables

# Index the column labels with the flat list of positions. Wrapping the list
# in a second pair of brackets (columns[[cols_to_remove]]) builds a 2-D
# indexer, which current NumPy/pandas reject.
# drop() returns a new frame, so original_dataset itself is not modified.
weather_dataset = weather_dataset.drop(weather_dataset.columns[cols_to_remove], axis=1)
#weather_dataset.head()

<br><br>
## Training PM1

### Preparing dataset for PM1

In [24]:
# Target series for both predictive models: the statusS column of the merged
# data, rounded to 5 decimal places.
Y = original_dataset["statusS"].round(5)

In [None]:
# PM1 is trained on the traffic-only feature set.
# NOTE(review): traffic_dataset appears to retain the statusS column (positions
# 3-4 are the ones kept when it is built), which is also the target Y --
# confirm this is intended and not target leakage.
X = traffic_dataset

# Order-preserving split (shuffle=False): roughly the first third of the rows
# is used for training and the remaining test_size=0.67 share is held out.
# Each piece is converted to a NumPy array.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
)

# Fit the scaler on the training rows only; the fitted scaler is kept in
# min_max_scaler so the testing cell can apply it to X_test.
min_max_scaler = MinMaxScaler().fit(X_train)
X_train = min_max_scaler.transform(X_train)

### Training PM1

In [None]:
# Build PM1 from the shared hyperparameter constants and fit it on the scaled
# training features.
pm1 = SupervisedDBNRegression(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    activation_function=ACTIVE_FUNC,
    batch_size=BATCH_SIZE,
    # RBM settings
    learning_rate_rbm=RBM_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    # backprop settings
    learning_rate=DBN_LEARNING_RATE,
    n_iter_backprop=DBN_EPOCHS,
)
pm1.fit(X_train, Y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.095327


### Testing PM1

In [None]:
# Test
# Scale the held-out features with the scaler fitted on the training rows.
# The result goes into its own variable: overwriting X_test would make this
# cell non-idempotent (re-running it would scale already-scaled data).
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = pm1.predict(X_test_scaled)
# Report R-squared and mean squared error on the held-out rows.
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))

In [None]:
# Compiling Results
# Take the first element of every prediction row so 'Predicted' becomes a flat
# column next to the true targets.
predicted_values = [row[0] for row in Y_pred]
pm1_results = pd.DataFrame(data={'Predicted': predicted_values, 'Actual': Y_test})

In [None]:
# Exporting Results into csv file
# File name pattern: output/pm1_output_<ROAD>_<YEAR><EXT>; the index is not written.
pm1_output_path = "output/pm1_output_{}_{}{}".format(ROAD, YEAR, EXT)
pm1_results.to_csv(pm1_output_path, index=False, encoding='utf-8')

<br><br>
## Training PM2

### Preparing dataset for PM2

In [None]:
# PM2 is trained on the weather-only feature set; the target Y is the one
# defined for PM1.
X = weather_dataset

# Order-preserving split (shuffle=False): roughly the first third of the rows
# is used for training and the remaining test_size=0.67 share is held out.
# Each piece is converted to a NumPy array.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
)

# Fit a fresh scaler on the training rows only; it replaces the previous
# min_max_scaler and is reused by the testing cell for X_test.
min_max_scaler = MinMaxScaler().fit(X_train)
X_train = min_max_scaler.transform(X_train)

### Training PM2

In [None]:
# Build PM2 from the shared hyperparameter constants and fit it on the scaled
# training features.
pm2 = SupervisedDBNRegression(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    activation_function=ACTIVE_FUNC,
    batch_size=BATCH_SIZE,
    # RBM settings
    learning_rate_rbm=RBM_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    # backprop settings
    learning_rate=DBN_LEARNING_RATE,
    n_iter_backprop=DBN_EPOCHS,
)
pm2.fit(X_train, Y_train)

### Testing PM2

In [None]:
# Test
# Scale the held-out features with the scaler fitted on the training rows.
# The result goes into its own variable: overwriting X_test would make this
# cell non-idempotent (re-running it would scale already-scaled data).
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = pm2.predict(X_test_scaled)
# Report R-squared and mean squared error on the held-out rows.
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))

In [None]:
# Compiling Results
# Take the first element of every prediction row so 'Predicted' becomes a flat
# column next to the true targets.
predicted_values = [row[0] for row in Y_pred]
pm2_results = pd.DataFrame(data={'Predicted': predicted_values, 'Actual': Y_test})

In [None]:
# Exporting Results into csv file
# File name pattern: output/pm2_output_<ROAD>_<YEAR><EXT>; the index is not written.
pm2_output_path = "output/pm2_output_{}_{}{}".format(ROAD, YEAR, EXT)
pm2_results.to_csv(pm2_output_path, index=False, encoding='utf-8')

<br><br>
## Fusion Center
### Preparing Training Dataset for Fusion Center

In [None]:
# Fusion-center inputs: one column per predictive model, holding that model's
# predictions, converted to a NumPy array.
fusion_frame = pd.DataFrame(
    data={
        'PM1-Output': pm1_results.Predicted,
        'PM2-Output': pm2_results.Predicted,
    }
)
fusion_dataset = np.array(fusion_frame)

# Fusion-center targets: the 'Actual' column stored with the PM1 results.
actual_dataset = pm1_results.Actual

In [None]:
# To-be Predicted variable: the actual values paired with the PM predictions,
# rounded to 5 decimal places.
Y = actual_dataset
Y = Y.round(5)

# Other data: the PM1/PM2 prediction pairs.
X = fusion_dataset

# Splitting data
# Order-preserving split (shuffle=False): roughly the first third of the rows
# is used for training and the remaining test_size=0.67 share is held out.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.67, shuffle=False)
# A stray trailing `q` after the next statement made this cell a SyntaxError.
X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

# Data scaling
# Fit a fresh scaler on the training rows only; the testing cell reuses it
# for X_test.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

### Training the Fusion Center

In [None]:
# Training
# Build the fusion center from the shared hyperparameter constants and fit it
# on the scaled PM1/PM2 prediction pairs.
fc = SupervisedDBNRegression(
    hidden_layers_structure=HIDDEN_LAYER_STRUCT,
    activation_function=ACTIVE_FUNC,
    batch_size=BATCH_SIZE,
    # RBM settings
    learning_rate_rbm=RBM_LEARNING_RATE,
    n_epochs_rbm=RBM_EPOCHS,
    # backprop settings
    learning_rate=DBN_LEARNING_RATE,
    n_iter_backprop=DBN_EPOCHS,
)
fc.fit(X_train, Y_train)

### Testing the Fusion Center

In [None]:
# Test
# Scale the held-out features with the scaler fitted on the training rows.
# The result goes into its own variable: overwriting X_test would make this
# cell non-idempotent (re-running it would scale already-scaled data).
X_test_scaled = min_max_scaler.transform(X_test)
Y_pred = fc.predict(X_test_scaled)
# Report R-squared and mean squared error on the held-out rows.
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))

In [None]:
# Compiling Results
# Take the first element of every prediction row so 'Predicted' becomes a flat
# column next to the true targets.
predicted_values = [row[0] for row in Y_pred]
fc_results = pd.DataFrame(data={'Predicted': predicted_values, 'Actual': Y_test})

In [None]:
# Exporting Results into csv file
# File name pattern: output/fc_output_<ROAD>_<YEAR><EXT>; the index is not written.
fc_output_path = "output/fc_output_{}_{}{}".format(ROAD, YEAR, EXT)
fc_results.to_csv(fc_output_path, index=False, encoding='utf-8')

<br><br>
## Saving the models

In [None]:
# Save the models
# Each trained model is written to models/<name>_<ROAD>_<YEAR>.pkl, in the
# order PM1, PM2, fusion center.
for model_name, model in (('pm1', pm1), ('pm2', pm2), ('fc', fc)):
    model.save('models/' + model_name + '_' + ROAD + '_' + YEAR + '.pkl')