In [13]:
# Numerical stack.
import numpy as np
import pandas as pd

# Seed NumPy's global RNG before the modelling libraries are imported so that
# NumPy-based randomness is repeatable from run to run.
np.random.seed(1337)  # for reproducibility

# scikit-learn names are imported from the public packages. The private
# `sklearn.metrics.classification` / `sklearn.metrics.regression` modules were
# deprecated in scikit-learn 0.22 and later removed, so importing from them
# fails on current releases; `sklearn.metrics` exposes the same functions on
# both old and new versions.
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Supervised deep-belief-network regressor from the `dbn.tensorflow` module.
from dbn.tensorflow import SupervisedDBNRegression

In [3]:
# Hyper-parameters forwarded to SupervisedDBNRegression in the training cell.

# Network shape and non-linearity (hidden_layers_structure / activation_function).
HIDDEN_LAYER_STRUCT = [5, 10, 15]
ACTIVE_FUNC = 'relu'

# RBM stage (n_epochs_rbm / learning_rate_rbm).
RBM_EPOCHS, RBM_LEARNING_RATE = 3, 0.01

# Back-propagation stage (n_iter_backprop / learning_rate).
DBN_EPOCHS, DBN_LEARNING_RATE = 20, 0.01

# Mini-batch size (batch_size).
BATCH_SIZE = 16

In [4]:
# Dataset selectors: these strings are concatenated into the CSV file names
# that the traffic, weather and flood cells load.
ROAD, YEAR = "Taft Ave.", "2015"  # road segment and year of the exports
EXT = ".csv"                      # file extension appended to every file name

##### Preparing Traffic Dataset

In [5]:
# File name of the traffic export: "eng_win<size>_mmda_<road>_<year>".
TRAFFIC_WINDOWSIZE = 4
TRAFFIC_FILENAME = "eng_win" + str(TRAFFIC_WINDOWSIZE) + "_mmda_" + ROAD + "_" + YEAR
#TRAFFIC_FILENAME = "noeng_mmda_" + ROAD + "_" + YEAR +"_transformed"

# Load the CSV and replace missing values with 0.
traffic_raw_data = pd.read_csv("data/mmda/" + TRAFFIC_FILENAME + EXT, skipinitialspace=True)
traffic_raw_data = traffic_raw_data.fillna(0)

# Positional indices of the columns to discard: the first three columns plus
# columns 5-18 (the engineered features). Columns 3 and 4 are kept.
cols_to_remove = [0, 1, 2]
statusSN = list(range(5, 19))  # remove all engineered features
cols_to_remove += statusSN

# Index `.columns` with the flat list itself. Wrapping it in another list
# (`columns[[cols_to_remove]]`) builds a 2-D indexer, which newer NumPy/pandas
# versions reject. The drop is also done out of place so `traffic_raw_data`
# keeps every loaded column instead of being mutated through an alias.
traffic_dataset = traffic_raw_data.drop(traffic_raw_data.columns[cols_to_remove], axis=1)
traffic_dataset.head()

Unnamed: 0,statusN,statusS
0,0.5,0.5
1,0.5,0.5
2,0.5,0.5
3,0.5,0.5
4,0.5,0.5


##### Preparing Weather Dataset

In [6]:
# File name of the weather export: "eng_win<size>_wwo_<year>".
WEATHER_WINDOWSIZE = 3
WEATHER_FILENAME = "eng_win" + str(WEATHER_WINDOWSIZE) + "_wwo_" + YEAR

# Load the CSV and replace missing values with 0.
weather_raw_data = pd.read_csv("data/wwo/" + WEATHER_FILENAME + EXT, skipinitialspace=True)
weather_raw_data = weather_raw_data.fillna(0)

# Positional indices of the columns to discard, starting with the first three.
cols_to_remove = [0, 1, 2]

# Each weather variable owns two consecutive index ranges in the file:
# a 4-column range (`<name>`) followed by a 3-column range (`<name>2`).
#For Window = >2
temp = list(range(13, 17))
temp2 = list(range(17, 20))
windspeedkmph = list(range(20, 24))
windspeedkmph2 = list(range(24, 27))
cond = list(range(27, 31))
cond2 = list(range(31, 34))
precip = list(range(34, 38))
precip2 = list(range(38, 41))
humid = list(range(41, 45))
humid2 = list(range(45, 48))
visibility = list(range(48, 52))
visibility2 = list(range(52, 55))
pressure = list(range(55, 59))
pressure2 = list(range(59, 62))
cloudcover = list(range(62, 66))
cloudcover2 = list(range(66, 69))
dewpoint = list(range(69, 73))
dewpoint2 = list(range(73, 76))
windgustkmph = list(range(76, 80))
windgustkmph2 = list(range(80, 83))

# Discard the 4-column range of every variable.
#Window = 1
#cols_to_remove += temp + visibility + pressure +  cloudcover + dewpoint + windgustkmph #Window = 1
cols_to_remove += temp + windspeedkmph + cond + precip + humid + visibility + pressure +  cloudcover + dewpoint + windgustkmph #Window = 1

# Discard the 3-column range of every variable.
#Window  >=2
#cols_to_remove += precip2 + cond2 + humid2 + visibility2 + cloudcover2 + dewpoint2 + windgustkmph2
cols_to_remove +=  temp2 + windspeedkmph2 + cond2 + precip2 + humid2 + visibility2 + pressure2 + cloudcover2 + dewpoint2 + windgustkmph2

# Columns 3-12 are what remains; the lines below would drop some of them too.
# 3 - 12
#cols_to_remove += [7, 8, 9, 10] #Original Weather Variables
#cols_to_remove += [3, 4, 5, 6, 7, 8, 9, 10, 11, 12] #Original Weather Variables

# Index `.columns` with the flat list itself. Wrapping it in another list
# (`columns[[cols_to_remove]]`) builds a 2-D indexer, which newer NumPy/pandas
# versions reject. The drop returns a new frame, so `weather_raw_data` is
# left untouched.
weather_dataset = weather_raw_data.drop(weather_raw_data.columns[cols_to_remove], axis=1)
weather_dataset.head()

Unnamed: 0,tempC,windspeedKmph,cond,precipMM,humidity,visibility,pressure,cloudcover,dewPointC,windGustKmph
0,0.2,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.33,0.538462,0.236111
1,0.1875,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.325,0.538462,0.239583
2,0.175,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.32,0.538462,0.243056
3,0.1625,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.315,0.538462,0.246528
4,0.15,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.31,0.538462,0.25


##### Preparing Flood Dataset

In [7]:
# File name of the flood export: "eng_win<size>_flood_<year>".
FLOOD_WINDOWSIZE = 5
FLOOD_FILENAME = "eng_win" + str(FLOOD_WINDOWSIZE) + "_flood_" + YEAR

# Load the CSV and replace missing values with 0 in one chained expression.
flood_raw_data = pd.read_csv("data/flood/" + FLOOD_FILENAME + EXT, skipinitialspace=True).fillna(0)

# Positional indices to discard: column 0, then columns 2-5 and 6-8.
# Only column 1 survives the drop.
flood = list(range(2, 6))
flood2 = list(range(6, 9))
cols_to_remove = [0] + flood + flood2

# Out-of-place drop: `flood_raw_data` keeps all of its columns.
flood_dataset = flood_raw_data.drop(flood_raw_data.columns[cols_to_remove], axis=1)
flood_dataset.head()

Unnamed: 0,WL [El.m]
0,0.814856
1,0.814856
2,0.814856
3,0.814856
4,0.814856


###### Merging weather and flood

In [8]:
# Append the flood column(s) to the weather features, side by side (axis=1).
# Because the result is stored back into `weather_dataset`, re-running this cell
# used to append another copy of the flood column(s) each time. Dropping any
# flood column that is already present first makes the cell safe to re-run;
# on the first run nothing is dropped and the result is unchanged.
weather_only = weather_dataset.drop(flood_dataset.columns, axis=1, errors="ignore")
weather_dataset = pd.concat([weather_only, flood_dataset], axis=1)
weather_dataset.head()

Unnamed: 0,tempC,windspeedKmph,cond,precipMM,humidity,visibility,pressure,cloudcover,dewPointC,windGustKmph,WL [El.m]
0,0.2,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.33,0.538462,0.236111,0.814856
1,0.1875,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.325,0.538462,0.239583,0.814856
2,0.175,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.32,0.538462,0.243056,0.814856
3,0.1625,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.315,0.538462,0.246528,0.814856
4,0.15,0.295455,0.631579,0.0,0.84058,1.0,0.758621,0.31,0.538462,0.25,0.814856


##### Preparing Training Dataset

In [9]:
# Number of rows by which the target is shifted up relative to the features
# (used by the cells below for `Y.shift(-shift)` and for trimming the last rows).
shift = 1

In [10]:
# To-be Predicted variable 
Y = traffic_dataset.statusS
Y = Y.shift(-shift)
Y = Y.fillna(0)
Y = Y.round(5)
Y = Y[:-shift]

In [11]:
# Other data
X = weather_dataset [:-shift]

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, shuffle=False)
X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

# Data scaling
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

In [12]:
# Training
regressor = SupervisedDBNRegression(hidden_layers_structure=HIDDEN_LAYER_STRUCT,
                                    learning_rate_rbm=RBM_LEARNING_RATE,
                                    learning_rate=DBN_LEARNING_RATE,
                                    n_epochs_rbm=RBM_EPOCHS,
                                    n_iter_backprop=DBN_EPOCHS,
                                    batch_size=BATCH_SIZE,
                                    activation_function=ACTIVE_FUNC)
regressor.fit(X_train, Y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.467042


KeyboardInterrupt: 

In [None]:
# Test
X_test = min_max_scaler.transform(X_test)
Y_pred = regressor.predict(X_test)

r2score = r2_score(Y_test, Y_pred)
rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))
mae = mean_absolute_error(Y_test, Y_pred)
print('Done.\nR-squared: %.3f\nRMSE: %.3f \nMAE: %.3f' % (r2score, rmse, mae))

### Results and Analysis below

##### Printing Predicted and Actual Results

In [None]:
# Number of predictions produced by the test cell.
print(len(Y_pred))

# NOTE(review): `traffic_data` and `fc_results` are not assigned in any cell
# visible in this notebook, so this fails on a fresh kernel unless they come
# from elsewhere - confirm which frames were intended.
# Select the `dt` values from row `startIndex` to the end.
startIndex = traffic_data.shape[0] - fc_results.shape[0]
dt = traffic_data.dt[startIndex:,]

# First element of every prediction row, collected into a plain list.
# (This reuses the name `temp`, which the weather cell also assigns.)
temp = [row[0] for row in Y_pred]

# Side-by-side table of predicted and actual values with their `dt` values.
d = {'Predicted': temp, 'Actual': Y_test, 'dt':dt}
df = pd.DataFrame(data=d)
df.head()

In [None]:
# Display the whole predicted-vs-actual frame built in the previous cell.
# NOTE(review): this renders every row; prefer df.head() / df.sample() when the
# frame is large.
df

#### Visualize trend of loss of RBM Training

In [None]:
#dbn_loss_error = []
#rbm_loss_error = []
# NOTE(review): `rbm_loss_error` and `plt` are not defined/imported in any
# visible cell - confirm where the loss history is recorded and that
# `matplotlib.pyplot` is imported as `plt` before running this cell.
line1 = rbm_loss_error

# One x position per recorded error value. The previous scalar
# `RBM_EPOCHS * len(HIDDEN_LAYER_STRUCT)` cannot be plotted against a sequence
# (x and y must have the same length) and is not a valid tick list.
# Presumably the history holds RBM_EPOCHS * len(HIDDEN_LAYER_STRUCT) entries - TODO confirm.
x = list(range(1, len(line1) + 1))

# The second curve (`line2`) was dropped: that name is never assigned anywhere
# in the visible notebook, so plotting it raised a NameError.
plt.plot(x, line1)
plt.xticks(x)
plt.xlabel("Iteration")
plt.ylabel("Error")
plt.show()

#### Visualize trend of loss of DBN Training

In [None]:
#dbn_loss_error = []
#rbm_loss_error = []
# NOTE(review): `dbn_loss_error` and `plt` are not defined/imported in any
# visible cell - confirm where the loss history is recorded and that
# `matplotlib.pyplot` is imported as `plt` before running this cell.
line1 = dbn_loss_error

# One x position per recorded error value. The previous scalar `DBN_EPOCHS`
# cannot be plotted against a sequence (x and y must have the same length) and
# is not a valid tick list.
# Presumably the history holds DBN_EPOCHS entries - TODO confirm.
x = list(range(1, len(line1) + 1))

# The second curve (`line2`) was dropped: that name is never assigned anywhere
# in the visible notebook, so plotting it raised a NameError.
plt.plot(x, line1)
plt.xticks(x)
plt.xlabel("Iteration")
plt.ylabel("Error")
plt.show()

In [None]:
# # Save the model
# regressor.save('models/pm2_' + ROAD + '_' + YEAR + '.pkl')

In [None]:
# df.to_csv("output/pm2_output_" + ROAD + EXT, encoding='utf-8')