In [1]:
import numpy as np
import pandas as pd

np.random.seed(1337)  # for reproducibility
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.regression import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from dbn.tensorflow import SupervisedDBNRegression

In [2]:
#Preparing Dataset
# Read the dataset
ROAD = "Taft Ave."
YEAR = "2015"
EXT = ".csv"
DATE = "03-20-2015"
PRED = "715AM"
FILENAME = "test_" + ROAD + "_" + YEAR + "_" + DATE + "_" + "Pred-" + PRED

TIMESTEP = 10

original_dataset = pd.read_csv("data/testing/" + FILENAME + EXT, skipinitialspace=True)

In [3]:
original_dataset

Unnamed: 0,dt,lineName,stationName,statusN,statusS,tempC,tempF,windspeedMiles,windspeedKmph,winddirDegree,...,heatIndexC,heatIndexF,dewPointC,dewPointF,windChillC,windChillF,windGustMiles,windGustKmph,feelsLikeC,feelsLikeF
0,20/01/2015 3:30,EDSA,Taft Ave.,0.0,0.5,0.15,0.180556,0.125,0.102273,0.274788,...,0.189655,0.198113,0.461538,0.48,0.15,0.180556,0.102273,0.104167,0.189655,0.198113
1,20/01/2015 3:45,EDSA,Taft Ave.,0.0,0.5,0.15,0.1875,0.116071,0.096591,0.270538,...,0.198276,0.20283,0.461538,0.48,0.15,0.1875,0.096591,0.100694,0.198276,0.20283
2,20/01/2015 4:00,EDSA,Taft Ave.,0.0,0.5,0.15,0.194444,0.107143,0.090909,0.266289,...,0.206897,0.207547,0.461538,0.48,0.15,0.194444,0.090909,0.097222,0.206897,0.207547
3,20/01/2015 4:15,EDSA,Taft Ave.,0.0,0.5,0.1625,0.194444,0.107143,0.090909,0.26204,...,0.206897,0.212264,0.461538,0.48,0.1625,0.194444,0.090909,0.09375,0.206897,0.212264
4,20/01/2015 4:30,EDSA,Taft Ave.,0.0,0.5,0.175,0.194444,0.107143,0.090909,0.25779,...,0.206897,0.216981,0.461538,0.48,0.175,0.194444,0.090909,0.090278,0.206897,0.216981
5,20/01/2015 4:45,EDSA,Taft Ave.,0.0,0.5,0.1875,0.194444,0.107143,0.090909,0.253541,...,0.206897,0.221698,0.461538,0.48,0.1875,0.194444,0.090909,0.086806,0.206897,0.221698
6,20/01/2015 5:00,EDSA,Taft Ave.,0.0,0.5,0.2,0.194444,0.107143,0.090909,0.249292,...,0.206897,0.226415,0.461538,0.48,0.2,0.194444,0.090909,0.083333,0.206897,0.226415
7,20/01/2015 5:15,EDSA,Taft Ave.,0.0,0.5,0.2,0.201389,0.107143,0.085227,0.245042,...,0.206897,0.226415,0.461538,0.48,0.2,0.201389,0.085227,0.079861,0.206897,0.226415
8,20/01/2015 5:30,EDSA,Taft Ave.,0.0,0.5,0.2,0.208333,0.107143,0.079545,0.240793,...,0.206897,0.226415,0.461538,0.48,0.2,0.208333,0.079545,0.076389,0.206897,0.226415
9,20/01/2015 5:45,EDSA,Taft Ave.,0.0,0.5,0.2,0.215278,0.107143,0.073864,0.236544,...,0.206897,0.226415,0.461538,0.48,0.2,0.215278,0.073864,0.072917,0.206897,0.226415


In [4]:
traffic_dataset = original_dataset
# Preparing Traffic Dataset
# Remove date time. Remove unused columms
#0-2 = dt + lineName + stationName || 3-4 - statusN - statusS || 5-end - weather variables
cols_to_remove = [0, 1, 2] + list(range(5, traffic_dataset.shape[1]))

traffic_dataset = traffic_dataset.drop(traffic_dataset.columns[[cols_to_remove]], axis=1)
traffic_dataset = traffic_dataset.iloc[0:TIMESTEP]
traffic_dataset

Unnamed: 0,statusN,statusS
0,0.0,0.5
1,0.0,0.5
2,0.0,0.5
3,0.0,0.5
4,0.0,0.5
5,0.0,0.5
6,0.0,0.5
7,0.0,0.5
8,0.0,0.5
9,0.0,0.5


In [5]:
weather_dataset = original_dataset
# Remove date time. Remove unused columms
#0-2 = dt + lineName + stationName || 3-4 - statusN - statusS || 5-end - weather variables
cols_to_remove = [0, 1, 2] + [3, 4]

#Remove Redundant Variables
#Variables = tempC WindspeedKmph, cond, precipMM, humidity, visibility, pressure, cloudcover, dewPointC, windGustKmph, 
redundant_variables = [6, 7, 9, 16, 17, 19, 20, 21, 22, 24, 25]
cols_to_remove += redundant_variables
 
weather_dataset = weather_dataset.drop(weather_dataset.columns[[cols_to_remove]], axis=1)
weather_dataset = weather_dataset.iloc[0:TIMESTEP]
weather_dataset

Unnamed: 0,tempC,windspeedKmph,cond,precipMM,humidity,visibility,pressure,cloudcover,dewPointC,windGustKmph
0,0.15,0.102273,0.0,0,0.818841,1,0.793103,0.1,0.461538,0.104167
1,0.15,0.096591,0.0,0,0.815217,1,0.793103,0.1,0.461538,0.100694
2,0.15,0.090909,0.0,0,0.811594,1,0.793103,0.1,0.461538,0.097222
3,0.1625,0.090909,0.0,0,0.807971,1,0.793103,0.1025,0.461538,0.09375
4,0.175,0.090909,0.0,0,0.804348,1,0.793103,0.105,0.461538,0.090278
5,0.1875,0.090909,0.0,0,0.800725,1,0.793103,0.1075,0.461538,0.086806
6,0.2,0.090909,0.0,0,0.797101,1,0.793103,0.11,0.461538,0.083333
7,0.2,0.085227,0.223684,0,0.793478,1,0.793103,0.11,0.461538,0.079861
8,0.2,0.079545,0.447368,0,0.789855,1,0.793103,0.11,0.461538,0.076389
9,0.2,0.073864,0.671053,0,0.786232,1,0.793103,0.11,0.461538,0.072917


In [6]:
y_test = original_dataset.iloc[TIMESTEP:(TIMESTEP*2)].statusS

In [7]:
pm1_Y_test = pm2_Y_test = y_test
pm1_Y_test

10    0.5
11    0.5
12    0.5
13    0.5
14    0.5
15    0.5
16    0.5
17    1.0
18    1.0
19    1.0
Name: statusS, dtype: float64

In [8]:
pm1_X_test = traffic_dataset
pm2_X_test = weather_dataset

In [9]:
# Save the model
pm1 = SupervisedDBNRegression.load('models/pm1_' + ROAD + '_' + YEAR + '.pkl')
pm2 = SupervisedDBNRegression.load('models/pm2_' + ROAD + '_' + YEAR + '.pkl')
fc = SupervisedDBNRegression.load('models/fc_' + ROAD + '_' + YEAR + '.pkl')

In [10]:
# Test PM1
pm1_Y_pred = pm1.predict(pm1_X_test)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(pm1_Y_test, pm1_Y_pred), mean_squared_error(pm1_Y_test, pm1_Y_pred)))

# Test PM2
pm2_Y_pred = pm2.predict(pm2_X_test)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(pm2_Y_test, pm2_Y_pred), mean_squared_error(pm2_Y_test, pm2_Y_pred)))

Done.
R-squared: -0.430486
MSE: 0.075101
Done.
R-squared: -0.489103
MSE: 0.078178


In [11]:
# Compiling Results
pm1_temp = []
pm2_temp = []

for i in range(len(pm1_Y_pred)):
    pm1_temp.append(pm1_Y_pred[i][0])
    pm2_temp.append(pm2_Y_pred[i][0])
    
dpm1 = {'Predicted_PM1': pm1_temp, 'Predicted_PM2': pm2_temp, 'Actual': pm1_Y_test}
pm1_pm2_results = pd.DataFrame(data=dpm1)

pm1_pm2_results

Unnamed: 0,Actual,Predicted_PM1,Predicted_PM2
10,0.5,0.499665,0.467629
11,0.5,0.499665,0.467276
12,0.5,0.499665,0.466924
13,0.5,0.499665,0.474926
14,0.5,0.499665,0.482928
15,0.5,0.499665,0.49093
16,0.5,0.499665,0.498932
17,1.0,0.499665,0.494919
18,1.0,0.499665,0.490906
19,1.0,0.499665,0.486893


In [12]:
dfc = {'PM1-Output': pm1_pm2_results.Predicted_PM1, 'PM2-Output': pm1_pm2_results.Predicted_PM1}
fusion_dataset = pd.DataFrame(data=dfc)
fusion_dataset = np.array(fusion_dataset)
actual_dataset = pm1_pm2_results.Actual

In [13]:
fc_Y_test = y_test
fc_X_test = dfc

In [14]:
# Test FC
fc_Y_pred = fc.predict(pm1_X_test)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(fc_Y_test, fc_Y_pred), mean_squared_error(fc_Y_test, fc_Y_pred)))

Done.
R-squared: -8.378168
MSE: 0.492354


In [15]:
temp = []
for i in range(len(fc_Y_pred)):
    temp.append(fc_Y_pred[i][0])
d = {'Predicted': temp, 'Actual': fc_Y_test}
fc_results = pd.DataFrame(data=d)
fc_results

Unnamed: 0,Actual,Predicted
10,0.5,-0.013215
11,0.5,-0.013215
12,0.5,-0.013215
13,0.5,-0.013215
14,0.5,-0.013215
15,0.5,-0.013215
16,0.5,-0.013215
17,1.0,-0.013215
18,1.0,-0.013215
19,1.0,-0.013215
