In [4]:
import numpy as np
import pandas as pd

np.random.seed(1337)  # for reproducibility
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.regression import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from dbn.tensorflow import SupervisedDBNRegression

In [5]:
# Load the two upstream output files (pm1 / pm2) from the "output/" folder.
# Both files share the same extension and are read with identical options.
PM1_FILENAME = "pm1_noeng_output_merged_mmda_wwo_Taft Ave._2015"
PM2_FILENAME = "pm2_noeng_output_merged_mmda_wwo_Taft Ave._2015"
EXT = ".csv"

pm1_output_dataset, pm2_output_dataset = (
    # skipinitialspace=True drops blanks that follow each delimiter.
    pd.read_csv("output/" + stem + EXT, skipinitialspace=True)
    for stem in (PM1_FILENAME, PM2_FILENAME)
)

##### Preparing Training Dataset

In [6]:
# Build the fusion-center input matrix from the "Predicted" column of each
# upstream output file.
#
# pm1_output_dataset / pm2_output_dataset are the frames loaded in the previous
# cell; they are reused here instead of reading the same CSV files a second time.

# Sanity check: print the column sitting at position 2 of pm1 and position 1 of
# pm2. Both are expected to be named "Predicted".
# NOTE(review): the two positions differ, so the files apparently have different
# column layouts -- confirm against the CSV headers.
print("Predicted Column of pm1 : " + str(pm1_output_dataset.columns[2]))
print("Predicted Column of pm2 : " + str(pm2_output_dataset.columns[1]))

# One feature column per upstream file; rows are aligned on the frames' index.
df = pd.DataFrame(data={'PM1-Output': pm1_output_dataset.Predicted,
                        'PM2-Output': pm2_output_dataset.Predicted})

# Plain 2-column ndarray consumed by the split/scaling cell below.
training_dataset = np.array(df)

Predicted Column of pm1 : Predicted
Predicted Column of pm2 : Predicted


In [7]:
# Target: the "Actual" column of the pm1 output, rounded to 5 decimals.
Y = pm1_output_dataset.Actual.round(5)

# Features: the two-column matrix assembled from the pm1/pm2 predictions.
X = training_dataset

# Ordered (unshuffled) split: the leading 33% of the rows are used for training
# and the trailing 67% for testing. Each part is converted to a NumPy array.
# NOTE(review): a 0.67 test share is unusually large -- confirm it is intended.
X_train, X_test, Y_train, Y_test = (
    np.array(part)
    for part in train_test_split(X, Y, test_size=0.67, shuffle=False)
)

# Fit the min-max scaler on the training features only; the same fitted scaler
# is applied to the test features in the Test cell.
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

In [8]:
# Training
# Hyperparameters are collected in one place so they are easy to read and tune.
# The printed log shows 3 RBM pre-training epochs followed by 5 fine-tuning
# epochs, matching n_epochs_rbm and n_iter_backprop below.
dbn_hyperparams = dict(
    hidden_layers_structure=[3],  # presumably one hidden layer of 3 units -- confirm in dbn docs
    learning_rate_rbm=0.01,
    learning_rate=0.01,
    n_epochs_rbm=3,
    n_iter_backprop=5,
    batch_size=16,
    activation_function='relu',
)
regressor = SupervisedDBNRegression(**dbn_hyperparams)

# Kept as the last expression so the fitted estimator's repr is displayed.
regressor.fit(X_train, Y_train)


[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 0.154636
>> Epoch 2 finished 	RBM Reconstruction error 0.134832
>> Epoch 3 finished 	RBM Reconstruction error 0.122632
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 0 finished 	ANN training loss 0.028189
>> Epoch 1 finished 	ANN training loss 0.006283
>> Epoch 2 finished 	ANN training loss 0.001696
>> Epoch 3 finished 	ANN training loss 0.000665
>> Epoch 4 finished 	ANN training loss 0.000326
[END] Fine tuning step


SupervisedDBNRegression(batch_size=16, dropout_p=0, l2_regularization=1.0,
            learning_rate=0.01, n_iter_backprop=5, verbose=True)

In [9]:
# Test
# Scale the held-out features with the scaler fitted on the training data.
# The result is bound to a NEW name: overwriting X_test in place made this cell
# non-idempotent (re-running it would scale already-scaled data and silently
# change the reported metrics).
X_test_scaled = min_max_scaler.transform(X_test)

# Predict on the scaled test features and report R-squared and MSE against Y_test.
Y_pred = regressor.predict(X_test_scaled)
print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))


Done.
R-squared: 0.996239
MSE: 0.000430


In [10]:
# Number of predictions produced for the test split.
print(len(Y_pred))

# Keep the first element of every prediction row so the column holds scalars.
predicted_values = [row[0] for row in Y_pred]

# Side-by-side table of predictions and ground truth; preview the first rows.
df = pd.DataFrame(data={'Predicted': predicted_values, 'Actual': Y_test})
df.head()

15730


Unnamed: 0,Actual,Predicted
0,0.5,0.471369
1,0.5,0.471552
2,0.5,0.471735
3,0.5,0.472126
4,0.5,0.472517


In [11]:
# Show a bounded preview rather than asking the notebook to render the whole
# Predicted/Actual frame (one row per test sample).
df.head(10)

Unnamed: 0,Actual,Predicted
0,0.5,0.471369
1,0.5,0.471552
2,0.5,0.471735
3,0.5,0.472126
4,0.5,0.472517
5,0.5,0.472908
6,0.5,0.473299
7,0.5,0.479248
8,0.5,0.485198
9,0.5,0.491148


In [12]:
# Save the model
import os

MODEL_PATH = 'models/fusion-center.pkl'

# Create the target folder first so a missing "models/" directory cannot make
# the save fail after training has already completed.
model_dir = os.path.dirname(MODEL_PATH)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

regressor.save(MODEL_PATH)

# # Restore
# # NOTE(review): presumably SupervisedDBNRegression exposes the same `load`
# # classmethod as the classification model -- confirm against the dbn package.
# # Only load model files you trust.
# regressor = SupervisedDBNRegression.load('models/fusion-center.pkl')

# # Test (features must be scaled with min_max_scaler, exactly as in the Test cell)
# Y_pred = regressor.predict(min_max_scaler.transform(np.array(X[len(X_train):])))
# print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))


In [13]:
# Export the Predicted/Actual table as UTF-8 CSV, without the index column.
FILENAME = "merged_mmda_wwo_Taft Ave._2015"
EXT = ".csv"

# NOTE(review): there is no separator between "fusion-center_output" and
# FILENAME, so the file is written as "fusion-center_outputmerged_...csv" --
# confirm this is the name downstream consumers expect.
output_path = "output/fusion-center_output" + FILENAME + EXT
df.to_csv(output_path, encoding='utf-8', index=False)