In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import tensorflow as tf 
from tensorflow import keras
from keras import Input
from keras import layers
import utils
import rbf_layer
from scipy.spatial import distance

# ---------------------------------------------------------------------------- #
#                                   read data                                  #
# ---------------------------------------------------------------------------- #

# data = "http://lib.stat.cmu.edu/datasets/boston"
data = "boston.csv"
# Raw string: "\s" is not a valid Python string escape, so a plain "\s+" emits
# an invalid-escape warning on newer interpreters. The regex itself is unchanged.
# skiprows=22 presumably skips the descriptive preamble of the file -- verify
# against the local copy of boston.csv.
raw_df = pd.read_csv(data, sep=r"\s+", skiprows=22, header=None)
# Each record is spread over two consecutive rows of raw_df: the even rows hold
# the first block of features, the odd rows hold two more features (columns 0-1)
# followed by the target (column 2). Stitch the feature parts back together.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]
print("Inputs:")
utils.data_summary(X)
print("Outputs:")
utils.data_summary(y)

Inputs:
Shape = (506, 13)
Minimum = 0.0
Maximum = 711.0
Range = 711.0
Variance = 21070.130450709916
Standard Deviation = 145.1555388220164

Outputs:
Shape = (506,)
Minimum = 5.0
Maximum = 50.0
Range = 45.0
Variance = 84.41955615616556
Standard Deviation = 9.188011545278203



In [2]:
# ---------------------------------------------------------------------------- #
#                                normalize data                                #
# ---------------------------------------------------------------------------- #

# Standardize every input column to zero mean / unit variance.
# Rebind X to the returned array instead of relying on the in-place side
# effect: with copy=False scikit-learn only *tries* to work in place and may
# still hand back a copy (e.g. for non-float or non-contiguous input), in which
# case the original statement would silently leave X unscaled.
# NOTE(review): scaling before the train/test split lets test-set statistics
# influence the training inputs; consider fitting a StandardScaler on the
# training split only.
X = preprocessing.scale(X, copy=False)
# Targets are intentionally left in their original units.
# preprocessing.scale(y, copy=False)
print("Normalized Inputs:")
utils.data_summary(X)

Normalized Inputs:
Shape = (506, 13)
Minimum = -3.9071933049810412
Maximum = 9.933930601860267
Range = 13.841123906841307
Variance = 1.0
Standard Deviation = 1.0



In [3]:
# ---------------------------------------------------------------------------- #
#                      split data to training and testing                      #
# ---------------------------------------------------------------------------- #

# 75% / 25% hold-out split; the fixed random_state keeps the partition stable
# between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    test_size=0.25,
    random_state=0,
)

# Summarize each partition: inputs first, then outputs.
for split_label, split_data in (
    ("Training Inputs:", X_train),
    ("Testing Inputs:", X_test),
    ("Training Outputs:", y_train),
    ("Testing Outputs:", y_test),
):
    print(split_label)
    utils.data_summary(split_data)

# Number of training samples; used later to size the RBF layer.
n_train = len(X_train)

Training Inputs:
Shape = (379, 13)
Minimum = -3.9071933049810412
Maximum = 9.933930601860267
Range = 13.841123906841307
Variance = 0.9970688559815751
Standard Deviation = 0.9985333524632891

Testing Inputs:
Shape = (127, 13)
Minimum = -3.8821945679249596
Maximum = 8.136883506576424
Range = 12.019078074501383
Variance = 1.008742396399277
Standard Deviation = 1.004361686047052

Training Outputs:
Shape = (379,)
Minimum = 5.0
Maximum = 50.0
Range = 45.0
Variance = 85.30823553163789
Standard Deviation = 9.236245748768159

Testing Outputs:
Shape = (127,)
Minimum = 5.6
Maximum = 50.0
Range = 44.4
Variance = 81.69901543803087
Standard Deviation = 9.03875076755803



In [4]:
# ---------------------------------------------------------------------------- #
#                          neural network construction                         #
# ---------------------------------------------------------------------------- #

# Width of the RBF layer, expressed as a fraction of the training-set size.
# The alternative fractions are kept for quick experimentation.
# hidden_size = int(0.1 * n_train)
# hidden_size = int(0.5 * n_train)
hidden_size = int(0.9 * n_train)

model = keras.Sequential()
# Take the input width from the data rather than hard-coding 13, so the cell
# keeps working if the feature set changes.
model.add(Input(shape=(X_train.shape[1],)))
# RBF layer with `hidden_size` units; the initializer is built from the
# training inputs (presumably k-means centers -- see utils.InitCentersKMeans).
model.add(rbf_layer.RBF(hidden_size, utils.InitCentersKMeans(X_train)))
model.add(keras.layers.Dense(128, activation='relu'))
# Linear output for regression. A ReLU on the single output unit has zero
# gradient whenever its pre-activation is negative, which can freeze training
# with the network predicting a constant 0.
model.add(keras.layers.Dense(1, activation='linear'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rbf (RBF)                   (None, 341)               4433      
                                                                 
 dense (Dense)               (None, 128)               43776     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 48,338
Trainable params: 43,905
Non-trainable params: 4,433
_________________________________________________________________


In [5]:
# Plain stochastic gradient descent with a small, fixed step size.
opt = keras.optimizers.SGD(learning_rate=0.001)

# Optimize mean squared error; additionally track the project's RMSE and
# coefficient-of-determination metrics from utils.
model.compile(
    optimizer=opt,
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[
        utils.root_mean_squared_error,
        utils.coeff_determination,
    ],
)

# Train for 100 epochs, evaluating on the held-out split after every epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Show the `params` attribute of the History object returned by model.fit.
print(history.params)
# Scratch code kept for manual inspection of the trained model.
# NOTE: the first line below would overwrite the target array `y` if
# uncommented as-is; use a different name (e.g. y_pred) before enabling it.
# y = model(X_test)
# print(y_test)

# Evaluate loss/metrics on either split (the second assignment replaces the first).
# results = model.evaluate(X_train, y_train)
# results = model.evaluate(X_test, y_test)
# print(results)
# Predict the first ten test samples for a side-by-side comparison.
# predictions = model.predict(X_test[:10])

# print(predictions)
# print(y_test[:10])