# Fine-tuning an RBF model for a regression problem

In [35]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
from tensorflow import keras

from keras.datasets import boston_housing
from keras import Model, layers
from keras.metrics import RootMeanSquaredError
from keras import backend as K

from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold

import numpy as np
from scipy.spatial.distance import cdist

import time
import sys

import matplotlib.pyplot as plt
%matplotlib inline

## Useful functions

In [36]:
# The radial basis function layer
# x is the input of the layer and c is the centers of the rbf neurons
# The function returns the outputs of the rbf layer as a NxM matrix where N is the length of x and M is the length of c
def rbf_layer(x, c):
    rbf_neurons = np.shape(c)[0]

    # Find sigma
    d_max = cdist(c, c, metric="euclidean").max()
    sigma = d_max / np.sqrt(2 * rbf_neurons)
    
    # Stores the transformed set x after it passes the rbf layer
    transformed_x = np.zeros([np.shape(x)[0], rbf_neurons], dtype=np.float32)

    # Pass the x set through every rbf neuron
    for i, center in enumerate(c):
        # Find the squared norms
        norms = np.linalg.norm((x - center), axis=1) ** 2

        transformed_x[:, i] = np.exp(-norms) / (2 * (sigma))

    return transformed_x

In [37]:
# Function that plots figures based on the history of the training
def loss_acc_plot(history):
    plt.figure()
    plt.plot(history.history['loss'], label='Train')
    plt.plot(history.history['val_loss'], label='Validation')
    plt.legend()
    plt.title('Loss of train and validation sets')
    
    plt.figure()
    plt.plot(history.history['coeff_determination'], label = 'Train')
    plt.plot(history.history['val_coeff_determination'], label = 'Validation')
    plt.legend()
    plt.title('R squared of train and validation sets')

    plt.figure()
    plt.plot(history.history['root_mean_squared_error'], label='Train')
    plt.plot(history.history['val_root_mean_squared_error'], label='Validation')
    plt.legend()
    plt.title('RMSE of train and validation sets')

    plt.show()

## Data Preprocessing

In [38]:
# Preparation of boston housing data
(x_train, y_train), (x_test, y_test) = boston_housing.load_data(test_split=0.25)

# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
y_train, y_test = np.array(y_train, np.float32), np.array(y_test, np.float32)

# Scale the training and testing data
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [39]:
def build_model(n_h1, n_h2, dropout, lr):
    model = keras.Sequential([
        keras.layers.Dense(n_h2, input_shape=(n_h1,), activation='relu'),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(1)
    ])
    
    optim = keras.optimizers.SGD(learning_rate=lr)
    loss_func = keras.losses.MeanSquaredError()

    model.compile(optimizer=optim, loss=loss_func, metrics=[RootMeanSquaredError()])
    
    return model

In [40]:
def fine_tuning(xtrain, ytrain):
    # Percentages of the RBF neurons based on the length of the training data
    n_h1_perc = np.array([0.05, 0.15, 0.3, 0.5])
    # Number of possible RBF neurons
    n_h1 = n_h1_perc * np.shape(ytrain)[0]
    n_h1 = n_h1.astype(int)
    
    # Number of possible neurons on the output layer
    n_h2 = np.array([32, 64, 128, 256])
    
    dropout = np.array([0.2, 0.35, 0.5])
    lr = 0.001
    epochs = 100
    
    # The 5-fold Cross Validation splitter
    kf = KFold(n_splits=5, shuffle=True)
    
    # Variables to hold the best values for the grid search
    best_n_h1_perc = 0
    best_n_h2 = 0
    best_dropout = 0
    best_rmse = float('inf')
    
    # Holds the mse and rmse of the 5-folds
    fold_mse = np.empty(5)
    fold_rmse = np.empty(5)
    
    for i, hidden1 in enumerate(n_h1):
        print(hidden1)
        kmeans = KMeans(n_clusters=hidden1, n_init=10).fit(xtrain)
        centers = kmeans.cluster_centers_

        # Pass the train set from the rbf layer
        x_transformed = rbf_layer(xtrain, centers)
        
        for hidden2 in n_h2:
            for drop in dropout:
                for j, (train, valid) in enumerate(kf.split(x_transformed, ytrain)):
                    model = build_model(hidden1, hidden2, drop, lr)
                    
                    history = model.fit(x_transformed[train], ytrain[train], epochs=epochs, verbose=0)   
                    
                    score = model.evaluate(x_transformed[valid], ytrain[valid])
                    
                    fold_mse[i] = score[0]
                    fold_rmse[i] = score[1]      
                    
                mse = np.mean(fold_mse)
                rmse = np.mean(fold_rmse)
                
                if best_rmse > rmse:
                    best_rmse = rmse
                    best_dropout = drop
                    best_n_h1_perc = n_h1_perc[i]
                    best_n_h2 = hidden2
                    
    return best_n_h1_perc, best_n_h2, best_dropout, best_rmse

In [41]:
# Wall-clock timestamp taken just before the grid search starts
st = time.time()

# Run the full grid search on the scaled training split
best_n_h1_perc, best_n_h2, best_dropout, best_rmse = fine_tuning(x_train, y_train)

# Wall-clock timestamp taken right after the grid search returns
end = time.time()

print('Fine-tuning time execution:', end-st, 'seconds')

# Report the winning configuration.
# NOTE(review): the second message says "output layer", but best_n_h2 is the
# width that fine_tuning passes to the first Dense layer in build_model.
print('The optimal values acquired by the fine-tuning function are:')
print(f'Best percentage of RBF neurons based on the size of the training data {best_n_h1_perc*100}%')
print(f'Best number of neurons in the output layer is: {best_n_h2} neurons')
print(f'Best dropout probability is: {best_dropout}')

18
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch