In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
data, target = datasets.load_boston(return_X_y=True)
print('Data shape', data.shape)
print('Target shape', target.shape)
target = target.reshape(-1, 1)
print('New target shape', target.shape)

Data shape (506, 13)
Target shape (506,)
New target shape (506, 1)


In [3]:
def rmse_error(pred, y):
    return np.mean((pred-y)**2)

In [4]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [5]:
class NN(object):
    def __init__(self, s, learning_rate, iterations=100):
        np.random.seed(0)
        self.s = s
        self.learning_rate = learning_rate
        self.iterations = iterations

    def fit(self, X, y):
        self.X = X
        self.y = y
        self.d = X.shape[1]
        self.w_1 = np.random.random((self.s, self.d)) * 2 - 1
        self.b_1 = np.random.random((1, self.s))
        self.w_2 = np.random.random((1, self.s)) * 2 - 1
        self.b_2 = np.random.random((1, 1))

        for i in range(self.iterations):
#             # For printing
#             pred = self.predict(X)
#             rmse = rmse_error(pred, y)
#             print('Iteration', i, ' error =', rmse)
            # For optimization
            self.optimization()

    def predict(self, X_test):
        self.z_1 = np.dot(X_test, self.w_1.T) + self.b_1
        # Sigmoid activation
        self.a_1 = sigmoid(self.z_1)

        self.z_2 = np.dot(self.a_1, self.w_2.T) + self.b_2
        # Linear activation
        self.a_2 = self.z_2

        return self.a_2

    def optimization(self):
        der_C_a_2 = self.predict(self.X) - self.y
        der_a_2_z_2 = 1
        der_z_2_w_2 = self.a_1
        der_z_2_b_2 = 1

        delta = der_C_a_2 * der_a_2_z_2
        der_C_w_2 = delta * der_z_2_w_2
        der_C_b_2 = delta * der_z_2_b_2

        w_2 = self.w_2 - self.learning_rate * np.mean(der_C_w_2, axis=0)
        b_2 = self.b_2 - self.learning_rate * np.mean(der_C_b_2, axis=0)

        der_z_2_a_1 = self.w_2  # (1, s)
        der_a_1_z_1 = self.a_1 * (1 - self.a_1)  # (n, s)
        der_z_1_w_1 = self.X  # (n, k)
        der_z_1_b_1 = 1

        der_C_w_1 = np.dot((delta * der_z_2_a_1 * der_a_1_z_1).T, der_z_1_w_1)
        der_C_b_1 = delta * der_z_2_a_1 * der_a_1_z_1 * der_z_1_b_1

        w_1 = self.w_1 - self.learning_rate * der_C_w_1
        b_1 = self.b_1 - self.learning_rate * np.mean(der_C_b_1, axis=0)

        self.w_1 = w_1
        self.b_1 = b_1
        self.w_2 = w_2
        self.b_2 = b_2

In [6]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33, random_state=0)

In [7]:
s_grid = [10*i for i in range(1, 11)]
learning_grid = [10**i for i in range(-3, 1)]

In [8]:
best_rmse = np.inf
best_s = None
best_learning_rate = None
resultados = np.empty(())
for s in s_grid:
    for learning_rate in learning_grid:
        nn = NN(s=s, learning_rate=learning_rate)
        nn.fit(X_train, y_train)
        pred = nn.predict(X_test)
        rmse = rmse_error(pred, y_test)
        if rmse < best_rmse:
            best_learning_rate = learning_rate
            best_s = s
            best_rmse = rmse

  
  ret = umr_sum(arr, axis, dtype, out, keepdims)


In [9]:
print('Best rmse is', best_rmse)
print('Configuration is: s =', s, ', learning_rate =', learning_rate)

Best rmse is 73.91627713148064
Configuration is: s = 100 , learning_rate = 1
