## Introduction

In [1]:
import numpy
from sklearn import datasets, preprocessing
X = datasets.load_boston().data
X = preprocessing.normalize(X)
y = datasets.load_boston().target

In [2]:
class ShallowNet:
    
    def __init__(self, params):
        # collect desired size of layers
        self.input_size = params['input_size']
        self.hidden_size = params['hidden_size']
        self.output_size = params['output_size']
        
        # initialize weights
        self.W1 = np.random.randn(self.hidden_size, self.input_size) # (hid_s, feature_s)
        self.b1 = np.zeros((self.hidden_size,1)) # (hid_s, 1)
        
        self.W2 = np.random.randn(self.output_size, self.hidden_size) # (out_s, hid_s)
        self.b2 = np.zeros((self.output_size, 1)) # (out_s, 1)
    
    def sigmoid(self, x): # sigmoid activation function
        return 1/(1+np.exp(-x))
    
    def sigmoid_p(self, x): # derivative of sigmoid function
        return self.sigmoid(x) * (1-self.sigmoid(x))

    def forward_prop(self, X):
        self.Z1 = self.W1.dot(X.T) + self.b1 # (hid_s, feature_s) x (feature_s, sample_s)
        self.A1 = self.sigmoid(self.Z1) # (hid_s, sample_s)
        self.out = self.W2.dot(self.A1) + self.b2 #(out_size, hid_size) x (hid_s, sample_s)

    def mse(self, y): # calculate cost
        return np.sum((self.out-y)**2) / len(y)
    
    def backward_prop(self, X, y, lr):
        # calculate gradients with respect to loss
        self.d_out = self.out - y
        self.dW2 = (1/self.input_size) * self.d_out.dot(self.A1.T)
        self.db2 = (1/self.input_size) * np.sum(self.d_out, axis=1, keepdims = True)
        
        self.dZ1 = self.W2.T.dot(self.d_out) * self.sigmoid_p(self.Z1)
        self.dW1 = (1/self.input_size) * self.dZ1.dot(X)
        self.db1 = (1/self.input_size) * np.sum(self.dZ1, axis=1, keepdims= True)
        
        # update weights using calculated gradients
        self.W2 -= lr*self.dW2
        self.b2 -= lr*self.db2
        self.W1 -= lr*self.dW1
        self.b1 -= lr*self.db1
    
    def one_epoch(self, X, y, lr): # one epoch: forward and back prop 
        self.forward_prop(X)
        self.backward_prop(X, y, lr)
        
    def predict(self, X_pred):
        Z1 = self.W1.dot(X_pred.T) + self.b1
        A1 = self.sigmoid(Z1)
        out = self.W2.dot(A1) + self.b2
        return out
        

In [13]:
epochs = 100
lr = 0.001
hid_size = 20

net = ShallowNet({"input_size": X.shape[1], "hidden_size": hid_size, 'output_size': 1})
for i in range(epochs):
    net.one_epoch(X, y, lr)
    print('Epoch: {} - MSE: {}'.format(i+1, net.mse(y)))

Epoch: 1 - MSE: 507.02439374794574
Epoch: 2 - MSE: 282.77338655994
Epoch: 3 - MSE: 165.6615922825101
Epoch: 4 - MSE: 112.41550337217129
Epoch: 5 - MSE: 92.58335428539773
Epoch: 6 - MSE: 86.10414178882516
Epoch: 7 - MSE: 84.01902516307435
Epoch: 8 - MSE: 83.21886306079644
Epoch: 9 - MSE: 82.77185105947386
Epoch: 10 - MSE: 82.4204931054145
Epoch: 11 - MSE: 82.09660457442592
Epoch: 12 - MSE: 81.78246610995421
Epoch: 13 - MSE: 81.47344686658035
Epoch: 14 - MSE: 81.16826324811854
Epoch: 15 - MSE: 80.86648625917047
Epoch: 16 - MSE: 80.56791161239583
Epoch: 17 - MSE: 80.27240226885233
Epoch: 18 - MSE: 79.9798493649269
Epoch: 19 - MSE: 79.6901623173241
Epoch: 20 - MSE: 79.40326590619092
Epoch: 21 - MSE: 79.11909892369137
Epoch: 22 - MSE: 78.83761312144254
Epoch: 23 - MSE: 78.55877218831434
Epoch: 24 - MSE: 78.28255071810561
Epoch: 25 - MSE: 78.00893317263278
Epoch: 26 - MSE: 77.73791285187438
Epoch: 27 - MSE: 77.4694908816967
Epoch: 28 - MSE: 77.20367522804331
Epoch: 29 - MSE: 76.9404797452393