# Shallow Neural Network (Regression)

A simple numpy implementation of a shallow (one layer) neural network for regression problems. Uses sigmoid activation function in the hidden layer. Backpropagation uses batch gradient descent to update parameters. 

In [9]:
# import packages and data to test program
import numpy
from sklearn import datasets, preprocessing

X = datasets.load_boston().data
X = preprocessing.normalize(X)
y = datasets.load_boston().target

In [10]:
class ShallowNet:
    
    def __init__(self, params):
        # collect desired size of layers
        self.input_size = params['input_size']
        self.hidden_size = params['hidden_size']
        self.output_size = params['output_size']
        
        # initialize weights
        self.W1 = np.random.randn(self.hidden_size, self.input_size) # (hid_s, feature_s)
        self.b1 = np.zeros((self.hidden_size,1)) # (hid_s, 1)
        
        self.W2 = np.random.randn(self.output_size, self.hidden_size) # (out_s, hid_s)
        self.b2 = np.zeros((self.output_size, 1)) # (out_s, 1)
    
    def sigmoid(self, x): # sigmoid activation function
        return 1/(1+np.exp(-x))
    
    def sigmoid_p(self, x): # derivative of sigmoid function
        return self.sigmoid(x) * (1-self.sigmoid(x))

    def forward_prop(self, X):
        self.Z1 = self.W1.dot(X.T) + self.b1 # (hid_s, feature_s) x (feature_s, sample_s)
        self.A1 = self.sigmoid(self.Z1) # (hid_s, sample_s)
        self.out = self.W2.dot(self.A1) + self.b2 #(out_size, hid_size) x (hid_s, sample_s)

    def mse(self, y): # calculate cost
        return np.sum((self.out-y)**2) / len(y)
    
    def backward_prop(self, X, y, lr):
        # calculate gradients with respect to loss
        self.d_out = self.out - y
        self.dW2 = (1/self.input_size) * self.d_out.dot(self.A1.T)
        self.db2 = (1/self.input_size) * np.sum(self.d_out, axis=1, keepdims = True)
        
        self.dZ1 = self.W2.T.dot(self.d_out) * self.sigmoid_p(self.Z1)
        self.dW1 = (1/self.input_size) * self.dZ1.dot(X)
        self.db1 = (1/self.input_size) * np.sum(self.dZ1, axis=1, keepdims= True)
        
        # update weights using calculated gradients
        self.W2 -= lr*self.dW2
        self.b2 -= lr*self.db2
        self.W1 -= lr*self.dW1
        self.b1 -= lr*self.db1
    
    def one_epoch(self, X, y, lr): # one epoch: forward and back prop 
        self.forward_prop(X)
        self.backward_prop(X, y, lr)
        
    def predict(self, X_pred):
        Z1 = self.W1.dot(X_pred.T) + self.b1
        A1 = self.sigmoid(Z1)
        out = self.W2.dot(A1) + self.b2
        return out
        

Testing my class on sample dataset (Scikit-learn's built in Boston housing data)

In [11]:
# set parameters
epochs = 100 # number of epochs
lr = 0.001 # learning rate
hid_size = 20 # number of hidden nodes

net = ShallowNet({"input_size": X.shape[1], "hidden_size": hid_size, 'output_size': 1}) # instantiate class
for i in range(epochs):
    net.one_epoch(X, y, lr)
    print('Epoch: {} - MSE: {}'.format(i+1, net.mse(y)))

Epoch: 1 - MSE: 793.0345556683468
Epoch: 2 - MSE: 362.87716141492405
Epoch: 3 - MSE: 194.2119238284208
Epoch: 4 - MSE: 122.08802074000201
Epoch: 5 - MSE: 94.23555429501137
Epoch: 6 - MSE: 84.39322553759308
Epoch: 7 - MSE: 81.02411345296193
Epoch: 8 - MSE: 79.80099187555813
Epoch: 9 - MSE: 79.25618599968593
Epoch: 10 - MSE: 78.9203150166109
Epoch: 11 - MSE: 78.64833191968954
Epoch: 12 - MSE: 78.39636763625683
Epoch: 13 - MSE: 78.15137771139001
Epoch: 14 - MSE: 77.90956951612256
Epoch: 15 - MSE: 77.66989707298116
Epoch: 16 - MSE: 77.43212186305499
Epoch: 17 - MSE: 77.19624025195081
Epoch: 18 - MSE: 76.96231591795106
Epoch: 19 - MSE: 76.73043103688902
Epoch: 20 - MSE: 76.50067179453062
Epoch: 21 - MSE: 76.27312362036895
Epoch: 22 - MSE: 76.0478691133489
Epoch: 23 - MSE: 75.82498672786402
Epoch: 24 - MSE: 75.60454973304854
Epoch: 25 - MSE: 75.38662534824276
Epoch: 26 - MSE: 75.17127405018766
Epoch: 27 - MSE: 74.95854906058393
Epoch: 28 - MSE: 74.74849601697166
Epoch: 29 - MSE: 74.541152821