In [1]:
import numpy as np

**Goal 1: Create perceptron**

In [2]:
def perceptron(x, W, b):
    '''
    Input: weights W, biases b, input activations x
    Output: Single hypothesis
    '''
    z = np.sum(W.T * x) + b
    return sigmoid(z)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def deriv_sigmoid(z):
    return sigmoid(z) * (1-sigmoid(z))

In [3]:
activations = np.array([0.3, 0.1, 0.7, 8])
weights = np.random.rand(activations.shape[0])
bias = 1

perceptron(activations, weights, bias)

0.9921509518294168

**Goal 2: Get, Clean, & Normalize Data**

In [4]:
import pandas as pd
data = pd.read_csv("housepricedata.csv")
print(data.head())

   LotArea  OverallQual  OverallCond  ...  Fireplaces  GarageArea  AboveMedianPrice
0     8450            7            5  ...           0         548                 1
1     9600            6            8  ...           1         460                 1
2    11250            7            5  ...           1         608                 1
3     9550            7            5  ...           1         642                 0
4    14260            8            5  ...           1         836                 1

[5 rows x 11 columns]


In [5]:
houses = data.values # DataFrame --> Array
x = houses[:, :-1] # Input Activations
y = houses[:, -1] # Output labels (0 or 1)
print(x)

[[ 8450     7     5 ...     8     0   548]
 [ 9600     6     8 ...     6     1   460]
 [11250     7     5 ...     6     1   608]
 ...
 [ 9042     7     9 ...     9     2   252]
 [ 9717     5     6 ...     5     0   240]
 [ 9937     5     6 ...     6     0   276]]


In [6]:
from sklearn.preprocessing import MinMaxScaler
x = MinMaxScaler().fit(x).transform(x) # features between 0 and 1

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_valtest, Y_train, Y_valtest = train_test_split(x, y, test_size=0.3) # 70% Train
X_val, X_test, Y_val, Y_test = train_test_split(X_valtest, Y_valtest, test_size=0.5) # 15% to test and val

In [8]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)
print(x.shape)

(1022, 10)
(219, 10)
(219, 10)
(1460, 10)


**Goal 3: Forward Propagation**

In [19]:
#np.random.seed(0)
def forwardProp(activations, weights, biases, zArr, numLayers):
    '''
    Vectorized Implementation
    Input: First Layer activations, weights, biases, number of layers
    Output: Last layer
    '''
    # Layer i
    for i in range(numLayers-1):
        z = np.dot(weights[i], activations[i]) + biases[i]
        zArr.append(z)
        activations.append(sigmoid(z))

params = {
    "weights": [
    np.random.randn(X_train.shape[0], X_train.shape[1]) * np.sqrt(2/X_train.shape[0]), # l1
    np.random.randn(1, X_train.shape[0]) * np.sqrt(2/X_train.shape[0])  # l2
    ],
    
    "biases": 2,    
    
    "numLayers": 3 
}

numLayers = 3
zArr = []
activations = [X_train.T]
weights = [
    np.random.randn(X_train.shape[0], X_train.shape[1]) * np.sqrt(2/X_train.shape[0]), # l1
    np.random.randn(1, X_train.shape[0]) * np.sqrt(2/X_train.shape[0])  # l2
          ]
biases = np.ones(numLayers-1) # bias in each layer except output

forwardProp(activations, weights, biases, zArr, numLayers)

In [16]:
def backwardProp(activations, weights, biases, numLayers, actual, zArr, weightDecay, learning_rate):
    # Compute Cost
    cost = np.mean(1/2 * np.linalg.norm(activations[-1]-actual) ** 2)
    for layer in range(numLayers - 1):
        for i in range(activations[layer].shape[0]):
            for j in range(activations[layer+1].shape[0]):
                cost += weights[layer][j][i] ** 2
    cost *= weightDecay / 2
    print(cost)
    
    # Output layer delta
    deltas = {}
    delta_nL = np.multiply(-(actual - activations[-1]), deriv_sigmoid(zArr[-1]))
    deltas[2] = delta_nL
    
    # Go through hidden layers (l2 --> index 1)
    for l in range(numLayers-2, 0, -1): 
        deltas[l] = np.dot(weights[l].T, deltas[l+1]) * deriv_sigmoid(zArr[l])
        deriv_W = np.dot(deltas[l+1], activations[l].T)
        deriv_b = deltas[l+1]
        
        print(deriv_W)
        weights[l] = weights[l] - learning_rate * deriv_W
        # todo: update biases
    
weightDecay = 0.5
learning_rate = 0.001
backwardProp(activations, weights, biases, numLayers, Y_train, zArr, weightDecay, learning_rate)

53.423091937847055
[[32.86547733 32.78740671 32.67062488 ... 32.93326428 32.79740754
  32.50004513]]


In [17]:
def runNetwork(activations, weights, biases, numLayers, actual, zArr, weightDecay, learning_rate):
    epochs = 10
    for x in range(epochs):
        forwardProp(activations, weights, biases, zArr, numLayers)
        backwardProp(activations, weights, biases, numLayers, Y_train, zArr, weightDecay, learning_rate)
    
runNetwork(activations, weights, biases, numLayers, Y_train, zArr, weightDecay, learning_rate)

69.75010210067936
[[-5.21889777e-08 -5.16251024e-08 -5.17567474e-08 ... -5.27003481e-08
  -5.21971824e-08 -5.13483500e-08]]
69.75010209983735
[[-5.21889797e-08 -5.16251044e-08 -5.17567494e-08 ... -5.27003501e-08
  -5.21971844e-08 -5.13483520e-08]]
69.75010209899575
[[-5.21889818e-08 -5.16251064e-08 -5.17567514e-08 ... -5.27003522e-08
  -5.21971865e-08 -5.13483540e-08]]
69.75010209815407
[[-5.21889838e-08 -5.16251084e-08 -5.17567534e-08 ... -5.27003542e-08
  -5.21971885e-08 -5.13483560e-08]]
69.7501020973124
[[-5.21889858e-08 -5.16251104e-08 -5.17567554e-08 ... -5.27003563e-08
  -5.21971905e-08 -5.13483579e-08]]
69.75010209647048
[[-5.21889878e-08 -5.16251124e-08 -5.17567574e-08 ... -5.27003583e-08
  -5.21971925e-08 -5.13483599e-08]]
69.7501020956288
[[-5.21889899e-08 -5.16251144e-08 -5.17567595e-08 ... -5.27003603e-08
  -5.21971946e-08 -5.13483619e-08]]
69.75010209478705
[[-5.21889919e-08 -5.16251164e-08 -5.17567615e-08 ... -5.27003624e-08
  -5.21971966e-08 -5.13483639e-08]]
69.7501020

In [18]:
print(weights)

[array([[ 0.00769028,  0.03662686, -0.02912688, ...,  0.14495106,
        -0.00359307,  0.01855318],
       [-0.06757856, -0.03443372,  0.04046309, ..., -0.06455656,
        -0.07165728, -0.01360764],
       [ 0.02210058,  0.00506475,  0.02423172, ..., -0.01267709,
         0.01681209,  0.04267638],
       ...,
       [-0.00819039,  0.05742223, -0.06916011, ...,  0.06841757,
        -0.03969034,  0.00594728],
       [ 0.05264275,  0.0436624 ,  0.00443696, ..., -0.07889662,
         0.06002359, -0.06170833],
       [ 0.01759781,  0.01615675, -0.01995902, ...,  0.02891982,
        -0.09179208, -0.07777958]]), array([[-0.17872026,  0.01576308,  0.00265071, ..., -0.10429233,
        -0.00093296, -0.0465038 ]])]


**References**
1. http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/
2. https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78
3. https://medium.com/@vdpatel/implementing-a-multi-layer-perceptron-neural-network-in-python-b22b5a3bdfa3
4. https://www.codeproject.com/articles/821348/multilayer-perceptron-in-python
5. https://www.allaboutcircuits.com/technical-articles/how-to-create-a-multilayer-perceptron-neural-network-in-python/
6. https://vitalflux.com/perceptron-explained-using-python-example/