In [3]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [103]:
# Source CSV for the iris data set.  `names=` supplies the column labels and
# `header=None` makes explicit that the first line of the file is a data row.
iris_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_df = pd.read_csv(
    iris_url,
    header=None,
    names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'],
)
# `to_numpy()` is the pandas-recommended replacement for `.values`.  The last
# column still holds the species strings here; a later cell maps them to ints.
iris_np = iris_df.to_numpy()

In [104]:
# Integer id assigned to each species string found in the `class` column.
map_dict = {'Iris-setosa': 0,
            'Iris-versicolor': 1,
            'Iris-virginica': 2}

map_func = np.vectorize(lambda x: map_dict[x])

# Rebuild the array from the untouched DataFrame before mapping.  The previous
# version rewrote `iris_np` in place, so running this cell a second time looked
# up already-mapped integers in `map_dict` and raised KeyError.
iris_np = iris_df.to_numpy(dtype=object, copy=True)
iris_np[:, -1] = map_func(iris_np[:, -1])

In [105]:
# Hold out 10 rows from each consecutive block of 50 rows (30 test rows in
# total); the remaining 120 rows are used for training.
# NOTE(review): this assumes the file lists the three species in consecutive
# blocks of 50 rows -- confirm against the data source.
SPLIT_SEED = 0  # fixed seed so Restart & Run All reproduces the same split
split_rng = np.random.default_rng(SPLIT_SEED)

test_indices = np.concatenate([
    split_rng.choice(np.arange(start, start + 50), size=10, replace=False)
    for start in (0, 50, 100)
])
# setdiff1d returns the sorted complement, so the training order no longer
# depends on the iteration order of a Python set.
train_indices = np.setdiff1d(np.arange(0, 150), test_indices)

In [153]:
# Slice the full array into the two splits by row index.
train = iris_np[train_indices, :]
test = iris_np[test_indices, :]

def process(dataset, num_classes=None):
    """Split a 2-D array into a float feature matrix and one-hot labels.

    Parameters
    ----------
    dataset : ndarray of shape (n_rows, n_columns)
        Every column except the last is converted to float features; the last
        column is converted to integer class ids.
    num_classes : int, optional
        Width of the one-hot matrix.  When omitted it is inferred as
        ``max(label) + 1`` from `dataset` itself (the original behaviour),
        which yields a narrower matrix for any split that happens to lack the
        highest class id.  Pass it explicitly to keep splits consistent.

    Returns
    -------
    X : float ndarray of shape (n_rows, n_columns - 1)
    y_one_hot : float ndarray of shape (n_rows, num_classes)
    """
    X = dataset[:, :-1].astype(float)
    labels = dataset[:, -1].astype(int)
    if num_classes is None:
        # An empty split has no maximum; fall back to zero columns instead of
        # raising from max() on an empty sequence.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    y_one_hot = np.zeros((labels.size, num_classes))
    # Fancy indexing sets exactly one 1 per row, at that row's class column.
    y_one_hot[np.arange(labels.size), labels] = 1
    return X, y_one_hot

# Training features and one-hot training labels.
X, y = process(dataset=train)

In [145]:
class Layer():
    """Base class for the layers below; every method here is a no-op."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def forward(self, input):
        """Placeholder forward pass; does nothing and returns None."""
        return None

    def backward(self, input_gradient):
        """Placeholder backward pass; does nothing and returns None."""
        return None

In [210]:
class MultiplyLayer(Layer):
    """Affine layer computing ``weights.T @ input + bias``.

    `weights` has shape (input_size, hidden_units) and `bias` has shape
    (hidden_units,); both are drawn from a standard normal distribution.
    The backward pass uses ``np.outer``, so it is written for one 1-D sample
    at a time.
    """

    def __init__(self, input_size, hidden_units):
        weight_shape = (input_size, hidden_units)
        self.weights = np.random.normal(size=weight_shape)
        self.bias = np.random.normal(size=hidden_units)

    def forward(self, input):
        """Cache `input`, then store and return the affine output."""
        self.input = input
        projected = np.dot(self.weights.T, self.input)
        self.output = projected + self.bias
        return self.output

    def backward(self, input_gradient):
        """Store the three gradients and return the one w.r.t. the input.

        `input_gradient` is the gradient of the loss with respect to this
        layer's output, as handed back by the following layer.
        """
        # dL/dW[i, j] = input[i] * upstream[j]
        self.weights_gradient = np.outer(self.input, input_gradient)
        # The bias is added as-is, so its gradient is the upstream gradient.
        self.bias_gradient = input_gradient
        # Gradient w.r.t. this layer's input, passed on to the previous layer.
        self.input_gradient = np.dot(self.weights, input_gradient)
        return self.input_gradient

In [211]:
# Smoke-test a single dense layer on the first training sample.
x0 = X[0]
mul_layer = MultiplyLayer(input_size=4, hidden_units=6)

# forward() returns the same array it stores in `mul_layer.output`.
print("Outputs\n", mul_layer.forward(x0))

# A random vector stands in for the gradient a following layer would send back.
mul_layer.backward(input_gradient=np.random.normal(size=6))
print("\nWeights Gradient\n", mul_layer.weights_gradient)
print("\nBias Gradient\n", mul_layer.bias_gradient)
print("\nInput Gradient\n", mul_layer.input_gradient)

Outputs
 [ 3.81709679 -3.25386426  7.69996012  0.12172383 -5.1335087  -4.08371983]

Weights Gradient
 [[ 1.29818117 -1.09968535  6.06715682 -7.80064841 -4.61036132 11.80617658]
 [ 0.89090865 -0.75468603  4.16373508 -5.35338616 -3.16397345  8.10227805]
 [ 0.35636346 -0.30187441  1.66549403 -2.14135446 -1.26558938  3.24091122]
 [ 0.05090907 -0.04312492  0.23792772 -0.30590778 -0.18079848  0.46298732]]

Bias Gradient
 [ 0.25454533 -0.21562458  1.18963859 -1.5295389  -0.90399242  2.31493658]

Input Gradient
 [ 2.18958932 -1.20645166 -2.48033123 -0.33620507]


In [290]:
class ReLU(Layer):
    """Element-wise rectified linear unit, ``max(0, x)``."""

    def __init__(self):
        pass

    def forward(self, input):
        """Store and return ``max(0, input)``; also cache the 0/1 gradient mask.

        `input` must be an ndarray (its ``.shape`` is read).
        """
        self.input = input
        # 1.0 where the input is strictly positive, 0.0 elsewhere.  A plain
        # vectorised comparison replaces the per-element np.vectorize lambda,
        # which ran a Python call per entry and raised on size-0 input.
        self.local_gradient = (input > 0).astype(float)
        self.output = np.maximum(np.zeros(input.shape), input)
        return self.output

    def backward(self, input_gradient):
        """Return the upstream gradient with clamped positions zeroed out."""
        # The mask was computed in forward(), so entries that were clamped to
        # zero there also contribute zero gradient here.
        self.gradient = self.local_gradient * input_gradient
        return self.gradient

In [291]:
# Feed the dense layer's output through a ReLU and inspect both results.
z1 = mul_layer.output
relu = ReLU()

# forward() returns the same array it stores in `relu.output`.
print("Outputs\n", relu.forward(z1))
print("Local Gradient\n", relu.local_gradient)

Outputs
 [3.81709679 0.         7.69996012 0.12172383 0.         0.        ]
Local Gradient
 [1. 0. 1. 1. 0. 0.]


In [220]:
class Softmax(Layer):
    """Softmax over a 1-D score vector, with its full Jacobian for backprop."""

    def __init__(self):
        pass

    def forward(self, input):
        """Store and return ``exp(input) / sum(exp(input))``.

        The maximum score is subtracted before exponentiating.  Softmax is
        invariant to that shift, and it keeps ``np.exp`` from overflowing to
        inf (and the output from becoming NaN) when scores are large.
        """
        self.input = input
        # An empty input has no maximum; shift by 0 so it still yields an
        # empty output, as before.
        shift = np.max(input) if np.size(input) else 0.0
        input_exp = np.exp(input - shift)
        self.output = input_exp / np.sum(input_exp)
        return self.output

    def backward(self, input_gradient):
        """Store the Jacobian and return it applied to `input_gradient`.

        The Jacobian is J[i, j] = s[i] * (delta_ij - s[j]), i.e.
        diag(s) - outer(s, s), where s is the output cached by forward().
        """
        probs = self.output
        self.local_gradient = np.diag(probs) - np.outer(probs, probs)
        self.gradient = np.dot(self.local_gradient, input_gradient)
        return self.gradient

In [200]:
# Apply softmax to the dense layer's raw output.
softmax = Softmax()

# forward() returns the same array it stores in `softmax.output`.
print("Softmax\n", softmax.forward(z1))

Softmax
 [8.18783173e-01 1.74255283e-01 5.70374506e-07 1.09366913e-03
 4.02686323e-05 5.82703533e-03]


In [201]:
class CrossEntropyLoss(Layer):
    """Cross-entropy between a predicted distribution and a label vector.

    Unlike the other layers, ``forward`` also takes the label and
    ``backward`` takes no argument, because this is the end of the chain.

    Parameters
    ----------
    eps : float, optional
        Lower bound applied to the predicted probabilities before taking the
        log or dividing.  Without it, a probability that underflows to 0
        gives ``log(0) = -inf`` (and ``0 * -inf = nan`` in the dot product)
        and a division by zero in the gradient.
    """

    def __init__(self, eps=1e-12):
        self.eps = eps

    def forward(self, input, label):
        """Store and return ``-sum(label * log(input))``."""
        self.input = input
        self.label = label
        safe_input = np.clip(self.input, self.eps, None)
        self.loss = -1 * np.dot(self.label, np.log(safe_input))
        return self.loss

    def backward(self):
        """Store and return the loss gradient w.r.t. the prediction, ``-label / input``."""
        safe_input = np.clip(self.input, self.eps, None)
        self.gradient = -1 * np.divide(self.label, safe_input)
        return self.gradient

In [202]:
# One forward pass through dense layer -> softmax -> loss on the first sample.
layer1 = MultiplyLayer(4, 3)
softmax = Softmax()
loss = CrossEntropyLoss()

print("X\n", X[0])
print("\ny\n", y[0])

# Each forward() returns the value it also stores on the object.
print("\nLayer 1 Output\n", layer1.forward(X[0]))
print("\nLayer 1 Weights\n", layer1.weights)

print("\nPrediction\n", softmax.forward(layer1.output))

print("\nLoss\n", loss.forward(softmax.output, y[0]))

X
 [5.1 3.5 1.4 0.2]

y
 [1. 0. 0.]

Layer 1 Output
 [-4.68775714  6.79915466 -6.13259108]

Layer 1 Weights
 [[ 0.62075259  1.1640649  -0.84383394]
 [-1.9700508  -0.02372526 -0.78859462]
 [-0.79680868  0.45923137  0.1285176 ]
 [-1.85980172 -1.45545795 -0.16115065]]

Prediction
 [1.02634196e-05 9.99987317e-01 2.41996271e-06]

Loss
 11.486924483755933


In [207]:
# Back-propagate through loss -> softmax -> dense layer, printing each gradient.
loss.backward()
print("\nGradient of Loss w.r.t Prediction\n", loss.gradient)

softmax.backward(loss.gradient)
print("\nLocal Gradient of Softmax Output w.r.t Layer1 Output\n", softmax.local_gradient)
print("\nGradient of Loss w.r.t Layer1 Output\n", softmax.gradient)

layer1.backward(softmax.gradient)
print("\nWeights Gradient\n", layer1.weights_gradient)
# Was "\Bias Gradient\n": `\B` is not a valid escape sequence, so a literal
# backslash was printed instead of the intended blank line.
print("\nBias Gradient\n", layer1.bias_gradient)


Gradient of Loss w.r.t Prediction
 [-97433.41335777     -0.             -0.        ]

Local Gradient of Softmax Output w.r.t Layer1 Output
 [[ 1.02633142e-05 -1.02632894e-05 -2.48370926e-11]
 [-1.02632894e-05  1.26832214e-05 -2.41993202e-06]
 [-2.48370926e-11 -2.41993202e-06  2.41995686e-06]]

Gradient of Loss w.r.t Layer1 Output
 [-9.99989737e-01  9.99987317e-01  2.41996271e-06]

Weights Gradient
 [[-5.09994766e+00  5.09993531e+00  1.23418098e-05]
 [-3.49996408e+00  3.49995561e+00  8.46986950e-06]
 [-1.39998563e+00  1.39998224e+00  3.38794780e-06]
 [-1.99997947e-01  1.99997463e-01  4.83992543e-07]]
\Bias Gradient
 [-9.99989737e-01  9.99987317e-01  2.41996271e-06]


In [299]:
class NeuralNetwork():
    """Fixed 4-6-4-3 fully connected classifier trained one sample at a time.

    The stack is dense -> ReLU -> dense -> ReLU -> dense -> softmax, with a
    cross-entropy loss on top.
    """

    def __init__(self):
        hidden = [MultiplyLayer(4, 6), ReLU(), MultiplyLayer(6, 4), ReLU()]
        head = [MultiplyLayer(4, 3), Softmax()]
        self.layers = hidden + head
        self.loss_func = CrossEntropyLoss()

    def forward(self, x, y=None, logging=False):
        """Run `x` through every layer and return the final layer's output.

        When `y` is given, the loss is also computed and stored in
        `self.loss`.  With `logging` enabled, each intermediate output is
        printed.
        """
        if logging:
            print("Input: ", x, "Label: ", y)

        activation = x
        for layer in self.layers:
            activation = layer.forward(activation)
            if logging:
                print("\n", type(layer), "\nOutput: ", activation, "\nShape: ", activation.shape)

        if y is None:
            return activation

        self.loss = self.loss_func.forward(activation, y)
        if logging:
            print("\nLoss: ", self.loss)
        return activation

    def backward(self, logging=False):
        """Propagate the loss gradient from the last layer back to the first.

        Each layer stores its own gradients; nothing is returned.  Requires a
        preceding ``forward`` call that was given a label.
        """
        grad = self.loss_func.backward()
        if logging:
            print("dL/dpred: ", grad)

        for layer in reversed(self.layers):
            grad = layer.backward(grad)
            if logging:
                print("\n", type(layer), "\nGradient: ", grad)

    def SGD(self, X, y, epochs, logging=False, lr=0.01):
        """Train for `epochs` passes over `X`, updating after every sample.

        Samples are visited in index order.  With `logging` enabled, the
        accuracy on `X` is printed after each epoch.
        """
        for e in range(epochs):
            for i in range(len(X)):
                self.forward(X[i], y[i])
                self.backward()
                self._apply_gradients(lr)

            if not logging:
                continue

            n_accurate = self._count_correct(X, y)
            print("Epoch ", e, "\n", "-"*50, "\n")
            print("Accuracy: ", n_accurate/len(X))

    def _apply_gradients(self, lr):
        """Take one gradient-descent step on every dense layer, in place."""
        trainable = (layer for layer in self.layers if isinstance(layer, MultiplyLayer))
        for layer in trainable:
            layer.weights -= lr * layer.weights_gradient
            layer.bias -= lr * layer.bias_gradient

    def _count_correct(self, X, y):
        """Count samples whose arg-max prediction equals the arg-max of the label."""
        return sum(
            1
            for i in range(len(X))
            if np.argmax(self.forward(X[i])) == np.argmax(y[i])
        )

In [300]:
# Build a network whose dense layers start from random normal weights.
# SGD() updates those weights in place, so re-running only the training cell
# continues from the current weights; re-run this cell to start over.
nn = NeuralNetwork()

In [307]:
# 100 passes over the training set, printing the training accuracy per epoch.
nn.SGD(X, y, epochs=100, lr=0.001, logging=True)

Epoch  0 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  1 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  2 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  3 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  4 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  5 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  6 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  7 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  8 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  9 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  10 
 ------------------------------------------------

Epoch  87 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  88 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  89 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  90 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  91 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  92 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  93 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  94 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  95 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  96 
 -------------------------------------------------- 

Accuracy:  0.9583333333333334
Epoch  97 
 --------------------------------------

In [310]:
# Accuracy on the held-out rows: arg-max prediction against arg-max one-hot label.
X_test, y_test = process(test)
n_accurate = 0
for sample, target in zip(X_test, y_test):
    if np.argmax(nn.forward(sample)) == np.argmax(target):
        n_accurate += 1
print("Test Accuracy: ", n_accurate/len(X_test))

Test Accuracy:  0.9666666666666667


In [None]:
class MultiplyLayer2(Layer):
    """Affine layer computing ``weights.T @ input + bias``.

    `weights` has shape (input_size, hidden_units) and `bias` has shape
    (hidden_units,); both are drawn from a standard normal distribution.
    The backward pass uses ``np.outer``, so it is written for one 1-D sample
    at a time.
    """

    def __init__(self, input_size, hidden_units):
        weight_shape = (input_size, hidden_units)
        self.weights = np.random.normal(size=weight_shape)
        self.bias = np.random.normal(size=hidden_units)

    def forward(self, input):
        """Cache `input`, then store and return the affine output."""
        self.input = input
        projected = np.dot(self.weights.T, self.input)
        self.output = projected + self.bias
        return self.output

    def backward(self, input_gradient):
        """Store the three gradients and return the one w.r.t. the input.

        `input_gradient` is the gradient of the loss with respect to this
        layer's output, as handed back by the following layer.
        """
        # dL/dW[i, j] = input[i] * upstream[j]
        self.weights_gradient = np.outer(self.input, input_gradient)
        # The bias is added as-is, so its gradient is the upstream gradient.
        self.bias_gradient = input_gradient
        # Gradient w.r.t. this layer's input, passed on to the previous layer.
        self.input_gradient = np.dot(self.weights, input_gradient)
        return self.input_gradient
    
class ReLU2(Layer):
    """Element-wise rectified linear unit, ``max(0, x)``."""

    def __init__(self):
        pass

    def forward(self, input):
        """Store and return ``max(0, input)``; also cache the 0/1 gradient mask.

        `input` must be an ndarray (its ``.shape`` is read).
        """
        self.input = input
        # 1.0 where the input is strictly positive, 0.0 elsewhere.  A plain
        # vectorised comparison replaces the per-element np.vectorize lambda,
        # which ran a Python call per entry and raised on size-0 input.
        self.local_gradient = (input > 0).astype(float)
        self.output = np.maximum(np.zeros(input.shape), input)
        return self.output

    def backward(self, input_gradient):
        """Return the upstream gradient with clamped positions zeroed out."""
        # The mask was computed in forward(), so entries that were clamped to
        # zero there also contribute zero gradient here.
        self.gradient = self.local_gradient * input_gradient
        return self.gradient
    
class Softmax2(Layer):
    """Softmax over a 1-D score vector, with its full Jacobian for backprop."""

    def __init__(self):
        pass

    def forward(self, input):
        """Store and return ``exp(input) / sum(exp(input))``.

        The maximum score is subtracted before exponentiating.  Softmax is
        invariant to that shift, and it keeps ``np.exp`` from overflowing to
        inf (and the output from becoming NaN) when scores are large.
        """
        self.input = input
        # An empty input has no maximum; shift by 0 so it still yields an
        # empty output, as before.
        shift = np.max(input) if np.size(input) else 0.0
        input_exp = np.exp(input - shift)
        self.output = input_exp / np.sum(input_exp)
        return self.output

    def backward(self, input_gradient):
        """Store the Jacobian and return it applied to `input_gradient`.

        The Jacobian is J[i, j] = s[i] * (delta_ij - s[j]), i.e.
        diag(s) - outer(s, s), where s is the output cached by forward().
        """
        probs = self.output
        self.local_gradient = np.diag(probs) - np.outer(probs, probs)
        self.gradient = np.dot(self.local_gradient, input_gradient)
        return self.gradient
    
class CrossEntropyLoss2(Layer):
    """Cross-entropy between a predicted distribution and a label vector.

    Unlike the other layers, ``forward`` also takes the label and
    ``backward`` takes no argument, because this is the end of the chain.

    Parameters
    ----------
    eps : float, optional
        Lower bound applied to the predicted probabilities before taking the
        log or dividing.  Without it, a probability that underflows to 0
        gives ``log(0) = -inf`` (and ``0 * -inf = nan`` in the dot product)
        and a division by zero in the gradient.
    """

    def __init__(self, eps=1e-12):
        self.eps = eps

    def forward(self, input, label):
        """Store and return ``-sum(label * log(input))``."""
        self.input = input
        self.label = label
        safe_input = np.clip(self.input, self.eps, None)
        self.loss = -1 * np.dot(self.label, np.log(safe_input))
        return self.loss

    def backward(self):
        """Store and return the loss gradient w.r.t. the prediction, ``-label / input``."""
        safe_input = np.clip(self.input, self.eps, None)
        self.gradient = -1 * np.divide(self.label, safe_input)
        return self.gradient

class NeuralNetwork2():
    """Fixed 4-6-4-3 fully connected classifier built from the ``*2`` layers.

    The stack is dense -> ReLU -> dense -> ReLU -> dense -> softmax, with a
    cross-entropy loss on top.
    """

    def __init__(self):
        self.layers = [
            MultiplyLayer2(4, 6),
            ReLU2(),
            MultiplyLayer2(6, 4),
            ReLU2(),
            MultiplyLayer2(4, 3),
            Softmax2()
        ]
        self.loss_func = CrossEntropyLoss2()

    def forward(self, X, Y=None, logging=False):
        """Run `X` through every layer and return the final layer's output.

        When `Y` is given, the loss is also computed and stored in
        `self.loss`.  With `logging` enabled, shapes and intermediate outputs
        are printed.
        """
        Z = X
        if logging:
            # `.shape` is an attribute, not a method, and Y may legitimately
            # be None when only a prediction is wanted.
            label_shape = None if Y is None else Y.shape
            print("Input Shape: ", X.shape, " Label Shape: ", label_shape)

        for layer in self.layers:
            Z = layer.forward(Z)
            if logging:
                print("\n", type(layer), "\nOutput: ", Z, "\nShape: ", Z.shape)

        # The original body referred to lowercase `z` / `y`, which are not
        # defined in this method (NameError, or silently a notebook global).
        if Y is not None:
            self.loss = self.loss_func.forward(Z, Y)
            if logging:
                print("\nLoss: ", self.loss)

        return Z

    def backward(self, logging=False):
        """Propagate the loss gradient from the last layer back to the first.

        Each layer stores its own gradients; nothing is returned.  Requires a
        preceding ``forward`` call that was given a label.
        """
        dz = self.loss_func.backward()
        if logging:
            print("dL/dpred: ", dz)

        for layer in self.layers[::-1]:
            dz = layer.backward(dz)
            if logging:
                print("\n", type(layer), "\nGradient: ", dz)

    def SGD(self, X, y, epochs, logging=False, lr=0.01):
        """Train for `epochs` passes over `X`, updating after every sample.

        Samples are visited in index order.  With `logging` enabled, the
        accuracy on `X` is printed after each epoch.
        """
        for e in range(epochs):
            for i in range(len(X)):
                self.forward(X[i], y[i])
                self.backward()
                for layer in self.layers:
                    # This network is built from MultiplyLayer2; testing for
                    # MultiplyLayer matched nothing, so no weight was updated.
                    if isinstance(layer, MultiplyLayer2):
                        layer.weights -= lr * layer.weights_gradient
                        layer.bias -= lr * layer.bias_gradient

            if logging:
                n_accurate = 0
                for i in range(len(X)):
                    pred = np.argmax(self.forward(X[i]))
                    label = np.argmax(y[i])
                    if pred == label:
                        n_accurate += 1
                print("Epoch ", e, "\n", "-"*50, "\n")
                print("Accuracy: ", n_accurate/len(X))