# HW3 | Problem 5: Neural Network
#### Cormac Taylor
#### I pledge my honor that I have abided by the Stevens Honor System.

# Installs

In [336]:
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install scikit-learn
!{sys.executable} -m pip install ucimlrepo



# Imports
- NumPy
- Pandas
- Sklearn
- fetch_ucirepo

In [337]:
import numpy as np
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo

# Set Up

### Get raw data

In [338]:
# R. Fisher. "Iris," UCI Machine Learning Repository, 1936. [Online]. Available: https://doi.org/10.24432/C56C76.
iris = fetch_ucirepo(id=53) 

raw_X = iris.data.features
raw_y = iris.data.targets 

classes = np.unique(raw_y)
print(f"classes: {classes}")

raw_X

classes: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


Unnamed: 0,sepal length,sepal width,petal length,petal width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


### Normalize and Split

In [339]:
raw_X = np.array(raw_X)
raw_y = np.array(raw_y)

# standard gaussian on data values
X = (raw_X - np.mean(raw_X, axis=0)) / np.std(raw_X, axis=0)

# mapping string class to vector 
class_map = {val: i for (i, val) in enumerate(classes)}
y = np.zeros((np.shape(raw_y)[0], np.shape(classes)[0]))
for (i, val) in enumerate(raw_y):
    y[i][class_map[val[0]]] = 1
    
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0

# Part 1: Neural Network (sigmoid; one hidden layer)

In [None]:
# https://dev.to/shamdasani/build-a-flexible-neural-network-with-backpropagation-in-python
class NeuralNetwork:
    
    def __init__(self, inputSize, outputSize, hiddenSize):
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.hiddenSize = hiddenSize
        
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)
        self.b1 = np.zeros((1, self.hiddenSize))
        
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)
        self.b2 = np.zeros((1, self.outputSize))

    def forward(self, X):
        
        self.Z1 = np.dot(X, self.W1) + self.b1          # X1 = W1 * X + b1
        self.H1 = self.sigmoid(self.Z1)                 # H1 = sigmoid(Z1)
        
        self.Z2 = np.dot(self.H1, self.W2) + self.b2    # Z2 = W2 * H1 + b2
        self.H2 = self.softmax(self.Z2)                 # H2 = sigmoid(Z2)

        return self.H2

    def train(self, X, y, epochs=1000, learning_rate=0.1):
        for _ in range(epochs):
            y_hat = self.forward(X)
            N_inv = 1 / np.shape(X)[0]
            
            self.dZ2 = y_hat - y                                                # dZ2 = H2 - y
            self.dW2 = N_inv * np.dot(self.H1.T, self.dZ2)                      # dW2 = 1/N * dZ2 * H1^T
            self.db2 = N_inv * np.sum(self.dZ2, axis=0, keepdims=True)          # db2 = 1/N * S(dZ1)
        
            self.dZ1 = np.dot(self.dZ2, self.W2.T) * self.sigmoidPrime(self.H1) # dZ1 = W2^T * dZ2 * H1 * (1 - H1)
            self.dW1 = N_inv * np.dot(X.T, self.dZ1)                            # dW1 = 1/N * dZ1 * X^T
            self.db1 = N_inv * np.sum(self.dZ1, axis=0, keepdims=True)          # db1 = 1/N * S(dz1)
        
            self.W2 -= learning_rate * self.dW2
            self.b2 -= learning_rate * self.db2
            
            self.W1 -= learning_rate * self.dW1
            self.b1 -= learning_rate * self.db1

    def predict(self, X):
        y_hat = self.forward(X)
        prediction = np.zeros((np.shape(y_hat)[0], np.shape(y_hat)[1]))
        argmax = np.argmax(y_hat, axis=1)
        for (i, j) in enumerate(argmax):
            prediction[i][j] = 1
        return prediction
    
    def accuracy(self, X, y):
        y_hat = self.predict(X)

        count = 0
        for (i, yi) in enumerate(y):
            yi_ind = np.argmax(yi)
            count += y_hat[i][yi_ind]

        return (count / np.shape(y)[0])

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoidPrime(self, x):
        return x * (1 - x)

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)


# Model Validation

In [429]:
HIDDEN_LAYERS = 32
nn = NeuralNetwork(np.shape(X_train)[1], np.shape(y_train)[1], HIDDEN_LAYERS)
nn.train(X_train, y_train)

training_accuracy = nn.accuracy(X_train, y_train)
testing_accuracy = nn.accuracy(X_test, y_test)

print(f"Training Accuracy: {training_accuracy*100:.2f}%")
print(f"Testing Accuracy: {testing_accuracy*100:.2f}%")


Training Accuracy: 98.33%
Testing Accuracy: 100.00%


# Part 2: Neural Network (ReLU; variable hidden layers)

In [485]:
# https://dev.to/shamdasani/build-a-flexible-neural-network-with-backpropagation-in-python
class ModifiedNeuralNetwork:
    
    def __init__(self, layer_sizes):
        if len(layer_sizes) <= 2:
            raise ValueError("Must have at least 1 hidden layer. Form: [input, hidden1, hidden2, ..., output]")

        self.LAYER_SIZES = layer_sizes
        self.LAYER_SIZES_LENGTH = len(layer_sizes)
        self.NUM_TRANSITIONS = len(layer_sizes) - 1

        
        self.W_arr = [None] * self.NUM_TRANSITIONS
        self.b_arr = [None] * self.NUM_TRANSITIONS
        for i in range(1, self.LAYER_SIZES_LENGTH):
            self.W_arr[i-1] = np.random.randn(self.LAYER_SIZES[i-1], self.LAYER_SIZES[i])
            self.b_arr[i-1] = np.zeros((1, self.LAYER_SIZES[i]))
            
        self.Z_arr = [None] * self.NUM_TRANSITIONS
        self.H_arr = [None] * self.NUM_TRANSITIONS
        
        self.dZ_arr = [None] * self.NUM_TRANSITIONS
        self.dW_arr = [None] * self.NUM_TRANSITIONS
        self.db_arr = [None] * self.NUM_TRANSITIONS
            
    def forward(self, X):
        
        self.Z_arr[0] = np.dot(X, self.W_arr[0]) + self.b_arr[0]
        self.H_arr[0] = self.ReLU(self.Z_arr[0])

        for i in range(1, self.NUM_TRANSITIONS - 1):
            self.Z_arr[i] = np.dot(self.H_arr[i-1], self.W_arr[i]) + self.b_arr[i]
            self.H_arr[i] = self.ReLU(self.Z_arr[i])

        self.Z_arr[self.NUM_TRANSITIONS - 1] = np.dot(self.H_arr[self.NUM_TRANSITIONS - 2], self.W_arr[self.NUM_TRANSITIONS - 1]) + self.b_arr[self.NUM_TRANSITIONS - 1]
        self.H_arr[self.NUM_TRANSITIONS - 1] = self.softmax(self.Z_arr[self.NUM_TRANSITIONS - 1])

        return self.H_arr[self.NUM_TRANSITIONS - 1]

    def train(self, X, y, epochs=1000, learning_rate=0.1):
        for _ in range(epochs):
            y_hat = self.forward(X)
            N_inv = 1 / np.shape(X)[0]
            
            idx = self.NUM_TRANSITIONS - 1
            self.dZ_arr[idx] = y_hat - y
            self.dW_arr[idx] = N_inv * np.dot(self.H_arr[idx-1].T, self.dZ_arr[idx])
            self.db_arr[idx] = N_inv * np.sum(self.dZ_arr[idx], axis=0, keepdims=True)
        
            for i in range(self.NUM_TRANSITIONS - 2, 0, -1):
                self.dZ_arr[i] = np.dot(self.dZ_arr[i+1], self.W_arr[i+1].T) * self.ReLUPrime(self.H_arr[i])
                self.dW_arr[i] = N_inv * np.dot(self.H_arr[i-1].T, self.dZ_arr[i])
                self.db_arr[i] = N_inv * np.sum(self.dZ_arr[i], axis=0, keepdims=True)

            idx = 0
            self.dZ_arr[idx] = np.dot(self.dZ_arr[idx+1], self.W_arr[idx+1].T) * self.ReLUPrime(self.H_arr[idx])
            self.dW_arr[idx] = N_inv * np.dot(X.T, self.dZ_arr[idx])
            self.db_arr[idx] = N_inv * np.sum(self.dZ_arr[idx], axis=0, keepdims=True)

            for i in range(self.NUM_TRANSITIONS - 1, -1, -1):
                self.W_arr[i] -= learning_rate * self.dW_arr[i]
                self.b_arr[i] -= learning_rate * self.db_arr[i]

    def predict(self, X):
        y_hat = self.forward(X)
        prediction = np.zeros((np.shape(y_hat)[0], np.shape(y_hat)[1]))
        argmax = np.argmax(y_hat, axis=1)
        for (i, j) in enumerate(argmax):
            prediction[i][j] = 1
        return prediction
    
    def accuracy(self, X, y):
        y_hat = self.predict(X)

        count = 0
        for (i, yi) in enumerate(y):
            yi_ind = np.argmax(yi)
            count += y_hat[i][yi_ind]

        return (count / np.shape(y)[0])

    def ReLU(self, x):
        return np.maximum(0, x)

    def ReLUPrime(self, x):
        sign_x = np.sign(x)
        return np.maximum(0, sign_x)

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)


# Model Validation

In [539]:
LAYERS = [np.shape(X_train)[1], 32, 16, np.shape(y_train)[1]]
mnn = ModifiedNeuralNetwork(LAYERS)

mnn.train(X_train, y_train)

training_accuracy = mnn.accuracy(X_train, y_train)
testing_accuracy = mnn.accuracy(X_test, y_test)

print(f"Training Accuracy: {training_accuracy*100:.2f}%")
print(f"Testing Accuracy: {testing_accuracy*100:.2f}%")


Training Accuracy: 100.00%
Testing Accuracy: 100.00%
