# Cole Lewis, @colelewis on GitHub
## Quiz 10/20/2022

In [170]:
# import and initial methods; support multi-layer perceptron class
import numpy as np

def linear(x, weights, bias):
    return np.dot(x, weights) + bias

def sigmoid(x): # used below for activation, compresses input values to: 0 < new_value < 1
    return 1.0 / (1.0 + np.exp(-x))

def to_onehot(y, num_classes):
    y_onehot = np.zeros((y.shape[0], num_classes))
    y_onehot[np.arange(y.size), y] = 1
    return y_onehot

In [171]:
class MuLP():
    def __init__(self, num_features, num_hidden, num_classes):
        self.num_features = num_features
        self.num_hidden = num_hidden
        self.num_classes = num_classes
        
        # if you want to extend the number of hidden layers, you can simply repeat the following two lines
        self.weights_hidden = np.zeros((num_hidden, num_features), dtype = float)
        self.bias_hidden = np.zeros(num_hidden, dtype= float)
        
        self.weights_output = np.zeros((num_classes, num_hidden), dtype= float)
        self.bias_output = np.zeros((num_classes), dtype= float)
        
    def predict(self, x):
        z1 = linear(x, np.transpose(self.weights_hidden), self.bias_hidden)
        a1 = sigmoid(z1)

        z2 = linear(a1, np.transpose(self.weights_output), self.bias_output)
        a2 = sigmoid(z2)

        return a1, a2

    def calc_mse(self, x, y):
        _, yhat = self.predict(x)
        error = np.power(yhat - y, 2).mean()

    def backward(self, x, a1, a2, y):
        
        # Output layer gradient
        # calculate partial derivatives for the output layer's weights
        dloss_da2 = 2.0 * (a2 - y) / y.shape[0]
        da2_dz2 = a2 * (1. - a2) # sigmoid derivative
        dloss_dz2 = dloss_da2 * da2_dz2
        dloss_dw_output = np.dot(np.transpose(dloss_dz2), a1)
        dloss_db_output = np.sum(dloss_dz2)

        # calculate partial derivatives with respect to hidden layer's weights
        dz2_da1 = self.weights_output
        dloss_da1 = np.dot(dloss_dz2, dz2_da1)
        da1_dz1 = a1 * (1. - a1) # sigmoid derivative
        dz1_dw_hidden = x
        dloss_dz1 = dloss_da1 * da1_dz1
        
        dloss_dw_hidden = np.dot(np.transpose(dloss_dz1), dz1_dw_hidden)
        dloss_db_hidden = np.sum(dloss_dz1)

        return dloss_dw_output, dloss_db_output, dloss_dw_hidden, dloss_db_hidden

    def train(self, x, y, batch_size, epochs, lr = 0.1):
        
        minibatch_error = []
        epoch_error = [] 

        for e in range(epochs):
            for batch_idx in range(batch_size + 1):

                # Extract minibatches
                X_mini = x[batch_idx * batch_size:(batch_idx + 1) * batch_size, :] # input values: x1, x2
                y_mini = y[batch_idx * batch_size:(batch_idx + 1) * batch_size, :] # output value: y
                
                # Compute outputs
                a1, a2 = self.predict(X_mini)

                # Calculate gradients
                dloss_dw_output, dloss_db_output, dloss_dw_hidden, dloss_db_hidden = self.backward(X_mini, a1, a2, y_mini)

                # Update weights
                self.weights_hidden -= lr * dloss_dw_hidden
                self.bias_hidden -= lr * dloss_db_hidden
                self.weights_output -= lr * dloss_dw_output
                self.bias_output -= lr * dloss_db_output

                minibatch_error.append(self.calc_mse(X_mini, y_mini))

            epoch_error.append(self.calc_mse(x, y))
            # print(f'Epoch: {e}, MSE: {self.calc_mse(x, y)}')

        return minibatch_error, epoch_error
        # minibatch_error returns error from predicting each item in the batch
        # epoch_error returns error over the entire training epoch (all items in minibatch)

The goal here is to implement various logic gates using a multi-level perceptron. Input data will be given as truth tables, as seen below. Since XOR was already done and demonstrated, we will implement NAND in this demonstration, however, any of them will work. 

In [172]:
or_data = [
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ], [0, 1, 1, 1]
]

nor_data = [
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ], [1, 0, 0, 0]
]

and_data = [
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ], [0, 0, 0, 1]
]

nand_data = [
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ], [1, 1, 1, 0]
]

data = nand_data # chosen as an example

X = np.array(data[0]) # base line bit sequence for logic gate truth table
y = np.array(data[1])
y = y.reshape(-1, 1)
y = y.astype(int)

# # Shuffling & train/test split
# shuffle_idx = np.arange(y.shape[0])
# shuffle_rng = np.random.RandomState(999)
# shuffle_rng.shuffle(shuffle_idx)
# X, y = X[shuffle_idx], y[shuffle_idx]

In [173]:
# train!

mulp = MuLP(num_features=2, num_hidden=1, num_classes=2)

RANDOM_SEED = 1
BATCH_SIZE = 10
NUM_EPOCHS = 500
LEARNING_RATE = 0.05

mulp.train(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, lr=LEARNING_RATE)

  error = np.power(yhat - y, 2).mean()
  ret = ret.dtype.type(ret / rcount)


([None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,


Below, we'll run predictions on our trained model, let's begin with NAND.

In [175]:
# 0 NAND 0 -> 1
mulp.predict((0, 0))[0]

# 0 NAND 1 -> 1
mulp.predict((0, 1))[0]

# 1 NAND 0 -> 1
mulp.predict((1, 0))[0]

# 1 NAND 1 -> 0
mulp.predict((1, 1))[0]


array([0.45097428])