In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

# Neural Network Time

### Actually still more data stuff

In [6]:
df = pd.read_csv("MNIST.csv")
data = df.to_numpy()

In [3]:
# display(df.head())
# print(data[0])

# for i in range(0, 5):
#     pixels = data[i][:784].reshape(28,28)
#     plt.imshow(pixels, cmap='gray')
#     plt.show()

In [4]:
def split(data, train_proportion = 0.8):
    np.random.shuffle(data)
    m, n = data.shape

    train_size = math.floor(train_proportion * m)

    train = data[:train_size]
    test = data[train_size:m]

    X_train = train.T[:n-1].T
    X_train = X_train / np.max(X_train)
    y_train = train.T[n-1].T

    X_test = test.T[:n-1].T
    X_test = X_test / np.max(X_train)
    y_test = test.T[n-1].T

    return X_train, y_train, X_test, y_test


X_train, y_train, X_test, y_test = split(data, 0.9)

### Actual Neural Network Stuff

In [7]:
class MLPClassifier:
    def __init__(self, layers = (20,)) -> None:
        self.layers = layers
        self.attributes = []
        self.attr_z = []
        for i in layers:
            self.attributes.append(np.random.rand(1, i) - .5)
            self.attr_z.append(np.random.rand(1, i) - .5)

        self.weights = []
        self.biases = []

        for i in range(1, len(layers)):
            n = layers[i - 1]
            m = layers[i]

            self.weights.append(np.random.rand(m, n) - .5)
            self.biases.append(np.random.rand(1, m) - .5)
    
    def ReLU(self, X):
        return np.maximum(0, X)

    def softmax(self,Z):
        Z -= np.max(Z.T, axis=0).T  # Subtract max value for numerical stability
        A = np.exp(Z) / np.sum(np.exp(Z), axis=0)
        return A
        
    def one_hot(self, Y):
        Y = Y.T
        n, = Y.shape
        m, = np.unique(y_train).shape

        y = np.zeros((n, m))

        y[np.arange(n), Y] = 1
        
        return y
    
    def forward_prop(self, input):
        a = input
        for (a_index, w, b) in zip(range(len(self.attributes)), self.weights, self.biases):
            z = np.dot(w, a.T).T + b
            a = self.ReLU(z)
            self.attributes[a_index] = a
            self.attr_z[a_index] = z
        
        return np.apply_along_axis(self.softmax, 1, a)

    def fit(self, X_train, y_train, iterations = 100, learning_rate = 0.01):
        self.__init__(self.layers)

        samples, n = X_train.shape
        _, m = self.attributes[0].shape
        self.weights.insert(0, np.random.rand(m, n) - .5)
        self.biases.insert(0, np.random.rand(1, m) - .5)

        m, = np.unique(y_train).shape
        _, n = self.attributes[-1].shape
        self.weights.append(np.random.rand(m, n) - .5)
        self.biases.append(np.random.rand(1, m) - .5)
        self.attributes.append(np.random.rand(1, m) - .5)
        self.attr_z.append(np.random.rand(1, m) - .5)

        self.gradient_descent(X_train, y_train, iterations, learning_rate)

    def predict(self, input):
        a = input
        for (w, b) in zip(self.weights, self.biases):
            z = np.dot(w, a.T).T + b
            a = self.ReLU(z)
                
        p = np.apply_along_axis(self.softmax, 1, a)

        return np.apply_along_axis(np.argmax, 1, p)
    
    def derivReLU(self, Z):
        return Z > 0

    # Backpropogation
    def back_propagation(self, X_train, y_train, alpha=0.01):
        self.forward_prop(X_train)

        n, _ = self.attributes[0].shape
        self.attributes.insert(0, X_train)

        num_layers = len(self.attributes) - 1

        y = self.one_hot(y_train)
        delta_y = 2 * (self.attributes[-1] - y).T

        for i in range(num_layers, 0, -1):
            dW = np.dot(delta_y, self.attributes[i - 1]) / n
            dB = np.apply_along_axis(np.sum, 1, delta_y) / n

            W = self.weights[i-1]
            b = self.biases[i-1]

            self.weights[i-1] = W - alpha * dW
            self.biases[i-1] = b - alpha * dB

            delta_y = np.dot(W.T, delta_y) * self.derivReLU(self.attributes[i - 1]).T

        
        self.attributes.pop(0)
        
    # Calculate accuracy (to see if the NN is working)
    def calc_accuracy(self, X, y):
        return np.sum(y == self.predict(X)) / y.size


    # Gradient descent
    def gradient_descent(self, X_train, y_train, iterations=500, learning_rate=0.01):
        print("----------------------------------------------------------------")
        for i in range(iterations):
            self.back_propagation(X_train, y_train, learning_rate)


            if (not i%10):
                print("Accuracy for Iteration", i, "is:", self.calc_accuracy(X_train, y_train))
                print("----------------------------------------------------------------")


In [8]:
mlp_nn = MLPClassifier((10,))

In [9]:
mlp_nn.fit(X_train, y_train, 100, 0.01)

----------------------------------------------------------------
Accuracy for Iteration 0 is: 0.16892063492063492
----------------------------------------------------------------


  Z -= np.max(Z.T, axis=0).T  # Subtract max value for numerical stability


Accuracy for Iteration 10 is: 0.09895238095238096
----------------------------------------------------------------
Accuracy for Iteration 20 is: 0.09895238095238096
----------------------------------------------------------------


KeyboardInterrupt: 

In [1]:
mlp_nn.calc_accuracy(X_test, y_test)

NameError: name 'mlp_nn' is not defined