In [4]:
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline
np.random.seed(int(time.time()))

In [28]:
class NeuronLayer:
    def __init__(self, struct, prevCount):
        self.prevCount = prevCount
        self.count = struct['neurons']
        self.neurons = None
        self.errors = None
        self.alpha = 0.25
        self.activationDerivative = self.identity_derivative
        if(struct['type'] != 'input'):
            self.synapses = np.random.normal(0.0, 0.01, (self.count, self.prevCount))
            self.bias = np.ones((self.count, 1))
            self.learningRate = struct['learningRate']
            self.dropoutRate = struct['dropoutRate']
            self.dropout = np.array([[self.learningRate]*prevCount]*self.count)
            if (struct['type'] != 'custom'):
                if (struct['activation'] == 'identity'):
                    self.activation = self.identity
                    self.activationDerivative = self.identity_derivative
                elif (struct['activation'] == 'binarystep'):
                    self.activation = self.binarystep
                    self.activationDerivative = self.binarystep_derivative
                elif (struct['activation'] == 'sigmoid'):
                    self.activation = self.sigmoid
                    self.activationDerivative = self.sigmoid_derivative
                elif (struct['activation'] == 'tanh'):
                    self.activation = self.tanh
                    self.activationDerivative = self.tanh_derivative
                elif (struct['activation'] == 'arctan'):
                    self.activation = self.arctan
                    self.activationDerivative = self.arctan_derivative
                elif (struct['activation'] == 'ReLU'):
                    self.activation = self.ReLU
                    self.activationDerivative = self.ReLU_derivative
                elif (struct['activation'] == 'leakyReLU'):
                    self.activation = self.leakyReLU
                    self.activationDerivative = self.leakyReLU_derivative
                elif (struct['activation'] == 'ELU'):
                    self.activation = self.ELU
                    self.activationDerivative = self.ELU_derivative
                elif (struct['activation'] == 'softplus'):
                    self.activation = self.softplus
                    self.activationDerivative = self.softplus_derivative
                elif (struct['activation'] == 'softmax'):
                    self.activation = self.softmax
                    self.activationDerivative = self.softmax_derivative
                else:
                    print('ERROR: NeuronLayer requires valid activation.')
                if (struct['type'] == 'output'):
                    if (struct['loss'] == 'mse'):
                        self.loss = self.MSE
                    elif (struct['loss'] == 'msePrime'):
                        self.loss = self.MSEPrime
                    elif (struct['loss'] == 'mae'):
                        self.loss = self.MAE
                    elif (struct['loss'] == 'hinge'):
                        self.loss = self.hinge
                    elif (struct['loss'] == 'sigmoidCrossEntropy'):
                        self.loss = self.sigmoidCrossEntropy
                    else:
                        print('ERROR: NeuronLayer requires valid loss function.')
            else:
                if ('activation' in struct):
                    self.activation = struct['activation']
                    self.activationDerivative = struct['activationDerivative']
                if ('loss' in struct):
                    self.loss = struct['loss']

    def identity(self, A):
        return A
    
    def identity_derivative(self, A):
        return np.ones(A.shape)
    
    def binarystep(self, A):
        return np.where(A < 0, 0, 1)
    
    def binarystep_derivative(self, A):
        return np.ones(A.shape)

    def sigmoid(self, A):
        return 1 / (1 + np.exp(-A))

    def sigmoid_derivative(self, A):
        return A * (1 - A)

    def tanh(self, A):
        return np.tanh(A)
    
    def tanh_derivative(self, A):
        return 1 - np.power(np.tanh(A), 2)

    def arctan(self, A):
        return np.arctan(A)
    
    def arctan_derivative(self, A):
        return 1 / (np.power(A, 2) + 1)

    def ReLU(self, A):
        return np.where(A >= 0, A, 0)

    def ReLU_derivative(self, A):
        return np.where(A >= 0, 1, 0)

    def leakyReLU(self, A):
        return np.where(A >= 0, A, np.multiply(self.alpha,A))

    def leakyReLU_derivative(self, A):
        return np.where(A >= 0, 1, self.alpha)

    def ELU(self, A):
        return np.where(A >= 0, A, np.multiply(self.alpha, np.exp(A)-1))

    def ELU_derivative(self, A):
        return np.where(A >= 0, 1, np.multiply(
            self.alpha, np.exp(A)-1) + self.alpha)

    def softplus(self, A):
        return np.log(1 + np.exp(A))

    def softplus_derivative(self, A):
        return 1 / (1 + np.exp(-A))
    
    def softmax(self, A):
        e = np.exp(A - np.max(A))
        return e / e.sum()
    
    def softmax_derivative(self, A):
        return A

    def MSE(self, y, yhat):
        return np.power((y-yhat - y),2) / y.size

    def MSEPrime(self, y, yhat):
        return y - yhat

    def MAE(self, y, yhat):
        return np.absolute(y - yhat)

    def hinge(self, y, yhat):
        return np.where(1-(y*yhat) > 0, 1-(y*yhat), 0)
    
    def entropy(self, yhat):
        return np.where(p != 0, p*np.log2(p), 0)

    def crossEntropy(self, y, yhat):
        return np.max(yhat, 0) - yhat * y + np.log2(1 + np.exp(-yhat))

    def KLDivergence(self, y, yhat):
        return self.crossEntropy(y, yhat) - self.entropy(yhat)
    
    def logLikelihood(self, y, yhat):
        return np.where((y/2)-0.5+yhat > 0, -np.log(np.abs((y/2)-0.5+yhat)), 0)

    def dropoutUpdate(self, level=0):
        if (level == 0):
            if (self.dropoutRate != 0):
                self.dropout = self.learningRate * np.random.choice([0, 1],
                    size=(self.count, self.prevCount),
                    p=[self.dropoutRate, 1-self.dropoutRate]).astype(np.bool)
        elif(level == 1):
            np.random.shuffle(self.dropout)

In [33]:
class NeuralNetwork:
    def __init__(self, networkStruct):
        self.layers = len(networkStruct)
        self.network = list()
        self.accHistory = list()
        self.confusionMatrix = None
        self.network = np.array(
            [NeuronLayer(networkStruct[0], 0)])
        for i in range(1, self.layers):
            np.append(self.network,
                [NeuronLayer(networkStruct[i],
                    networkStruct[i-1]['neurons'])])

    def feedForward(self, data, target=None):
        self.network[0].neurons = np.array([])
        for l in range(1,self.layers):
            self.network[l].neurons = np.array([])
            self.network[l].errors = np.array([])
        self.network[0].neurons = np.array(data)
        for i in range(1, self.layers):
            print(self.network[i].synapses.shape)
            print(self.network[i-1].neurons.shape)
            print(np.dot(self.network[i].synapses,
                    self.network[i-1].neurons).shape)
            print(self.network[i].bias.shape)
            np.append(self.network[i].neurons,
                [self.network[i].activation(
                    np.dot(self.network[i].synapses,
                        self.network[i-1].neurons) + \
                            self.network[i].bias)])
        if (type(target) != type(None)):
            np.append(self.network[-1].errors,
                [self.network[-1].loss(
                    target, 
                    self.network[-1].neurons) * \
                self.network[-1].activationDerivative(
                    self.network[-1].neurons)])
#         elif (len(data.shape) == 3):
#             for i in range(1, self.layers):
#                 self.network[i].neurons = self.network[i].activation(
#                      np.transpose(np.dot(self.network[i].synapses,
#                         self.network[i-1].neurons), (1,0,2)) + \
#                             self.network[i].bias)
#             if (type(target) != type(None)):
#                 self.network[-1].errors = self.network[-1].loss(
#                     target,
#                     self.network[-1].neurons) * \
#                 self.network[-1].activationDerivative(
#                     self.network[-1].neurons)

    def backpropagation(self):
        for i in reversed(range(1, self.layers)):
            self.network[i].synapses += self.network[i].dropout * \
                np.average(np.dot(self.network[i].errors,
                    self.network[i-1].neurons.T), axis=0)
            self.network[i].bias += self.network[i].dropout * \
                np.average(np.dot(self.network[i].errors,
                    self.network[i].errors.T), axis=0)
            self.network[i-1].errors = np.multiply(
                np.dot(self.network[i].synapses.T,
                    self.network[i].errors),
                self.network[i-1].activationDerivative(
                    self.network[i-1].neurons))

    def train(self, trainLabels, trainData, epochs=1, 
              testLabels=[], testData=[], minibatch=50):
        zippedData = list(zip(trainLabels,trainData))
        for e in range(epochs):
            print('\t-- Epoch {}'.format(e+1))
            for i in range(1, self.layers):
                self.network[i].dropoutUpdate(0)
            np.random.shuffle(zippedData)
            for j in range(0, len(zippedData), minibatch):
                label, data = zip(*zippedData[j: j+minibatch])
                label, data = np.array(label), np.array(data)
                target = self.oneHotEncode(label)
#                 print(data.reshape(minibatch,data.shape[1],1).shape)
#                 print(target.shape)
                data = data.reshape(minibatch,data.shape[1],1)
                self.feedForward(data, target)
                self.backpropagation()
            self.test(testLabels, testData)
            accuracy = np.sum(self.confusionMatrix.diagonal()) / \
                       np.sum(self.confusionMatrix)
            self.accHistory.append(accuracy)
            print('Accuracy = {0:.2f}%'.format(accuracy*100))
        plt.plot(np.arange(epochs), self.accHistory)
        plt.title("Training Output History")
        plt.xlabel("Epoch")
        plt.xlabel("Accuracy")
        plt.yticks(np.arange(0, 1.1, step=0.1))

    def test(self, labels, testData):
        self.confusionMatrix = np.zeros((
            self.network[-1].count, self.network[-1].count))
        for label, data in zip(labels, testData):
            data = data.reshape(len(data), 1)
            self.feedForward(data)
            bestIndex = np.argmax(self.network[-1].neurons)
            self.confusionMatrix[label, bestIndex] += 1
    
    def miniBatch(self, data):
        i = 0
        while (i < len(data)):
            i += self.batchSize

    def predict(self, data):
        self.feedForward(data)
        return np.argmax(self.network[-1].neurons)
            
    def oneHotEncode(self, index):
        #index = np.array(index, dtype=np.int)  # minibatch stuff
        if(type(index) == int):
            vect = np.zeros((self.network[-1].count, 1))
            vect[index][0] = 1
        else:
            vect = np.zeros((len(index),self.network[-1].count, 1))
            vect[range(len(index)), index, 0] = 1
        return vect
        

In [34]:
class DataLoader:
    def __init__(self):
        pass

    def standardize(self, A):
        return (A - np.mean(A)) / np.std(A)

    def normalize(self, A):
        return (A - np.min(A)) / (np.max(A) - np.min(A))

    def extractMNIST(self, fileName):
        labels = []
        fname = open(fileName, "r")
        values = fname.readlines()[:20000]
        fname.close()
        for i, record in enumerate(values):
            data = record.split(",")
            values[i] = np.asfarray(data[1:]) / 255
            labels.append(int(data[0]))
        return labels, values

In [35]:
import pandas as pd
import seaborn as sn

# Open files with DataLoader class methods specific for MNIST data.
dl = DataLoader()
print("Opening Training Data")
MNIST_Train_Labels, MNIST_Train_Values = dl.extractMNIST("MNIST/mnist_train.csv")
print("Opening Testing Data")
MNIST_Test_Labels, MNIST_Test_Values = dl.extractMNIST("MNIST/mnist_test.csv")

# Input parameters
epochs = 10
suppressOutput = False

# Design neural network
network = {0:  
           {'neurons':     784,
            'type':        'input'},
           1:  
           {'neurons':      150,
            'learningRate': 0.001,
            'activation':   'tanh',
            'dropoutRate':  0.05,
            'type':         'hidden'},
           2:  
#            {'neurons':      150,
#             'learningRate': 0.001,
#             'activation':   'tanh',
#             'dropoutRate':  0.05,
#             'type':         'hidden'},
#            3:  
           {'neurons':      10,
            'learningRate': 0.001,
            'activation':   'softmax',
            'dropoutRate':  0.0,
            'loss':         'msePrime',
            'type':         'output'}}

# Create neural network
print("Creating Network")
ann = NeuralNetwork(network)

# Train
print("Training:")
ann.train(MNIST_Train_Labels, MNIST_Train_Values, epochs,
          MNIST_Test_Labels, MNIST_Test_Values)

# Predict 
pred = ann.predict(MNIST_Test_Values[0])
print("Input : {}".format(MNIST_Test_Labels[0]))
print("Output: {}".format(pred))

# Display confusion matrix
df = pd.DataFrame(ann.confusionMatrix / np.sum(ann.confusionMatrix) * 100)
plt.figure(figsize=(10,10))
sn.heatmap(df)

Opening Training Data
Opening Testing Data
Creating Network


ValueError: all the input arrays must have same number of dimensions