### Multi-layer Perceptron with one hidden layer

In [1]:
import numpy as np

def sigmoid(x):
    """
    Logistic (sigmoid) activation used by the hidden layer.

    Maps x element-wise to 1 / (1 + e^(-x)).
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_der(x):
    """
    First derivative of the sigmoid, s(x) * (1 - s(x)), element-wise.
    """
    s = sigmoid(x)
    return s * (1 - s)

def softmax(A):
    """
    Softmax activation function for output layer.

    Input: 2-D array of scores, one row per sample
    Output: array of the same shape; every row is non-negative and sums to 1
    """
    # Subtracting the per-row maximum does not change the softmax value, but
    # keeps the largest exponent at 0 so np.exp cannot overflow to inf
    # (which would otherwise turn the row into nan via inf / inf).
    shifted = A - np.max(A, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)

def accuracy(y_true, y_pred):
    """
    Fraction of samples whose predicted label matches the true label.

    Input: two equally long sequences of labels; each label may be a scalar
           or a vector (e.g. a one-hot row), in which case every component
           must match for the sample to count as correct
    Output: float in [0, 1]; 0.0 if the lengths differ (a message is printed)
            or if there are no samples
    """
    n_samples = len(y_true)
    if n_samples != len(y_pred):
        print('Size of predicted and true labels not equal.')
        return 0.0
    if n_samples == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # np.all works for both scalar labels and vector labels.
    corr = sum(
        1 for true, pred in zip(y_true, y_pred)
        if np.all(np.asarray(true) == np.asarray(pred))
    )
    return corr / n_samples

class MLP(object):
    """
    Multi_layer perceptron:
    - One hidden layer
    - User defined number of Neurons
    - User defined learning rate
    - Hidden layer Activation function : Sigmoid
    - Output layer Activation function : Softmax
    - Cost function: Cross Entropy loss function
    - Using Mini-batch gradient descent optimization technique
    - User defined Batch size
    """
    def __init__(self,inputSize,hiddenSize,outSize,lr):
        """
        Initialize starting variables

        inputSize : number of features of an input data point
        hiddenSize: number of neurons in the hidden layer
        outSize   : number of classes (width of a label row)
        lr        : learning rate used in the gradient-descent update
        """
        self.learning_rate = lr
        # layer dimensions
        self.inputSize = inputSize
        self.outSize = outSize
        self.hiddenSize = hiddenSize

        # NOTE: this reseeds NumPy's *global* random generator, so the initial
        # W2 and every later np.random.shuffle are reproducible across runs.
        np.random.seed(1)
        # W1 starts at zero; the hidden units still receive different
        # gradients because the random W2 feeds back into dJ_dw1.
        self.W1 = np.zeros((self.inputSize,self.hiddenSize)) # input -> hidden weights
        self.b1 = np.zeros((1,self.hiddenSize)) # hidden layer bias
        self.W2 = np.random.randn(self.hiddenSize,self.outSize) # hidden -> output weights
        self.b2 = np.zeros((1,self.outSize)) # output layer bias

    def getParameters(self):
        """
        Print the shape and current values of all weights and biases
        """
        print("Weight_1:{}\n{}".format(self.W1.shape,self.W1))
        print("Bias_1:{}\n{}".format(self.b1.shape,self.b1))
        print("Weight_2:{}\n{}".format(self.W2.shape,self.W2))
        print("Bias_2:{}\n{}".format(self.b2.shape,self.b2))

    @staticmethod
    def _one_hot_argmax(probs):
        """
        Turn each row of class scores into a one-hot row marking its argmax
        """
        one_hot = np.zeros_like(probs)
        one_hot[np.arange(len(probs)), probs.argmax(1)] = 1
        return one_hot

    def mini_batches(self,X, y, batch_size):
        """
        Shuffle the training data and split it into mini-batches
        Input: 2-D features X, 2-D labels y (same number of rows), batch size
        Output: list of (X_mini, Y_mini) tuples; every batch holds batch_size
                rows except possibly the last one, and no batch is empty
        Raises: ValueError if batch_size is smaller than 1
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Width of the label block, taken from y itself so the split between
        # features and labels is correct for any number of classes.
        n_labels = y.shape[1]

        # Shuffle features and labels together so the rows stay paired.
        data = np.hstack((X, y))
        np.random.shuffle(data)

        self.batches = []
        for start in range(0, data.shape[0], batch_size):
            batch = data[start:start + batch_size, :]
            X_mini = batch[:, :-n_labels]
            Y_mini = batch[:, -n_labels:]
            self.batches.append((X_mini, Y_mini))
        return self.batches

    def forwardPass(self,X):
        """
        forward propagation through input layer to output layer
        Input: features, one row per sample
        output: class probabilities from the softmax output layer

        The intermediate values z1, a1, z2, a2 are stored on the instance
        because backProp reads them.
        """
        # From Input layer to hidden layer
        self.z1=np.dot(X,self.W1)+self.b1 # weighted input of the hidden layer
        self.a1=sigmoid(self.z1) # hidden layer activation

        # Hidden layer to output layer
        self.z2=np.dot(self.a1,self.W2)+self.b2 # weighted input of the output layer
        self.a2=softmax(self.z2) # output activation

        return self.a2

    def backProp(self,X,y_actual,y_pred):
        """
        Backpropagation through output to input layer
        Description:
        Finds the gradient of cost function wrt parameters and adjust the parameters

        Must be called right after forwardPass(X) on the same X, since it
        uses the cached self.z1 and self.a1. Gradients are summed (not
        averaged) over the rows of the batch.
        """
        # At output layer
        # dJ_dw2 = dJ_da2 * da2_dz2 * dz2_dw2
        # dJ_db2 = dJ_da2 * da2_dz2 * dz2_db2
        # For softmax combined with cross-entropy, dJ_dz2 = dJ_da2 * da2_dz2
        # simplifies to (prediction - target).
        dJ_dz2 = y_pred - y_actual
        dz2_dw2 = self.a1  # z2 = a1 . W2 + b2, so dz2/dW2 is the hidden activation
        dJ_dw2 = np.dot(dz2_dw2.T,dJ_dz2)
        dJ_db2 = dJ_dz2  # dz2/db2 = 1

        # At Hidden layer
        # dJ_dw1 = dJ_da1 * da1_dz1 * dz1_dw1
        # dJ_db1 = dJ_da1 * da1_dz1 * dz1_db1
        # with dJ_da1 = dJ_dz2 * dz2_da1
        dz2_da1 = self.W2  # z2 = a1 . W2 + b2, so dz2/da1 is W2
        dJ_da1  = np.dot(dJ_dz2,dz2_da1.T)
        da1_dz1 = sigmoid_der(self.z1)
        dJ_dz1 = dJ_da1 * da1_dz1

        dz1_dw1 = X
        dJ_dw1  = np.dot(dz1_dw1.T,dJ_dz1)
        dJ_db1 = dJ_dz1

        # Gradient-descent step; bias gradients are summed over the batch rows.
        self.W1 -= self.learning_rate*dJ_dw1
        self.b1 -= self.learning_rate*dJ_db1.sum(axis=0)
        self.W2 -= self.learning_rate*dJ_dw2
        self.b2 -= self.learning_rate*dJ_db2.sum(axis=0)

    def train(self,X,y,n_epochs,batch_size):
        """
        Train on mini-batches: (ForwardPass + BackProp) for every batch
        Input: training features, training labels, No of epochs, batch size

        Prints the mean per-batch training accuracy after every epoch and
        stores the final forward pass over all of X in self.pred.
        """
        for itr in range(n_epochs):
            batch_acc=[]
            # Data is reshuffled and re-batched at the start of every epoch.
            for X_b, y_b in self.mini_batches(X,y,batch_size):
                forward_out=self.forwardPass(X_b)
                self.backProp(X_b,y_b,forward_out)
                # Accuracy is measured on the predictions made before this
                # batch's weight update.
                batch_acc.append(accuracy(y_b,self._one_hot_argmax(forward_out)))
            print("Epoch:: {} ; Training_acc= {}\n".format((itr+1),np.mean(batch_acc)))
        self.pred=self.forwardPass(X)

    def predict(self,X_test,y_test):
        """
        Evaluate the trained network
        Input: test features and their one-hot labels
        Output: fraction of test rows classified correctly
                (also stored in self.test_accuracy)
        """
        forward_out=self.forwardPass(X_test)
        self.test_accuracy = accuracy(y_test,self._one_hot_argmax(forward_out))

        return self.test_accuracy

In [4]:
# Loading and preprocessing the data
# The file paths are handed to np.loadtxt directly so that it opens and
# closes the files itself; no file handle is left open.
train_data=np.loadtxt("train_data.csv", delimiter=",")
train_labels=np.loadtxt("train_labels.csv", delimiter=",")

# Number of feature columns, taken from the data instead of being hard-coded.
n_features = train_data.shape[1]

# Join features and labels so one shuffle keeps every row paired with its label.
data_train=np.concatenate((train_data,train_labels),axis=1)
np.random.shuffle(data_train)

# 80% of the shuffled rows for training, the remaining 20% for testing.
train_pro=int(len(data_train)*0.8)
train, test = data_train[:train_pro,:], data_train[train_pro:,:]

# Split the joined rows back into features and labels.
X_train=train[:,:n_features]
X_test = test[:,:n_features]
y_train=train[:,n_features:]
y_test=test[:,n_features:]

In [11]:
# Parameters for the model
# inputSize is the number of feature columns in X_train.
inputSize = X_train.shape[-1]
# Number of neurons in the single hidden layer.
hiddenSize = 10
# Number of output classes; presumably equals the number of columns in
# y_train (one-hot labels) -- confirm against the label file.
outSize = 4
# Learning rate passed to MLP.
lr = 0.001
# Rows per mini-batch and number of passes over the training data,
# both passed to MLP.train.
batch_size = 64
n_epochs = 30

#Build one hidden layer MLP
MLP_oneLayer= MLP(inputSize,hiddenSize,outSize,lr)

# Train the model (prints the mean per-batch training accuracy every epoch)
MLP_oneLayer.train(X_train,y_train,n_epochs,batch_size)

# Test the model: predict returns the accuracy on the held-out split
prediction_acc=MLP_oneLayer.predict(X_test,y_test)

print("Test_Accuracy:",prediction_acc)

Epoch:: 1 ; Training_acc= 0.9066532258064516

Epoch:: 2 ; Training_acc= 0.9607172192353644

Epoch:: 3 ; Training_acc= 0.9644470579450417

Epoch:: 4 ; Training_acc= 0.9676728643966547

Epoch:: 5 ; Training_acc= 0.9692540322580645

Epoch:: 6 ; Training_acc= 0.9696068548387097

Epoch:: 7 ; Training_acc= 0.9709490740740739

Epoch:: 8 ; Training_acc= 0.9717555256869772

Epoch:: 9 ; Training_acc= 0.9730846774193549

Epoch:: 10 ; Training_acc= 0.972795325567503

Epoch:: 11 ; Training_acc= 0.9734001642771803

Epoch:: 12 ; Training_acc= 0.9747983870967742

Epoch:: 13 ; Training_acc= 0.9753528225806452

Epoch:: 14 ; Training_acc= 0.9756048387096774

Epoch:: 15 ; Training_acc= 0.9762918160095578

Epoch:: 16 ; Training_acc= 0.9768966547192353

Epoch:: 17 ; Training_acc= 0.9775014934289127

Epoch:: 18 ; Training_acc= 0.9777217741935483

Epoch:: 19 ; Training_acc= 0.9784274193548387

Epoch:: 20 ; Training_acc= 0.9785786290322581

Epoch:: 21 ; Training_acc= 0.9794354838709678

Epoch:: 22 ; Training_a