<a href="https://colab.research.google.com/github/itissandeep98/ML-Assignments/blob/master/Assignment3/ML_Assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split

# Pre Processing

In [59]:
class MyPreProcessor():
    """
    Pre-processing steps for the datasets used in this notebook.
    """

    # Dataset id -> CSV location.  Ids 1 and 2 are listed so their paths stay
    # in one place; their feature/label layout is not defined yet (see
    # pre_process).
    _DATASET_PATHS = {
        0: "/content/sample_data/mnist_train_small.csv",
        1: "/content/drive/MyDrive/ML_Assignment3/largeTrain.csv",
        2: "/content/drive/MyDrive/ML_Assignment3/largeValidation.csv",
    }

    def __init__(self):
        pass

    def pre_process(self, dataset):
        """
        Read the requested dataset and return its features and encoded labels.

        Any string encoding, removal of empty entries or other pre-processing
        is meant to happen inside this function only.

        Parameters
        ----------

        dataset : integer with acceptable values 0, 1, or 2
        0 ->  MNIST "train small" CSV (label in column 0, pixels afterwards)
        1 ->  largeTrain.csv       (not implemented yet)
        2 ->  largeValidation.csv  (not implemented yet)

        Returns
        -------
        X : 2-dimensional numpy array of shape (n_samples, n_features)
        y : 2-dimensional numpy array of shape (n_samples, n_classes) holding
            the one-hot encoded labels

        Raises
        ------
        ValueError
            If ``dataset`` is not one of 0, 1 or 2.
        NotImplementedError
            If ``dataset`` is 1 or 2.  Previously these branches only read the
            CSV and then failed with ``UnboundLocalError`` at ``return X, y``
            because X and y were never assigned.
        """
        if dataset not in self._DATASET_PATHS:
            raise ValueError(
                "Unknown dataset id %r; expected 0, 1 or 2" % (dataset,))

        path = self._DATASET_PATHS[dataset]

        if dataset == 0:
            df = pd.read_csv(path, header=None)
            X = df.iloc[:, 1:].to_numpy()
            labels = df[0].to_numpy()
            # One-hot encode: one row per sample, one column per class id.
            y = np.zeros((labels.size, labels.max() + 1))
            y[np.arange(labels.size), labels] = 1
            return X, y

        # NOTE(review): which columns of these files are features and which
        # are labels is not visible in this notebook; fail loudly rather than
        # guess a layout.
        raise NotImplementedError(
            "Pre-processing for dataset %r (%s) is not implemented yet"
            % (dataset, path))



# My Neural Network

In [118]:
class MyNeuralNetwork():
    """
    My implementation of a Neural Network Classifier.

    A fully connected feed-forward network trained with mini-batch gradient
    descent.  Hidden layers use the activation chosen at construction time.
    The output layer always uses softmax, because ``cross_entropy`` returns
    ``(y_hat - y) / n_samples``, which is the gradient of the mean
    cross-entropy loss with respect to the output logits only when the output
    activation is softmax.
    """

    acti_fns = ['relu', 'sigmoid', 'linear', 'tanh', 'softmax']
    weight_inits = ['zero', 'random', 'normal']

    def __init__(self, n_layers, layer_sizes, activation, learning_rate, weight_init, batch_size, num_epochs):
        """
        Initializing a new MyNeuralNetwork object

        Parameters
        ----------
        n_layers : int value specifying the number of layers
                   (input and output layers included, so at least 2)

        layer_sizes : integer array of size n_layers specifying the number of nodes in each layer

        activation : string specifying the activation function to be used
                     possible inputs: relu, sigmoid, linear, tanh, softmax

        learning_rate : float value specifying the learning rate to be used

        weight_init : string specifying the weight initialization function to be used
                      possible inputs: zero, random, normal

        batch_size : int value specifying the batch size to be used

        num_epochs : int value specifying the number of epochs to be used

        Raises
        ------
        Exception
            If ``activation`` or ``weight_init`` is not a supported name.
        ValueError
            If ``n_layers`` is below 2 or ``layer_sizes`` has fewer than
            ``n_layers`` entries.
        """

        if activation not in self.acti_fns:
            raise Exception('Incorrect Activation Function')

        if weight_init not in self.weight_inits:
            raise Exception('Incorrect Weight Initialization Function')

        if n_layers < 2 or len(layer_sizes) < n_layers:
            raise ValueError(
                'n_layers must be at least 2 and layer_sizes must give a size for every layer')

        # Fixed seed so that weight initialisation is reproducible.
        np.random.seed(0)
        self.n_layers = n_layers
        self.layer_sizes = layer_sizes
        self.activation = activation
        self.learning_rate = learning_rate
        self.weight_init = weight_init
        self.batch_size = batch_size
        self.num_epochs = num_epochs

        # Kept as plain lists: the per-layer matrices have different shapes,
        # and wrapping such a ragged list in np.array() raises on recent NumPy.
        self.weights = []
        self.bias = []
        for layer in range(self.n_layers - 1):
            shape = (self.layer_sizes[layer], self.layer_sizes[layer + 1])
            self.weights.append(np.array(self.weight_func(shape), dtype=float))
            self.bias.append(np.zeros(self.layer_sizes[layer + 1]))

    def activation_func(self, X):
        """
        Apply the configured activation and its derivative to X.

        Parameters
        ----------
        X : numpy array of pre-activation values

        Returns
        -------
        (activation, gradient) : tuple holding the activation of X and the
            value returned by the matching ``*_grad`` method
        """
        forward = getattr(self, self.activation)
        backward = getattr(self, self.activation + '_grad')
        return forward(X), backward(X)

    def relu(self, X):
        """
        Calculating the ReLU activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array of the same shape holding max(0, X)
        """
        return np.maximum(0, X)

    def relu_grad(self, X):
        """
        Calculating the gradient of ReLU activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array of the same shape; 1 where X > 0, otherwise 0
        """
        return (np.asarray(X) > 0).astype(float)

    def sigmoid(self, X):
        """
        Calculating the Sigmoid activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array of the same shape holding 1 / (1 + exp(-X))
        """
        # Identity sigmoid(x) = (1 + tanh(x / 2)) / 2 avoids the overflow that
        # exp(-X) produces for large negative inputs.
        return 0.5 * (1.0 + np.tanh(0.5 * np.asarray(X)))

    def sigmoid_grad(self, X):
        """
        Calculating the gradient of Sigmoid activation for a particular layer

        Parameters
        ----------
        X : numpy array of pre-activation values

        Returns
        -------
        x_calc : numpy array of the same shape holding sigmoid(X) * (1 - sigmoid(X))
        """
        sig = self.sigmoid(X)
        return sig * (1 - sig)

    def linear(self, X):
        """
        Calculating the Linear activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : X itself (identity activation)
        """
        return X

    def linear_grad(self, X):
        """
        Calculating the gradient of Linear activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array of ones with the shape of X (d/dx of x is 1)
        """
        return np.ones(np.shape(X))

    def tanh(self, X):
        """
        Calculating the Tanh activation for a particular layer

        Parameters
        ----------
        X : numpy array

        Returns
        -------
        x_calc : numpy array of the same shape holding tanh(X)
        """
        # np.tanh stays finite where the exp-based formula yields inf / inf.
        return np.tanh(X)

    def tanh_grad(self, X):
        """
        Calculating the gradient of Tanh activation for a particular layer

        Parameters
        ----------
        X : numpy array of pre-activation values

        Returns
        -------
        x_calc : numpy array of the same shape holding 1 - tanh(X)**2
        """
        return 1 - self.tanh(X) ** 2

    def softmax(self, X):
        """
        Calculating the Softmax activation for a particular layer

        Parameters
        ----------
        X : numpy array; the last axis indexes the classes/units

        Returns
        -------
        x_calc : numpy array of the same shape whose entries along the last
                 axis are non-negative and sum to 1
        """
        X = np.asarray(X, dtype=float)
        # Subtracting the per-row maximum leaves the result unchanged but
        # keeps exp() from overflowing.
        expo = np.exp(X - np.max(X, axis=-1, keepdims=True))
        return expo / np.sum(expo, axis=-1, keepdims=True)

    def softmax_grad(self, X):
        """
        Calculating the gradient of Softmax activation for a particular layer

        Parameters
        ----------
        X : 1-dimensional numpy array of length k, or 2-dimensional numpy
            array of shape (n_samples, k)

        Returns
        -------
        x_calc : the softmax Jacobian with entries s_i * (delta_ij - s_j);
                 shape (k, k) for 1-dimensional input and (n_samples, k, k)
                 for 2-dimensional input
        """
        s = self.softmax(X)
        if s.ndim == 1:
            return np.diag(s) - np.outer(s, s)
        k = s.shape[-1]
        return s[:, :, None] * (np.eye(k) - s[:, None, :])

    def weight_func(self, shape):
        """
        Create a weight matrix of the given shape using the configured
        initialisation scheme ('zero', 'random' or 'normal').
        """
        if self.weight_init == "zero":
            return self.zero_init(shape)
        elif self.weight_init == "random":
            return self.random_init(shape)
        else:
            return self.normal_init(shape)

    def zero_init(self, shape):
        """
        Calculating the initial weights after Zero Activation for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        return np.zeros(shape)

    def random_init(self, shape):
        """
        Calculating the initial weights after Random Activation for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        # NOTE(review): uniform values in [0, 10) are very large for layers
        # with many inputs; the scale is kept as-is, confirm it is intended.
        return np.random.rand(shape[0], shape[1]) * 10

    def normal_init(self, shape):
        """
        Calculating the initial weights after Normal(0,1) Activation for a particular layer

        Parameters
        ----------
        shape : tuple specifying the shape of the layer for which weights have to be generated

        Returns
        -------
        weight : 2-dimensional numpy array which contains the initial weights for the requested layer
        """
        return np.random.normal(size=shape)

    def cross_entropy(self, y_hat, y):
        """
        Gradient of the mean cross-entropy loss w.r.t. the output logits.

        Parameters
        ----------
        y_hat : 2-dimensional numpy array of predicted (softmax) probabilities

        y : 2-dimensional numpy array of one-hot targets, same shape as y_hat

        Returns
        -------
        numpy array of the same shape holding (y_hat - y) / n_samples
        """
        samples = y.shape[0]
        return (y_hat - y) / samples

    def _one_hot(self, y):
        """Return y as a float one-hot matrix; 1-D integer labels are encoded
        using the size of the output layer as the number of classes."""
        y = np.asarray(y)
        if y.ndim == 2:
            return y.astype(float)
        labels = y.astype(int)
        n_classes = self.layer_sizes[self.n_layers - 1]
        encoded = np.zeros((labels.size, n_classes))
        encoded[np.arange(labels.size), labels] = 1
        return encoded

    def _forward(self, X):
        """Run a forward pass and return (pre_activations, activations), one
        entry per weight layer; the last layer is activated with softmax."""
        pre_activations = []
        activations = []
        last = len(self.weights) - 1
        out = X
        for layer, (W, b) in enumerate(zip(self.weights, self.bias)):
            z = out.dot(W) + b
            if layer == last:
                out = self.softmax(z)
            else:
                out = getattr(self, self.activation)(z)
            pre_activations.append(z)
            activations.append(out)
        return pre_activations, activations

    def _activation_backward(self, upstream, Z):
        """Push the gradient ``upstream`` (w.r.t. a hidden layer's activation)
        back through that layer's activation evaluated at pre-activation Z."""
        if self.activation == 'softmax':
            # Softmax is not element-wise: apply its Jacobian to the upstream
            # gradient without materialising the (k, k) matrices.
            s = self.softmax(Z)
            return s * (upstream - np.sum(upstream * s, axis=-1, keepdims=True))
        return upstream * getattr(self, self.activation + '_grad')(Z)

    def fit(self, X, y):
        """
        Fitting (training) the network with mini-batch gradient descent.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as training data.

        y : training labels, either a 1-dimensional numpy array of integer
            class ids of shape (n_samples,) or a 2-dimensional one-hot numpy
            array of shape (n_samples, n_classes).

        Returns
        -------
        self : an instance of self
        """
        X = np.asarray(X)
        targets = self._one_hot(y)
        last = self.n_layers - 2  # index of the output weight layer

        for epoch in range(self.num_epochs):
            for start in range(0, X.shape[0], self.batch_size):
                X_batch = X[start:start + self.batch_size]
                y_batch = targets[start:start + self.batch_size]

                # Forward Propagation
                pre_act, post_act = self._forward(X_batch)

                # Backward Propagation: collect every gradient first so that
                # each delta is propagated through the weights that produced
                # the forward pass, then apply the updates together.
                delta = self.cross_entropy(post_act[last], y_batch)
                gradients = [None] * (last + 1)
                for layer in range(last, -1, -1):
                    layer_input = X_batch if layer == 0 else post_act[layer - 1]
                    gradients[layer] = (layer_input.T.dot(delta),
                                        np.sum(delta, axis=0))
                    if layer > 0:
                        upstream = delta.dot(self.weights[layer].T)
                        delta = self._activation_backward(upstream, pre_act[layer - 1])

                for layer, (dW, db) in enumerate(gradients):
                    self.weights[layer] -= self.learning_rate * dW
                    self.bias[layer] -= self.learning_rate * db

        return self

    def predict_proba(self, X):
        """
        Predicting probabilities using the trained network.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        Returns
        -------
        y : 2-dimensional numpy array of shape (n_samples, n_classes) which contains the
            class wise prediction probabilities.
        """
        _, activations = self._forward(np.asarray(X))
        return activations[-1]

    def predict(self, X):
        """
        Predicting class labels using the trained network.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        Returns
        -------
        y : 1-dimensional numpy array of shape (n_samples,) which contains the predicted values.
        """
        # argmax over the class axis gives one label per sample.
        return np.argmax(self.predict_proba(X), axis=-1)

    def score(self, X, y):
        """
        Computing the accuracy of the trained network on a labelled set.

        Parameters
        ----------
        X : 2-dimensional numpy array of shape (n_samples, n_features) which acts as testing data.

        y : testing labels, either a 1-dimensional numpy array of class ids
            of shape (n_samples,) or a 2-dimensional one-hot numpy array.

        Returns
        -------
        acc : float value specifying the accuracy of the model on the provided testing set
        """
        y_true = np.asarray(y)
        if y_true.ndim == 2:
            # One-hot targets: compare class ids with class ids.
            y_true = y_true.argmax(axis=1)
        y_pred = self.predict(X)
        return float(np.mean(y_pred == y_true))

# Testing

In [119]:
# Build a 784-5-6-10 ReLU network and train it on the MNIST split.
classifier = MyNeuralNetwork(
    n_layers=4,
    layer_sizes=[784, 5, 6, 10],
    activation='relu',
    learning_rate=0.01,
    weight_init='random',
    batch_size=6,
    num_epochs=100,
)
classifier.fit(X_train, y_train)

<__main__.MyNeuralNetwork at 0x7fcd5b3b5710>

In [60]:
# Load dataset 0, report its shapes, then hold out 20% of it as a
# stratified test split.
preprocessor = MyPreProcessor()
X, y = preprocessor.pre_process(dataset=0)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
)

(20000, 784) (20000, 10)


In [50]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the scikit-learn digits data, one-hot encode the targets and keep
# 10% of the samples for validation.
dig = load_digits()
onehot_target = pd.get_dummies(dig.target)
x_train, x_val, y_train, y_val = train_test_split(
    dig.data,
    onehot_target,
    random_state=20,
    test_size=0.1,
)
print(x_train.shape, y_train.shape)
def sigmoid(s):
    """Return the logistic function 1 / (1 + exp(-s)), applied element-wise."""
    denominator = 1 + np.exp(-s)
    return 1 / denominator

def sigmoid_derv(s):
    """Return s * (1 - s), the sigmoid derivative expressed in terms of the
    sigmoid *output* s (not the pre-activation)."""
    complement = 1 - s
    return s * complement

def softmax(s):
    """Row-wise softmax of a 2-D score array.

    Each row is shifted by its own maximum before exponentiating and is then
    normalised so that it sums to 1.
    """
    row_max = np.max(s, axis=1, keepdims=True)
    exps = np.exp(s - row_max)
    row_totals = np.sum(exps, axis=1, keepdims=True)
    return exps / row_totals

def cross_entropy(pred, real):
    """Return (pred - real) divided by the number of rows in ``real``."""
    return (pred - real) / real.shape[0]

def error(pred, real):
    """Mean negative log of the probability ``pred`` assigns to each row's
    target class, where the target class is the argmax of that row of
    ``real``."""
    count = real.shape[0]
    target_class = real.argmax(axis=1)
    picked = pred[np.arange(count), target_class]
    return np.sum(-np.log(picked)) / count

class MyNN:
    """Reference network with two 128-unit sigmoid hidden layers and a softmax
    output, trained with full-batch gradient descent on the data passed to
    the constructor."""

    def __init__(self, x, y):
        """Store the training data and create randomly initialised weights.

        x : 2-D array of training inputs; its second dimension sets the
            input width.
        y : 2-D array of one-hot targets; its second dimension sets the
            output width.
        """
        self.x = x
        neurons = 128
        self.lr = 0.5
        ip_dim = x.shape[1]
        op_dim = y.shape[1]

        self.w1 = np.random.randn(ip_dim, neurons)
        self.b1 = np.zeros((1, neurons))
        self.w2 = np.random.randn(neurons, neurons)
        self.b2 = np.zeros((1, neurons))
        self.w3 = np.random.randn(neurons, op_dim)
        self.b3 = np.zeros((1, op_dim))
        self.y = y

    def _forward(self, x):
        """Return the activations (a1, a2, a3) of the three layers for x
        without touching any stored state."""
        a1 = sigmoid(np.dot(x, self.w1) + self.b1)
        a2 = sigmoid(np.dot(a1, self.w2) + self.b2)
        a3 = softmax(np.dot(a2, self.w3) + self.b3)
        return a1, a2, a3

    def feedforward(self):
        """Run the stored training inputs through the network and cache the
        layer activations in self.a1, self.a2 and self.a3."""
        self.a1, self.a2, self.a3 = self._forward(self.x)

    def backprop(self):
        """Apply one gradient-descent step using the activations cached by
        the most recent feedforward() call."""
        loss = error(self.a3, self.y)
        # print('Error :', loss)
        a3_delta = cross_entropy(self.a3, self.y) # w3
        z2_delta = np.dot(a3_delta, self.w3.T)
        a2_delta = z2_delta * sigmoid_derv(self.a2) # w2
        z1_delta = np.dot(a2_delta, self.w2.T)
        a1_delta = z1_delta * sigmoid_derv(self.a1) # w1
        # Shape trace printed on every call (kept so output is unchanged).
        print(a3_delta.shape ,self.a3.shape, self.y.shape)

        self.w3 -= self.lr * np.dot(self.a2.T, a3_delta)
        self.b3 -= self.lr * np.sum(a3_delta, axis=0, keepdims=True)
        self.w2 -= self.lr * np.dot(self.a1.T, a2_delta)
        self.b2 -= self.lr * np.sum(a2_delta, axis=0, keepdims=True)
        self.w1 -= self.lr * np.dot(self.x.T, a1_delta)
        self.b1 -= self.lr * np.sum(a1_delta, axis=0, keepdims=True)

    def predict(self, data):
        """Return the predicted class for ``data``.

        A 1-D ``data`` is treated as a single sample and yields a scalar
        class index; a 2-D ``data`` yields one class index per row.

        The stored training inputs and cached activations are left alone, so
        training can continue after predicting (previously ``self.x`` was
        overwritten with ``data``).
        """
        data = np.asarray(data)
        _, _, output = self._forward(np.atleast_2d(data))
        # argmax per row; without the axis a batch would give one flat index.
        labels = output.argmax(axis=1)
        return labels[0] if data.ndim == 1 else labels
			
# Scale the pixel values by 1/16 and train the reference network with
# `epochs` full-batch gradient steps.
model = MyNN(x_train / 16.0, np.array(y_train))

epochs = 12
for _ in range(epochs):
    model.feedforward()
    model.backprop()
		
def get_acc(x, y):
    """Return the percentage of samples in x for which the global ``model``
    predicts the class given by the argmax of the matching row of y."""
    hits = sum(
        1
        for sample, target in zip(x, y)
        if model.predict(sample) == np.argmax(target)
    )
    return hits/len(x)*100
	
# Report accuracy on both splits, using the 1/16 scaling applied in training.
train_accuracy = get_acc(x_train / 16, np.array(y_train))
print("Training accuracy : ", train_accuracy)
test_accuracy = get_acc(x_val / 16, np.array(y_val))
print("Test accuracy : ", test_accuracy)

(1617, 64) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
(1617, 10) (1617, 10) (1617, 10)
Training accuracy :  58.8126159554731
Test accuracy :  56.666666666666664


In [57]:
# One-hot encode the label vector y: one row per label, one column per class.
n_rows, n_classes = y.size, y.max() + 1
b = np.zeros((n_rows, n_classes))
b[np.arange(n_rows), y] = 1
b.shape

(20000, 10)

In [58]:
b  # display the one-hot matrix built in the previous cell

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.]])