## Loading the dataset

We are going to use sklearn to load the dataset, this will be the only time we will use it during this report. Only to load the dataset.

In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from abc import ABC, abstractmethod
mnist = fetch_openml('mnist_784')
X, y = mnist["data"], mnist["target"]

print(X.shape)
print(y.shape)

from keras.utils.np_utils import to_categorical



(70000, 784)
(70000,)


Using TensorFlow backend.


In [2]:
y_new = to_categorical(y)
print(y_new[2])

split = 60000
split_test = X.shape[0] - split

print(split_test)

X_train = X[:split]
y_train = y_new[:split]

X_test = X[split:]
y_test = y_new[split:]

print(X_train.shape)
print(y_train.shape)

[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
10000
(60000, 784)
(60000, 10)


## Functions 

## Making the Layer class

We are going to make a Layer class that will provide two methods to be implemented by subclasses. The method is create_layer(self, inputs, seed) and forward(self, inputs) to provide the functionality of backpropagation and forward propagation of the layer


In [3]:
class Layer(ABC):
    
    """ A layer of neurons in a neural network"""
    
    def __init__(self, neurons: int):
        self.neurons = neurons
        self.weights = 0
        self.is_initialized = True
        self.gradients_param = []
        self.operations = []
        self.seed = 1
    @abstractmethod 
    def create_layer(self, inputs, seed):
        """ Creates layer """
        pass
    
    @abstractmethod
    def forward(self, inputs):
        """ Calculate layer output using forward propagation for given input. """
        pass

    @abstractmethod 
    def backward(self, ouputs):
        pass
    

## Making the DenseLayer class

We will make the DenseLayer that will implement the Layer class to provide backpropagation and forward propagation.

In [6]:
class DenseLayer(Layer):
    
    def __init__(self, neurons: int, activation):
        super().__init__(neurons)
        self.activation = activation
        
    def create_layer(self, inputs, seed):
        
        if isinstance(seed, int):
            self.seed = np.random.seed(seed)
        self.params = []

        
        #Add weights
        self.params.append(np.random.randn(inputs.shape[1], self.neurons))

        #bias
        self.params.append(np.random.randn(1, self.neurons))
        
        self
        self.operations = [WeightMultiply(self.params[0]),
                           BiasAdd(self.params[1]),
                           self.activation]
        
        def forward(self, inputs):
        """ Calculate layer output using forward propagation for given input. """
            pass
        
        def add_inputs(x, y, activation_func):
            
            output = x + y
            return activation_func(output)

        def add_inputs_backward(x, y, activation_func):
            # Calculate "forward pass"
            output = x + y
            dsda = deriv(activation_func, output)
            dadx, dady = 1, 1
            
            return dsda * dadx, dsda * dady
        
        def deriv(func, inputs, delta):
            return (func(inputs + delta) - (func(inputs - delta)) / (2 * delta)
        

        def matmul_forward(X, W, activation_func):
            """Calculates the forward pass of a matrix multiplication"""
            # matrix multiplication        
            N = np.dot(X, W)
            
            # applying activation function 
            S = activation_func(N)
            
            # sum all the elements
            L = np.sum(S)
                    
            return L

        def matmul_backward_first(X, W, activation_func):
            """Calculates the backward pass of a matrix multiplication respect X"""  
            #matrix multiplication of train and weights
            N = np.dot(X, W)
            
            #applying activation function
            S = sigma(N)
                    
            #backward calculation
            
            #dLdS - just 1s
            dLdN = np.ones_like(S)
            dSdN = deriv(sigma, N)
            
            dLdN = dLdS * dSdN
                    
            dNdX = np.transpose(W, (1, 0))
             
            dLdX = np.dot(dSdN, dNdX)
            # multiply them together
            return dLdX

        def backward(self, ouputs):
            pass

IndentationError: expected an indented block (<ipython-input-6-7ac8f795da2d>, line 26)

## Making the Loss class

This class will provide the loss functions that will be applied after the activation function has been applied.


In [None]:
class Loss:
    
    def __init__(self, y_pred, y_true, type_m):
        
        self.y_pred = y_pred
        self.y_true = y_true
        
        if type_m == 'msr':
            self.mean_squared_error()
        elif type_m == 'cat_crossentropy':
            self.categorical_crossentropy()
        else:
            raise ValueError('Invalid loss function')
            
    def mean_squared_error():
        return np.mean((self.y_pred - self.y_true) ** 2, axis = 1)
    

    def mean_squared_error_d():
        return np.expands_dims((2/self.y_pred.shape[1]) * (self.y_pred - self.y_true), 1)
    
    def categorical_crossentropy():
        return -np.log(np.sum(self.y_true * self.y_pred, axis=1) + EPS)
    

## Making the Activation class.

This class will provide the activations functions that will be applied to the input and output layer

In [None]:
class Activation(Layer):
    

    """Class of an operation in a neural network such as forward and backward"""
    def __init__(self, type_func):
        if type_func == 'sigmoid':
            self.act_func = self.sigmoid
            self.act_func_d = self.sigmoid_d
        elif type_func == 'relu':
            self.act_func = self.relu
            self.act_func_d = self.relu_d
        elif type_func == 'tanh':
            self.act_func = self.tanh
            self.act_func_d = self.tanh_d
        elif type_func == 'softmax':
            self.act_func = self.softmax
            self.act_func_d = self.softmax_d
        else:
            raise ValueError('Invalid activation function.')
        
    def forward(self, inputs):
        self.last_input = inputs
        return self.act_func(inputs)

        
    def backward(self, output):
        """Returns the derivative of the activation function"""
        return output * self.act_func_d(self.last_input)

    def sigmoid(self, z):
            
        #if isDeriv: # When backpropagating
            #return (np.exp(-z)) / ((np.exp(-z) + 1) **2)

        # Forward feed
        return 1 / (1 + np.exp(-z))
  
    def sigmoid_d(self, z):
        s = self.sigmoid(z)
        return s*(1-s)

   
    # Relu function can also be applied to input/hidden layers
    def relu(self, z):
        
        r = np.maximum(0, z)
        return r
    # Gradient of the Relu function 
    def relu_d(self, z):
      
        dz = np.zeros(z.shape)
        dz[z >= 0] = 1
        return dz
# Softmax function to be applied to the output layer 
    def softmax(self, z, isDeriv):
        expo = np.exp(z - z.max())
        
        if isDeriv: # When backpropagating
            return expo / np.sum(expo, axis = 0) * (1 - expo / np.sum(expo, axis = 0))
        
        # Forward feed
        return expo / np.sum(expo, axis = 0)    

    def softmax_d(self, x):
        s = self.softmax(x)
        ds = np.stack([np.diag(s[i, :]) for i in range(s.shape[0])])
  #Tanh function to be applied to the input/output layer  
    def tanh(self, z):
        """Returns the tanh of x """
        return np.tanh(z)
    
  #Gradient of tanh function 
    def tanh_d(self, z):
        """Returns the gradiend of tanh(x) function """
        e = np.exp(2*z)
        return (e - 1)/(e + 1)  

## Making the Optimizer class

The Optimizer class will be implemented by subclasses to provide optimization

In [4]:
class Optimizer(ABC):
    
    def __init__(self, weights, alpha=0.001, ):
        self.learning_rate = learning_rate
        self.momentum = momentum
         
    
    @abstractmethod
    def step(self):
        pass
    
    @abstractmethod
    def update(self, params, gradient):
        """Updates the change of the weights to minimise the error"""
        pass


In [5]:

class Adam(Optimizer):    
    
    def __init__(self, X, y, learning_rate=0.01, momentum = 0.0):
        
    


SyntaxError: unexpected EOF while parsing (<ipython-input-5-a699c15dc7f6>, line 5)

In [None]:
class SGD(Optimizer):
    
    def __init__(self, X, y, learning_rate =0.01, momentum = 0.0):
        super().__init__(learning_rate=learning_rate, momentum= momentum)
        self.m = 0.2
        self.b = 0.2
        N = len(X)
    def update(self, params, gradient, has_delta=False):
        if not has_delta:
                
                self.delta = np.zeros_like(params)

        self.delta = self.momentum
        
    def step(self):
        #pass
        #if self.first: #if  
        for it in range(iterations):
            cost = 0.0
            for i in range(m):
                random_int = np.random.randint(0, m)
                X_i = X[random_int, :].reshape(1, X.shape[1])
                y_i = y[random_int].reshape(1, 1)
                pred = np.dot(Xi, theta)
                self.update()
                
    def update(self, params, gradient, loss):
        m -= self.learning_rate * (-2 * self.X.dot(f).sum() / N)
        b -= self.learning_rate * (-2 * f.loss)
        
    def stocashtic_gradient_descent(X,y,theta,learning_rate=0.01,iterations=10):
    '''
    X    = Matrix of X with added bias units
    y    = Vector of Y
    theta=Vector of thetas np.random.randn(j,1)
    learning_rate 
    iterations = no of iterations
    
    Returns the final theta vector and array of cost history over no of iterations
    '''
    m = len(y)
    cost_history = np.zeros(iterations)
    
    
    for it in range(iterations):
        cost =0.0
        for i in range(m):
            rand_ind = np.random.randint(0,m)
            Xi = X[rand_ind,:].reshape(1,X.shape[1])
            yi = y[rand_ind].reshape(1,1)
            prediction = np.dot(X_i,theta)

            theta = theta -(1/m)*learning_rate*( Xi.T.dot((prediction - yi)))
            cost += cal_cost(theta,Xi,yi)
        cost_history[it]  = 
        
    return theta, cost_history