# Deep learning from scratch

In [2]:
import numpy as np
from numpy import ndarray
from typing import List

In [3]:
def assert_same_shape(array: ndarray, array_grad: ndarray) -> None:
    """
    Assert that an array and its gradient have identical shapes.

    Args:
        array: the reference array (reported as "the first array").
        array_grad: the array expected to match it (reported as "the second").

    Raises:
        AssertionError: if the two shapes differ; the message reports both
            shapes in argument order.
    """
    # The format arguments follow the parameter order so that "first" in the
    # message really is `array` and "second" really is `array_grad`.
    assert array.shape == array_grad.shape, \
    '''
    Two arrays should have the same shape.
    Instead, the first array has the shape of {0} and the second has the shape of {1}.
    '''.format(tuple(array.shape), tuple(array_grad.shape))

## Operation and ParamOperation parent classes

In [4]:
class Operation:
    """
    Base class for an "operation" in a neural network.

    Subclasses supply the actual computation by overriding ``_output`` and
    ``_input_grad``; ``forward`` and ``backward`` wrap those hooks, caching
    the input, output and input gradient on the instance.
    """

    def __init__(self):
        pass

    def forward(self, input_: ndarray) -> ndarray:
        """
        Run the forward computation.

        Stores the input in ``self.input_``, calls ``self._output()`` and
        stores the result in ``self.output``.

        Returns:
            The value produced by ``self._output()``.
        """
        self.input_ = input_
        self.output = self._output()

        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        """
        Run the backward computation.

        Calls ``self._input_grad()`` and stores the result in
        ``self.input_grad``. Requires ``forward`` to have been called first,
        since it reads ``self.output`` and ``self.input_``.

        Raises:
            AssertionError: if ``output_grad`` does not have the shape of
                ``self.output``, or the computed gradient does not have the
                shape of ``self.input_``.
        """
        assert_same_shape(self.output, output_grad)
        self.input_grad = self._input_grad(output_grad)
        # The gradient with respect to the input must mirror the input's shape.
        assert_same_shape(self.input_, self.input_grad)
        return self.input_grad

    def _output(self):
        """
        The _output method must be defined for each Operation.
        """
        raise NotImplementedError()

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        """
        The _input_grad method must be defined for each Operation.
        """
        raise NotImplementedError()


In [7]:
class ParamOperation(Operation):
    '''
    An operation with parameters involved.

    In addition to the input gradient, subclasses must provide the gradient
    with respect to the parameter by overriding ``_param_grad``.
    '''
    def __init__(self, param: ndarray) -> None:
        '''
        Store the operation's parameter array in ``self.param``.
        '''
        super().__init__()
        self.param = param

    def backward(self, output_grad: ndarray) -> ndarray:
        '''
        Calls self._input_grad and self._param_grad.
        Checks appropriate shapes.

        The results are stored in ``self.input_grad`` and ``self.param_grad``;
        only the input gradient is returned.

        Raises:
            AssertionError: if ``output_grad`` does not match ``self.output``
                in shape, or either computed gradient does not match the
                shape of the array it belongs to.
        '''
        assert_same_shape(self.output, output_grad)

        self.input_grad = self._input_grad(output_grad)
        self.param_grad = self._param_grad(output_grad)

        # Each gradient must have the shape of the quantity it differentiates.
        assert_same_shape(self.input_, self.input_grad)
        assert_same_shape(self.param, self.param_grad)

        return self.input_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Every subclass of ParamOperation must implement _param_grad.
        '''
        raise NotImplementedError()
        

## Implementation of different operations
The operations below derive from the `Operation` and `ParamOperation` base classes.

In [8]:
class WeightMultiply(ParamOperation):
    '''
    Operation that multiplies its input by a weight array held as the parameter.
    '''
    def __init__(self, W: ndarray):
        '''
        Pass W up to ParamOperation so that it becomes self.param.
        '''
        super().__init__(W)

    def _output(self) -> ndarray:
        '''
        Forward pass: dot product of the stored input with the weights.
        '''
        weights = self.param
        return np.dot(self.input_, weights)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Gradient with respect to the input: output_grad dotted with the
        transposed weights.
        '''
        weights_transposed = self.param.T
        return np.dot(output_grad, weights_transposed)

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Gradient with respect to the weights: transposed input dotted with
        output_grad.
        '''
        input_transposed = self.input_.T
        return np.dot(input_transposed, output_grad)
        

In [9]:
class BiasAdd(ParamOperation):
    '''
    Operation that adds a bias array (the parameter) to its input.
    '''
    def __init__(self, B: ndarray):
        '''
        Verify that B has a first dimension of size 1, then store it as
        self.param through ParamOperation.
        '''
        assert B.shape[0] == 1
        super().__init__(B)

    def _output(self) -> ndarray:
        '''
        Forward pass: input plus bias.
        '''
        bias = self.param
        return self.input_ + bias

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Gradient with respect to the input: output_grad scaled by an array
        of ones shaped like the input.
        '''
        ones = np.ones_like(self.input_)
        return ones * output_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Gradient with respect to the bias: output_grad summed over axis 0
        and returned as a single row.
        '''
        scaled = np.ones_like(self.param) * output_grad
        column_totals = np.sum(scaled, axis=0)
        return column_totals.reshape(1, scaled.shape[1])

In [10]:
class Sigmoid(Operation):
    '''
    Sigmoid activation, 1 / (1 + exp(-x)), applied to the stored input.
    '''
    def __init__(self) -> None:
        '''No state of its own; defers to Operation.__init__.'''
        super().__init__()

    def _output(self) -> ndarray:
        '''
        Forward pass: evaluate the sigmoid of self.input_.
        '''
        exp_of_negated = np.exp(-1.0 * self.input_)
        return 1.0/(1.0 + exp_of_negated)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Compute the input gradient: output * (1 - output), multiplied by
        output_grad.
        '''
        # Uses the output cached by forward() rather than recomputing it.
        return self.output * (1.0 - self.output) * output_grad

## Layers

In [13]:
class Layer:
    '''
    A 'layer' of neurons in a neural network.

    A layer holds an ordered list of operations. ``forward`` feeds the input
    through them in order; ``backward`` feeds a gradient through them in
    reverse and then collects the parameter gradients.
    '''

    def __init__(self, neurons: int):
        '''
        The number of 'neurons' roughly corresponds to the 'breadth' of the layer
        '''
        self.neurons = neurons
        # True until the first forward pass has run _setup_layer.
        self.first = True
        self.params: List[ndarray] = []
        self.param_grads: List[ndarray] = []
        self.operations: List[Operation] = []

    def _setup_layer(self, num_in: ndarray) -> None:
        '''
        The _setup_layer function must be implemented for each layer.

        Called once, on the first forward pass. Despite the parameter's name,
        it receives the input array that was passed to ``forward``.
        '''
        # Single leading underscore on purpose: a double underscore would be
        # name-mangled and subclasses' overrides would never be called.
        raise NotImplementedError()

    def forward(self, input_: ndarray) -> ndarray:
        '''
        Passes input forward through a series of operations.

        On the first call, runs ``self._setup_layer(input_)`` before anything
        else. Stores the original input in ``self.input_`` and the final
        result in ``self.output``.
        '''
        if self.first:
            self._setup_layer(input_)
            self.first = False

        self.input_ = input_

        for operation in self.operations:
            input_ = operation.forward(input_)

        self.output = input_
        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        '''
        Passes output_grad backward through a series of operations.
        Checks appropriate shapes.

        Requires ``forward`` to have been called first, since it reads
        ``self.output``. Also refreshes ``self.param_grads``.

        Raises:
            AssertionError: if ``output_grad`` does not have the shape of
                ``self.output``.
        '''
        assert_same_shape(self.output, output_grad)

        for operation in reversed(self.operations):
            output_grad = operation.backward(output_grad)

        input_grad = output_grad

        self._param_grad()
        return input_grad

    def _param_grad(self) -> None:
        '''
        Extracts the _param_grads from a layer's operations.

        Rebuilds ``self.param_grads`` in place; returns nothing.
        '''
        self.param_grads = [
            operation.param_grad
            for operation in self.operations
            if isinstance(operation, ParamOperation)
        ]

    def _params(self) -> None:
        '''
        Extracts the _params from a layer's operations.

        Rebuilds ``self.params`` in place; returns nothing.
        '''
        self.params = [
            operation.param
            for operation in self.operations
            if isinstance(operation, ParamOperation)
        ]