# Task 2
Implement basic backward pass using only numpy:

Perform forward pass and backward pass, and use the gradient check function to verify our implementation

In [2]:
import numpy as np
from utils import Module

## Linear Layer

In the previous task we defined the `forward(input)` pass for our `Linear` class. Now we extend our framework a little further by defining the `backward(dNet)` function. Separating the linear unit from the activation is beneficial for backward propagation and optimization.


In [3]:
#------------------------------------------------------------------------------
#   Linear class
#------------------------------------------------------------------------------
class Linear(Module):
    """Fully connected (affine) layer computing ``W @ input + b``.

    Parameters are stored as ``W`` with shape ``(out_features, in_features)``
    and ``b`` with shape ``(out_features, 1)``. Their gradients ``dW`` and
    ``db`` have the same shapes as the parameters they belong to.
    """

    def __init__(self, in_features, out_features):
        """Create the layer with random-normal weights and zero biases.

        Args:
            in_features: size of each input sample.
            out_features: size of each output sample.
        """
        super(Linear, self).__init__()
        self.W = np.random.randn(out_features, in_features)
        self.dW = np.zeros_like(self.W)  # Watch out for the shape - it has to be the same as W
        self.b = np.zeros((out_features, 1))
        self.db = np.zeros_like(self.b)  # Watch out for the shape - it has to be the same as b

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Apply the affine transformation and cache what ``backward`` needs.

        Args:
            input: batch of inputs; axis 0 is treated as the batch axis
                (presumably shaped ``(m, in_features, 1)``, as implied by the
                ``transpose((0, 2, 1))`` in ``backward`` - confirm with caller).

        Returns:
            ``W @ input + b``.
        """
        self.aPrevious = input
        # Batch size is taken from the cached input; it is used to average
        # the parameter gradients in `backward`.
        self.m = self.aPrevious.shape[0]
        net = self.W @ input + self.b
        return net

    def backward(self, dz: np.ndarray) -> np.ndarray:
        """Compute parameter gradients and propagate the gradient to the input.

        Must be called after ``forward`` because it relies on the cached
        ``aPrevious`` and ``m``.

        Args:
            dz: gradient with respect to this layer's output, one entry per
                sample along axis 0.

        Returns:
            Gradient with respect to the layer input, ``W.T @ dz``.
        """
        # Per-sample outer products dz_i @ a_i^T, averaged over the batch.
        self.dW = (1.0/self.m) * np.sum(np.matmul(dz, self.aPrevious.transpose((0,2,1))), axis=0)
        self.db = (1.0/self.m) * np.sum(dz, axis=0)
        return np.matmul(self.W.T, dz)

## Activations
Implement backward pass for Sigmoid, Tanh and ReLU activation functions.

In [4]:
#------------------------------------------------------------------------------
#   SigmoidActivationFunction class
#------------------------------------------------------------------------------
class Sigmoid(Module):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied elementwise."""

    def __init__(self):
        super(Sigmoid, self).__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Return the sigmoid of ``input`` and remember ``input`` for ``backward``."""
        self.aPrevious = input
        denominator = 1.0 + np.exp(-input)
        return 1.0 / denominator

    def backward(self, da) -> np.ndarray:
        """Scale the incoming gradient ``da`` by the sigmoid derivative ``a * (1 - a)``.

        The activation ``a`` is re-evaluated by calling this module on the
        input cached during the last forward pass.
        """
        activation = self(self.aPrevious)
        local_gradient = np.multiply(activation, 1 - activation)
        return np.multiply(da, local_gradient)


#------------------------------------------------------------------------------
#   RELUActivationFunction class
#------------------------------------------------------------------------------
class ReLU(Module):
    """Rectified linear unit, ``max(x, 0)``, applied elementwise."""

    def __init__(self):
        super(ReLU, self).__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Return ``input`` with negatives replaced by zero; cache ``input`` for ``backward``."""
        self.aPrevious = input
        rectified = np.maximum(input, 0)
        return rectified

    def backward(self, da) -> np.ndarray:
        """Pass ``da`` through where the cached input was strictly positive, zero elsewhere."""
        # Boolean mask turned into 0/1 integers so it can scale the gradient.
        positive_mask = (self.aPrevious > 0) * 1
        return np.multiply(da, positive_mask)

## Loss functions
For a successful backward pass, computing the loss function and its derivative is necessary.
The most common loss functions are **Mean Square Error** _(MSE, L2)_, **Mean Absolute Error** _(MAE, L1)_ and **Binary Cross Entropy** _(BCE, Log Loss)_, along with their modifications, chosen according to what works better for the current dataset.

Let's implement MSE and BCE Loss functions as Modules of our little framework.

In [5]:
#------------------------------------------------------------------------------
#   MeanSquareErrorLossFunction class
#------------------------------------------------------------------------------
class MSELoss(Module):
    """Mean squared error loss between predictions and targets."""

    def __init__(self):
        super(MSELoss, self).__init__()

    def forward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the squared error ``(target - input) ** 2`` averaged over axis 0.

        Args:
            input: predictions.
            target: ground-truth values, broadcastable against ``input``.
        """
        return np.mean(np.power(target - input, 2), axis=0)

    def backward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the derivative ``-2 * (target - input)`` averaged over axis 0.

        Args:
            input: predictions.
            target: ground-truth values, broadcastable against ``input``.
        """
        # Operate on the arrays themselves: `ndarray.data` is a raw memory
        # buffer that does not support arithmetic, so `target.data - input.data`
        # raised a TypeError.
        # NOTE(review): the mean over axis 0 collapses the batch axis, whereas
        # BCELoss.backward returns per-sample gradients - confirm which
        # convention the downstream layers expect.
        return np.mean(-2 * (target - input), axis=0)


#------------------------------------------------------------------------------
#   BinaryCrossEntropyLossFunction class
#------------------------------------------------------------------------------
class BCELoss(Module):
    """Binary cross-entropy (log loss), evaluated elementwise.

    Predictions are clipped to ``[eps, 1 - eps]`` (``eps`` being the machine
    epsilon of the prediction dtype) before use, so saturated predictions of
    exactly 0 or 1 yield large finite values instead of ``inf``/``nan``.
    Predictions strictly inside that interval are unaffected.
    """

    def __init__(self):
        super(BCELoss, self).__init__()

    @staticmethod
    def _clip_probabilities(input: np.ndarray) -> np.ndarray:
        """Return ``input`` as a floating array clipped away from 0 and 1."""
        probabilities = np.asarray(input)
        if not np.issubdtype(probabilities.dtype, np.floating):
            probabilities = probabilities.astype(np.float64)
        eps = np.finfo(probabilities.dtype).eps
        return np.clip(probabilities, eps, 1.0 - eps)

    def forward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return ``-(t * log(p) + (1 - t) * log(1 - p))`` for each element.

        Args:
            input: predicted probabilities ``p``.
            target: labels ``t``, broadcastable against ``input``.
        """
        p = self._clip_probabilities(input)
        return -(target * np.log(p) + np.multiply((1 - target), np.log(1 - p)))

    def backward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the elementwise derivative ``-t / p + (1 - t) / (1 - p)``.

        Args:
            input: predicted probabilities ``p``.
            target: labels ``t``, broadcastable against ``input``.
        """
        # Same clipping as in `forward`, which also avoids division by zero.
        p = self._clip_probabilities(input)
        return -np.divide(target, p) + np.divide(1 - target, 1 - p)

## Model
As in the previous task, use the `Model` class to encapsulate all layers of our MLP and define the backward pass.
Iterate over its modules, stored in the OrderedDict attribute `self.modules`, in the correct order.

We call `.add_module(...)` to add layers to our MLP (network).


In [6]:
#------------------------------------------------------------------------------
#   Model class
#------------------------------------------------------------------------------
class Model(Module):
    """Sequential container that chains the modules stored in ``self.modules``."""

    def __init__(self):
        super(Model, self).__init__()

    def forward(self, input) -> np.ndarray:
        """Feed ``input`` through every module in insertion order.

        Returns:
            The output of the last module.
        """
        for _name, module in self.modules.items():
            input = module(input)
        return input

    def backward(self, z: np.ndarray) -> np.ndarray:
        """Propagate the gradient ``z`` through the modules in reverse order.

        Args:
            z: gradient with respect to the model output.

        Returns:
            The gradient produced by the first module's ``backward``. It is
            returned so that a ``Model`` behaves like every other module here
            (whose ``backward`` returns a gradient) and can be nested.
        """
        for _name, module in reversed(self.modules.items()):
            z = module.backward(z)
        return z

## Main Processing Cell

 1. Initialize dataset (`dataset_Circles`).
 2. Declare a simple model (at least 3 hidden layers).
 3. Perform forward pass through the network.
 4. Compute loss.
 5. Backward prop loss.
 6. Backward pass MLP.
 7. Check your computation of gradients via [`gradient_check`](https://datascience-enthusiast.com/DL/Improving_DeepNeural_Networks_Gradient_Checking.html)
 8. Start crying.
 9. Repeat until correct ;)


In [7]:
from dataset import dataset_circles
from utils import gradient_check

In [8]:
dataset_features_X, dataset_labels_Y = dataset_circles(m=128, radius=0.7, noise=0.0)

###>>> start of solution
# MLP 2 -> 3 -> 4 -> 5 -> 1: three ReLU hidden layers and a sigmoid output.
# Activation layers are named after the activation they actually apply.
mlp = Model()
mlp.add_module(Linear(2, 3), 'Dense_1')
mlp.add_module(ReLU(), 'ReLU_1')
mlp.add_module(Linear(3, 4), 'Dense_2')
mlp.add_module(ReLU(), 'ReLU_2')
mlp.add_module(Linear(4, 5), 'Dense_3')
mlp.add_module(ReLU(), 'ReLU_3')
mlp.add_module(Linear(5, 1), 'Dense_4_out')
mlp.add_module(Sigmoid(), 'Sigmoid')
loss_fn = BCELoss()

# Forward pass, loss, then backward pass starting from the loss derivative.
predicted_Y_hat = mlp.forward(dataset_features_X)
loss = loss_fn(predicted_Y_hat, dataset_labels_Y)
dLoss = loss_fn.backward(predicted_Y_hat, dataset_labels_Y)
mlp.backward(dLoss)

# Report gradient shapes of every layer that has trainable parameters.
for name, module in mlp.modules.items():
    if hasattr(module, 'dW'):
        print(name, module.dW.shape, module.db.shape, sep='\n')

# Compare the analytical gradients against numerical ones.
gradient_check(mlp, loss_fn, dataset_features_X, dataset_labels_Y)

Dense_1
(3, 2)
(3, 1)
Dense_2
(4, 3)
(4, 1)
Dense_3
(5, 4)
(5, 1)
Dense_4_out
(1, 5)
(1, 1)
[92mYour backward propagation works perfectly fine! difference = 5.627174213001082e-09[0m
