In [None]:
import numpy as np

class Dropout:
    """Inverted-dropout layer operating on NumPy arrays.

    During training each unit is zeroed with probability ``drop_rate`` and the
    surviving units are scaled by ``1 / (1 - drop_rate)`` so the expected value
    of the output matches the input. In evaluation mode the layer is the
    identity.
    """

    def __init__(self, drop_rate: float) -> None:
        """
        Creates the dropout layer.

        args:
            drop_rate (float) is the probability that a unit is set to 0
                (e.g. 0.5 for 50%). Must satisfy 0 <= drop_rate < 1.

        raises:
            ValueError if drop_rate is outside [0, 1).
        """
        # drop_rate == 1 would make the 1/(1-drop_rate) scale divide by zero
        # and fill the output with NaN, so it is rejected up front. The
        # chained comparison is also False for NaN, which is rejected as well.
        if not 0.0 <= drop_rate < 1.0:
            raise ValueError(
                f"drop_rate must be in the range [0, 1), got {drop_rate!r}"
            )
        self.drop_rate = drop_rate
        # Boolean array with the same shape as the last training input:
        # True where the unit was kept, False where it was dropped.
        # None when the most recent forward pass did not apply dropout.
        self.mask = None

    def forward(self, x: np.ndarray, training: bool = True) -> np.ndarray:
        """
        Performs the forward pass for the dropout layer.

        args:
            x (np.array) is the input data
            training (bool) if true, apply dropout, else return input as is.

        returns:
            The output data after applying dropout (if training is set to True),
            or the input unchanged when training is False.
        """
        if not training:
            # The pass-through has no mask; clear any mask left over from an
            # earlier training pass so backward() does not reuse it.
            self.mask = None
            return x

        keep_prob = 1 - self.drop_rate
        # rand() draws from [0, 1), so `>=` keeps a unit with probability
        # exactly keep_prob and makes drop_rate == 0 a true identity.
        self.mask = np.random.rand(*x.shape) >= self.drop_rate
        # Divide by the keep probability to maintain the expected value.
        return x * self.mask / keep_prob

    def backward(self, d_out: np.ndarray) -> np.ndarray:
        """
        Performs the backwards pass for the dropout layer.

        args:
            d_out (np.array) is the gradient from the subsequent layer

        returns:
            np.ndarray is the gradient passed to the preceding layer
        """
        # No mask means the last forward pass was the identity (evaluation
        # mode) or no forward pass has run yet: the gradient passes through.
        if self.mask is None:
            return d_out
        # The gradient only flows through the units that weren't dropped in
        # the forward pass, with the same 1/(1-drop_rate) scaling applied.
        return d_out * self.mask / (1 - self.drop_rate)