In [5]:
# fix random seed
import numpy as np
np.random.seed(1)


In [8]:

# Dropout
class DropoutLayer:
    # Init
    def __init__(self, dropout_rate=0.3):
        # Store rate, we invert it as for example for dropout
        # of 0.1 we need success rate of 0.9
        self.rate = 1 - dropout_rate

    # Forward pass
    def forward(self, inputs):
        # Save input values
        self.inputs = inputs

        # Generate and save scaled mask
        self.mask = np.random.binomial(1, self.rate, size=inputs.shape) / self.rate

        # Apply mask to output values
        return inputs * self.mask

    # Backward pass
    def backward(self, grad_output):
        # Gradient on values
        return grad_output * self.mask
    
# Example usage:
dropout_layer = DropoutLayer(dropout_rate=0.5)

# Example minibatch of input vectors
x_batch = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

# Forward pass during training
output_train = dropout_layer.forward(x_batch)
print("Dropout output (training):", output_train)

# Forward pass during inference
output_inference = dropout_layer.forward(x_batch)
print("Dropout output (inference):", output_inference)

# Example gradient from the next layer for the minibatch
grad_output_batch = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])

# Backward pass
grad_input_batch = dropout_layer.backward(grad_output_batch)
print("Dropout gradient:", grad_input_batch)


Dropout output (training): [[ 0.  0.  6.]
 [ 8.  0. 12.]]
Dropout output (inference): [[ 2.  4.  0.]
 [ 0.  0. 12.]]
Dropout gradient: [[0.2 0.4 0. ]
 [0.  0.  1.2]]


In [9]:
import numpy as np

class DropoutLayer:
    def __init__(self, dropout_rate=0.5):
        self.dropout_rate = dropout_rate
        self.mask = None
        self.input = None
        self.output = None

    def forward(self, x, training=True):
        if training:
            # Generate a binary mask where values < dropout_rate are set to 0 (dropped out),
            # and values >= dropout_rate are set to 1 (kept).
            self.mask = (np.random.rand(*x.shape) < self.dropout_rate) / (1 - self.dropout_rate)
            self.output = x * self.mask
        else:
            # During inference, no dropout is applied, and the output is the input scaled by (1 - dropout_rate).
            self.output = x * (1 - self.dropout_rate)

        # Save the input for the backward pass
        self.input = x
        return self.output

    def backward(self, grad_output):
        # Apply the dropout mask during the backward pass
        grad_input = grad_output * self.mask
        return grad_input


# Example usage:
dropout_layer = DropoutLayer(dropout_rate=0.5)

# Example minibatch of input vectors
x_batch = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

# Forward pass during training
output_train = dropout_layer.forward(x_batch, training=True)
print("Dropout output (training):", output_train)

# Forward pass during inference
output_inference = dropout_layer.forward(x_batch, training=False)
print("Dropout output (inference):", output_inference)

# Example gradient from the next layer for the minibatch
grad_output_batch = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])

# Backward pass
grad_input_batch = dropout_layer.backward(grad_output_batch)
print("Dropout gradient:", grad_input_batch)


Dropout output (training): [[ 2.  4.  0.]
 [ 0.  0. 12.]]
Dropout output (inference): [[0.5 1.  1.5]
 [2.  2.5 3. ]]
Dropout gradient: [[0.2 0.4 0. ]
 [0.  0.  1.2]]
