In [1]:
import numpy as np 

def get_activation_func(name: str):
    """Look up an element-wise activation function by name.

    Args:
        name: One of 'sigmoid', 'relu' or 'tanh'.

    Returns:
        A callable taking a number or numpy array and returning the
        activation applied element-wise.

    Raises:
        KeyError: If `name` is not one of the supported activations.
    """
    if name == 'sigmoid':
        # 1 / (1 + exp(-x)) overflows in exp() for large negative x and emits
        # a RuntimeWarning. Writing it as exp(-log(1 + exp(-x))) and letting
        # np.logaddexp evaluate log(exp(0) + exp(-x)) keeps every
        # intermediate value finite.
        return lambda x: np.exp(-np.logaddexp(0, np.negative(x)))
    if name == 'relu':
        return lambda x: np.maximum(0, x)
    if name == 'tanh':
        return np.tanh
    raise KeyError(f'No such activation function: {name}')

In [4]:
def softmax(x, axis=0):
    """
    Compute a numerically stable softmax along one axis.

    Args:
        x: A numpy array (or array-like) of numbers. For a 1-D input this is
            the usual softmax vector.
        axis: Axis along which the outputs are normalized to sum to 1.
            Defaults to 0, which is the only axis of a 1-D input.
    Returns:
        A numpy array with the same shape as x containing the softmax values.
    """
    x = np.asarray(x)
    # Subtract the maximum taken along `axis` (not the global maximum) so that
    # every slice contains at least one exp(0) = 1 term; otherwise a slice far
    # below the global maximum underflows to 0 / 0.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_vals = np.exp(shifted)
    # keepdims makes the denominator broadcast back against exp_vals for any axis.
    return exp_vals / np.sum(exp_vals, axis=axis, keepdims=True)


In [5]:
import numpy as np

# Resolve the three activation callables by name in one pass
sigmoid_func, relu_func, tanh_func = map(
    get_activation_func, ('sigmoid', 'relu', 'tanh')
)

# Sample input shared by every demo call below
x = np.array([-2, -1, 0, 1, 2])

# Show each function's result on the sample input, one labelled line apiece
for label, func in (
    ("Sigmoid", sigmoid_func),
    ("ReLU", relu_func),
    ("Tanh", tanh_func),
    ("Softmax", softmax),
):
    print(f"{label}:", func(x))

Sigmoid: [0.11920292 0.26894142 0.5        0.73105858 0.88079708]
ReLU: [0 0 0 1 2]
Tanh: [-0.96402758 -0.76159416  0.          0.76159416  0.96402758]
Softmax: [0.01165623 0.03168492 0.08612854 0.23412166 0.63640865]


In [None]:
import numpy as np

class BatchNormalization:
    """Batch normalization over the first (batch) axis of a 2-D input.

    Inputs are expected to have shape (batch, input_dim) so that they
    broadcast against the (1, input_dim) parameters. In training mode the
    layer normalizes with the statistics of the current batch and updates
    exponential running averages; in inference mode it normalizes with those
    running averages instead.
    """

    def __init__(self, input_dim, epsilon=1e-5, momentum=0.9):
        """
        Args:
            input_dim: Number of features (size of the second input axis).
            epsilon: Constant added to the variance before the square root.
            momentum: Weight given to the previous running statistics when
                they are updated; (1 - momentum) is given to the new batch.
        """
        self.epsilon = epsilon
        self.momentum = momentum
        self.input_dim = input_dim
        self.gamma = np.ones((1, input_dim))  # Scale parameter
        self.beta = np.zeros((1, input_dim))  # Shift parameter
        self.running_mean = np.zeros((1, input_dim))
        self.running_var = np.ones((1, input_dim))
        # Intermediates of the most recent training-mode forward pass, read by
        # backward(). None until such a pass has happened.
        self._cache = None

    def forward(self, x, is_training=True):
        """Normalize `x`, then apply the scale (gamma) and shift (beta).

        Args:
            x: Array of shape (batch, input_dim).
            is_training: If True, use batch statistics, update the running
                statistics and store what backward() needs. If False, use the
                running statistics and store nothing.

        Returns:
            Array with the same shape as `x`.
        """
        if is_training:
            # Calculate mean and variance along the batch axis
            mean = np.mean(x, axis=0, keepdims=True)
            var = np.var(x, axis=0, keepdims=True)

            # Normalize the input
            x_centered = x - mean
            std = np.sqrt(var + self.epsilon)
            x_normalized = x_centered / std

            # Update running mean and variance using momentum
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var

            # backward() cannot see this method's locals, so keep them here.
            self._cache = (x_centered, std, x_normalized)
        else:
            # Use running mean and variance during inference
            x_normalized = (x - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            # Drop any stale training cache so that backward() fails loudly
            # instead of silently using intermediates from an older batch.
            self._cache = None

        # Scale and shift
        out = self.gamma * x_normalized + self.beta
        return out

    def backward(self, dout):
        """Back-propagate through the most recent training-mode forward pass.

        Args:
            dout: Gradient of the loss with respect to the forward output,
                shape (batch, input_dim).

        Returns:
            Tuple (dx, dgamma, dbeta): gradients with respect to the forward
            input, gamma and beta. dx has the shape of the input; dgamma and
            dbeta have shape (1, input_dim).

        Raises:
            RuntimeError: If no training-mode forward pass is cached.
        """
        if self._cache is None:
            raise RuntimeError(
                "backward() requires a preceding forward() call with is_training=True"
            )
        x_centered, std, x_normalized = self._cache
        N = dout.shape[0]

        # Calculate gradients for gamma and beta
        dgamma = np.sum(dout * x_normalized, axis=0, keepdims=True)
        dbeta = np.sum(dout, axis=0, keepdims=True)

        # Calculate gradient of normalized input
        dx_normalized = dout * self.gamma

        # Calculate gradients for mean and variance
        # std ** -3 equals (var + epsilon) ** -1.5
        dvar = np.sum(dx_normalized * x_centered * -0.5 * std ** -3, axis=0, keepdims=True)
        dmean = np.sum(-dx_normalized / std, axis=0, keepdims=True) + \
                dvar * np.mean(-2 * x_centered, axis=0, keepdims=True)

        # Calculate gradient of input
        dx = dx_normalized / std + dvar * 2 * x_centered / N + dmean / N

        return dx, dgamma, dbeta
