In [None]:
class Layer:
    """Fully connected layer without biases: ``outputs = activation(inputs @ weights)``.

    ``forward`` caches its input and pre-activation so that ``backward`` can
    compute gradients and apply one gradient-descent step to the weights.
    """

    def __init__(self, input_size: int, output_size: int, activation: ActivationFunction=Identity(), random_seed: int=17):
        """Create the layer with weights drawn uniformly from [-1, 1).

        Args:
            input_size: Number of features expected in each input row.
            output_size: Number of features produced for each input row.
            activation: Callable applied to the pre-activation; it must also
                provide ``grad(y)``, used by ``backward``.
            random_seed: Seed passed to ``np.random.seed`` before the weights
                are drawn; ``None`` leaves the global RNG state untouched.
        """
        if random_seed is not None:
            # Original author's note: in Colab the seed only applies to the current cell.
            np.random.seed(random_seed)
        self.weights = np.random.uniform(-1, 1, size=(input_size, output_size))
        self.input_size = input_size
        self.output_size = output_size
        self.last_x = None  # input of the most recent forward pass
        self.last_y = None  # pre-activation of the most recent forward pass
        self.activation = activation

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """Compute ``activation(inputs @ weights)`` and cache the intermediates.

        Args:
            inputs: Array of shape ``(batch_size, input_size)``.

        Returns:
            The result of applying the activation to ``inputs @ weights``.

        Raises:
            ValueError: If ``inputs`` is not 2-D with ``input_size`` columns.
        """
        batch_size = inputs.shape[0]
        if inputs.shape != (batch_size, self.input_size):
            raise ValueError(f'Unappropriate size of input. Expected: {[batch_size, self.input_size]} actual: {inputs.shape}')
        # Remember the input; backward needs it for the weight gradient.
        self.last_x = inputs

        y = inputs @ self.weights
        self.last_y = y
        outputs = self.activation(y)
        return outputs

    def backward(self, grad_outputs: np.ndarray, learning_rate: float=0.01) -> np.ndarray:
        """Back-propagate through the layer and update the weights in place.

        Args:
            grad_outputs: Gradient of the loss with respect to this layer's
                outputs (same shape as the value returned by ``forward``).
            learning_rate: Step size of the gradient-descent update.

        Returns:
            Gradient of the loss with respect to the layer's inputs.

        Raises:
            ValueError: If called before ``forward``.
        """
        if self.last_x is None:
            raise ValueError('try to call backward before forward')
        # Presumably grad() returns the elementwise derivative at last_y -- confirm against ActivationFunction.
        grad_activation = self.activation.grad(self.last_y)
        dZ = grad_outputs * grad_activation
        # Use the weights from the forward pass, i.e. before they are updated below.
        dX = dZ @ self.weights.T
        # The weight gradient has to pass through the activation derivative,
        # so it is built from dZ rather than from the raw grad_outputs.
        dW = self.last_x.T @ dZ
        self.weights -= learning_rate * dW
        return dX

In [None]:
def backward(self, delta_1: np.ndarray, learning_rate: float=0.01) -> np.ndarray:
    """Delta-style backward step: update ``self.weights`` and return the propagated delta.

    Args:
        self: Layer-like object providing ``last_x``, ``last_y``, ``weights``
            and ``activation`` (with a ``grad`` method).
        delta_1: Incoming delta; it is multiplied by ``weights.T`` and used
            directly in the weight gradient ``last_x.T @ delta_1``.
        learning_rate: Step size of the gradient-descent update.

    Returns:
        ``(delta_1 @ weights.T) * activation.grad(last_y)``.

    Raises:
        ValueError: If ``last_x`` is ``None`` (backward called before forward).
    """
    if self.last_x is None:
        raise ValueError('try to call backward before forward')

    # The cached pre-activation is stored by forward as `last_y`.
    grad_activation = self.activation.grad(self.last_y)
    # NOTE(review): delta_1 @ weights.T has input_size columns while the
    # derivative is evaluated at last_y; the product only lines up when the
    # two shapes broadcast (e.g. a square layer) -- confirm the intended formula.
    delta_0 = (delta_1 @ self.weights.T) * grad_activation
    dW = self.last_x.T @ delta_1
    self.weights -= learning_rate * dW
    return delta_0

In [None]:
class Layer:
    """Dense layer with a bias vector: ``activation(inputs @ weights + biases)``.

    The most recent input and pre-activation are kept on the instance so that
    ``backward`` can compute gradients and update the parameters in place.
    """

    def __init__(self, input_size: int, output_size: int, activation: ActivationFunction=Identity(), random_seed: int=17):
        """Store the configuration and draw the initial parameters.

        Args:
            input_size: Number of features expected in each input row.
            output_size: Number of features produced for each input row.
            activation: Callable applied to the pre-activation; ``backward``
                also calls its ``grad`` method.
            random_seed: Passed to ``np.random.seed`` unless it is ``None``.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation
        self.last_x = None

        if random_seed is not None:
            # Original author's note: in Colab the seed only applies to the current cell.
            np.random.seed(random_seed)
        # Draw order matters for reproducibility: weights first, then biases.
        self.weights = np.random.uniform(-1, 1, size=(input_size, output_size))
        self.biases = np.random.uniform(-1, 1, size=(output_size,))

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """Apply the affine map and the activation, caching the intermediates.

        Args:
            inputs: Array of shape ``(batch_size, input_size)``.

        Returns:
            The activation applied to ``inputs @ weights + biases``.

        Raises:
            ValueError: If ``inputs`` does not have shape
                ``(batch_size, input_size)``.
        """
        batch_size = inputs.shape[0]
        shape_is_valid = inputs.shape == (batch_size, self.input_size)
        if not shape_is_valid:
            raise ValueError(f'Unappropriate size of input. Expected: {[batch_size, self.input_size]} actual: {inputs.shape}')

        # Cache the input first, then the pre-activation, for use in backward.
        self.last_x = inputs
        self.last_y = inputs @ self.weights + self.biases
        return self.activation(self.last_y)

    def backward(self, grad_output: np.ndarray, learning_rate: int=0.01) -> np.ndarray:
        """Back-propagate ``grad_output`` and take one gradient-descent step.

        Args:
            grad_output: Gradient of the loss with respect to the layer output.
            learning_rate: Step size applied to both weights and biases.

        Returns:
            Gradient of the loss with respect to the layer input.

        Raises:
            ValueError: If called before ``forward``.
        """
        if self.last_x is None:
            raise ValueError('try to call backward before forward')

        # Gradient with respect to the pre-activation.
        pre_activation_grad = grad_output * self.activation.grad(self.last_y)

        # All gradients are computed from the current parameters before any update.
        input_grad = pre_activation_grad @ self.weights.T
        weight_grad = self.last_x.T @ pre_activation_grad
        bias_grad = np.sum(pre_activation_grad, axis=0)

        self.weights -= learning_rate * weight_grad
        self.biases -= learning_rate * bias_grad
        return input_grad
        

In [None]:
class Layer:
    """Dense layer computing ``activation(inputs @ weights + biases)``.

    ``forward`` caches the input, the pre-activation ``z`` and the output
    ``a``; ``backward`` uses them to compute gradients and update the
    parameters in place.
    """

    def __init__(self, input_size: int, output_size: int, activation: ActivationFunction = Identity(), random_seed: int = 17):
        """Seed the global NumPy RNG and initialise the parameters.

        Args:
            input_size: Number of features expected in each input row.
            output_size: Number of features produced for each input row.
            activation: Callable applied to the pre-activation; ``backward``
                also calls its ``grad`` method.
            random_seed: Passed straight to ``np.random.seed``.
        """
        np.random.seed(random_seed)
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation
        # Small standard-normal weights (scaled by 0.01) and zero biases.
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros(output_size)
        # Caches filled by forward(); None until the first forward pass.
        self.inputs = None
        self.z = None
        self.a = None

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """Run the forward pass and cache the intermediates.

        Args:
            inputs: Expected shape ``(batch_size, input_size)``.

        Returns:
            The activated output, expected shape ``(batch_size, output_size)``.
        """
        self.inputs = inputs  # (batch_size, input_size)
        self.z = np.dot(inputs, self.weights) + self.biases  # (batch_size, output_size)
        self.a = self.activation(self.z)  # (batch_size, output_size)
        return self.a

    def backward(self, grad_output: np.ndarray, learning_rate: float = 0.01) -> np.ndarray:
        """Back-propagate ``grad_output`` and take one gradient-descent step.

        Args:
            grad_output: Gradient of the loss with respect to the layer output.
            learning_rate: Step size applied to both weights and biases.

        Returns:
            Gradient of the loss with respect to the layer input, for use by
            the preceding layer.

        Raises:
            ValueError: If called before ``forward``.
        """
        # Without cached values the math below would fail with an opaque
        # error on None; report the actual misuse instead.
        if self.inputs is None or self.z is None:
            raise ValueError('try to call backward before forward')

        # 1. Gradient w.r.t. the pre-activation (dL/dZ)
        grad_z = grad_output * self.activation.grad(self.z)  # (batch_size, output_size)

        # 2. Gradient w.r.t. the inputs (dL/dX), using the pre-update weights
        grad_input = np.dot(grad_z, self.weights.T)  # (batch_size, input_size)

        # 3. Gradient w.r.t. the weights (dL/dW)
        grad_weights = np.dot(self.inputs.T, grad_z)  # (input_size, output_size)

        # 4. Gradient w.r.t. the biases (dL/db), summed over the batch axis
        grad_biases = np.sum(grad_z, axis=0)  # (output_size,)

        # 5. Parameter update
        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases

        # 6. Hand the input gradient back to the previous layer
        return grad_input