In [129]:
import numpy as np
from typing import Optional, Tuple, Union


class Dense:
    """Implements fully-connected layer"""

    def __init__(self, n_in: int, n_out: int, use_bias: bool = True):
        """Initializes Dense layer.
        The weights are initialized using uniformly distributed values in range [-1, 1]. Bias vector is not initialized if `use_bias` is False.
        Weigths matrix has the shape (`n_in`, `n_out`), bias vector has the shape (`n_out`, ).

        Arguments:
            n_in: Positive integer, dimensionality of input space.
            n_out: Positive integer, dimensionality of output space.
            use_bias: Whether the layer uses a bias vector."""
        self._weights = np.random.uniform(low=-1, high=1, size=(n_in + use_bias, n_out))
        self.use_bias = use_bias

    @property
    def weights(self) -> tuple[np.ndarray, np.ndarray] | tuple[np.ndarray]:
        """Returns weights used by the layer."""
        if self.use_bias:
            return self._weights[:-1], self._weights[-1]
        return (self._weights,)

    @weights.setter
    def weights(self, value):
        if len(value) > 1:
            self._weights[:-1] = value[0]
            self._weights[-1] = value[1]
        else:
            self._weights = value[0]

    @property
    def input(self) -> np.ndarray:
        """Returns the last input received by the layer"""
        return self._input

    @input.setter
    def input(self, other):
        self._input = other

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Performs the layer forward pass.

        Arguments:
            x: Input array of shape (`batch_size`, `n_in`)

        Returns:
            An array of shape (`batch_size`, `n_out`)"""
        self._input = x
        if self.use_bias:
            x = np.c_[x, np.ones(x.shape[0])]
        return x @ self._weights

    def grad(
        self, gradOutput: np.ndarray
    ) -> (
        tuple[np.ndarray, tuple[np.ndarray, np.ndarray]]
        | tuple[np.ndarray, tuple[np.ndarray]]
    ):
        """Computes layer gradients

        Arguments:
            gradOutput: Gradient of loss function with respect to the layer output, an array of shape (`batch_size`, `n_out`).

        Returns:
            A tuple object:
                Gradient of loss function with respect to the layer input, an array of shape (`batch_size`, `n_in`)
                Gradient of loss function with respect to the layer's weights:
                    An array of shape (`n_in`, `n_out`).
                    Optional array of shape (`n_out`, )."""

        x = self.input
        if self.use_bias:
            x = np.c_[x, np.ones(x.shape[0])]
        w_grad = x.T @ gradOutput
        x_grad = gradOutput @ (self._weights[:-1] if self.use_bias else self._weights).T
        return (x_grad, (w_grad[:-1], w_grad[-1]) if self.use_bias else (w_grad,))

In [130]:
# Smoke test: build a 3 -> 5 layer with the default bias, push a random batch
# of 10 samples through it and print the output shape.
m = Dense(3, 5)
print(m(np.random.uniform(size=(10, 3))).shape)
# Backpropagate a random upstream gradient of shape (10, 5) through the layer.
g = m.grad(np.random.uniform(size=(10,5)))

(10, 5)


In [133]:
# Shape of the bias gradient: second entry of the weight-gradient tuple returned by `grad`.
g[1][1].shape

(5,)