# In [1]:
import numpy as np
import torch as tr
from torch import nn
import torch.optim as optim



# In [None]:
import numpy as np

class Neural_Net:
    """
     Architecture:
      input_len -> widths[0] -> widths[1] -> ... -> widths[depth-1] -> output_len

    Parameters stored after initialize():
      self.W: list of weight matrices
      self.b: list of bias vectors
    """

    def __init__(self, input_len: int, output_len: int, depth: int, widths: np.ndarray):
        self.input_len = int(input_len)
        self.output_len = int(output_len)
        self.depth = int(depth)
        self.widths = np.array(widths, dtype=int)

        # --- Assertions ---
        assert self.input_len > 0, "input_len must be a positive integer."
        assert self.output_len > 0, "output_len must be a positive integer."
        assert self.depth >= 0, "depth must be >= 0 (number of hidden layers)."

        assert self.widths.ndim == 1, "widths must be a 1D array-like."
        assert len(self.widths) == self.depth, (
            f"depth={self.depth} but len(widths)={len(self.widths)}. "
            "Provide one width per hidden layer."
        )
        assert np.all(self.widths > 0), "All entries of widths must be positive integers."

        # Parameter containers (filled by initialize)
        self.W = None
        self.b = None

    def initialize(self, seed: int | None = None, scale: float = 0.01):
        """
        Create weight matrices and bias vectors.

        Weights:
          W[0] shape: (widths[0], input_len)
          W[i] shape: (widths[i], widths[i-1]) for i=1..depth-1
          W_out shape: (output_len, widths[depth-1]) if depth>0 else (output_len, input_len)

        Biases:
          b[i] shape: (widths[i],)
          b_out shape: (output_len,)

        Returns
        -------
        (W, b) where each is a list of numpy arrays.
        """
        rng = np.random.default_rng(seed)

        W = []
        b = []

        if self.depth == 0:
            # No hidden layers: input -> output directly
            W.append(scale * rng.standard_normal((self.output_len, self.input_len)))
            b.append(np.zeros((self.output_len,), dtype=float))
        else:
            # Input -> first hidden
            W.append(scale * rng.standard_normal((self.widths[0], self.input_len)))
            b.append(np.zeros((self.widths[0],), dtype=float))

            # Hidden -> hidden
            for i in range(1, self.depth):
                W.append(scale * rng.standard_normal((self.widths[i], self.widths[i - 1])))
                b.append(np.zeros((self.widths[i],), dtype=float))

            # Last hidden -> output
            W.append(scale * rng.standard_normal((self.output_len, self.widths[-1])))
            b.append(np.zeros((self.output_len,), dtype=float))

        self.W = W
        self.b = b
        return 

    def forward_pass(self, x, activation="relu"):
    """
    Forward pass through the network.

    Parameters
    ----------
    x : array-like
        Shape (input_len,) for a single sample or (N, input_len) for a batch.
    activation : {"relu","tanh","sigmoid","identity"} or callable
        Activation applied on hidden layers only. Output layer is linear.

    Returns
    -------
    y : np.ndarray
        Shape (output_len,) for single sample or (N, output_len) for batch.
    """
    assert self.W is not None and self.b is not None and len(self.W) > 0, \
        "Call initialize() before forward_pass()."

    x = np.asarray(x, dtype=float)
    assert x.shape[-1] == self.input_len, \
        f"Expected last dim {self.input_len}, got {x.shape[-1]}."

    # Choose activation
    if callable(activation):
        act = activation
    else:
        a = str(activation).lower()
        if a == "relu":
            act = lambda z: np.maximum(0.0, z)
        elif a == "tanh":
            act = np.tanh
        elif a == "sigmoid":
            act = lambda z: 1.0 / (1.0 + np.exp(-z))
        elif a in ("identity", "linear", "none"):
            act = lambda z: z
        else:
            raise ValueError(f"Unknown activation: {activation}")

    h = x
    # Hidden layers: affine + activation
    for W, b in zip(self.W[:-1], self.b[:-1]):
        h = h @ W.T + b
        h = act(h)

    # Output layer: affine only
    W_out, b_out = self.W[-1], self.b[-1]
    y = h @ W_out.T + b_out
    return y






