In [1]:
import numpy as np


In [2]:
class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Biases are folded into the weight matrices: before each matrix product a
    row of ones is appended to the layer input, so the last column of ``W1``
    and ``W2`` acts as the bias vector. Samples are stored column-wise
    (one sample per column).

    Attributes:
        W1: hidden-layer weights, shape (hidden_size, input_size + 1).
        W2: output-layer weights, shape (output_size, hidden_size + 1).
        input_size, hidden_size, output_size: layer widths given at construction.
        cache: dict of intermediate values from the most recent ``forward``
            call; empty until ``forward`` has been called.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_scale=0.01, rng=None):
        """Create the network with small random Gaussian weights.

        Args:
            input_size: number of input features (rows of X).
            hidden_size: number of hidden units.
            output_size: number of output units.
            weight_scale: factor applied to the standard-normal samples.
            rng: optional ``numpy.random.Generator`` used to draw the weights,
                for reproducible initialization. When ``None`` (default) the
                legacy global generator (``np.random.randn``) is used, so
                ``np.random.seed`` keeps working exactly as before.
        """

        def draw(rows, cols):
            # Standard-normal matrix from the chosen random source.
            if rng is None:
                return np.random.randn(rows, cols)
            return rng.standard_normal((rows, cols))

        # Weights between (input + bias) → (hidden)
        # Shape: (hidden_size, input_size + 1)
        self.W1 = draw(hidden_size, input_size + 1) * weight_scale

        # Weights between (hidden + bias) → (output)
        # Shape: (output_size, hidden_size + 1)
        self.W2 = draw(output_size, hidden_size + 1) * weight_scale

        # Store sizes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Filled by forward(); defined here so the attribute always exists.
        self.cache = {}

    @staticmethod
    def sigmoid(z):
        """Element-wise logistic function 1 / (1 + exp(-z)), computed stably.

        Only ``exp(-|z|)`` is evaluated, which lies in [0, 1], so large
        negative inputs no longer overflow inside ``np.exp``.

        Args:
            z: scalar or array of pre-activations.

        Returns:
            Values in [0, 1] with the same shape as ``z`` (a NumPy scalar for
            scalar input).
        """
        z = np.asarray(z)
        e = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) — algebraically equal.
        out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        return out[()] if out.ndim == 0 else out

    def _append_bias_row(self, A):
        """Return ``A`` with an extra last row of ones (the bias unit).

        Args:
            A: 2-D array of shape (n, batch_size).

        Returns:
            Array of shape (n + 1, batch_size); the ones use ``A``'s dtype.
        """
        ones = np.ones((1, A.shape[1]), dtype=A.dtype)
        return np.vstack([A, ones])

    def forward(self, X):
        """Run a forward pass on a mini-batch and cache the intermediates.

        Args:
            X: input with shape (input_size, batch_size).

        Returns:
            y_hat with shape (output_size, batch_size).

        Side effects:
            Overwrites ``self.cache`` with the keys "X", "Xb", "z1", "a1",
            "a1b", "z2" and "a2" for use by backpropagation.
        """
        # 1) Add bias neuron to input layer: (input_size+1, batch_size)
        Xb = self._append_bias_row(X)

        # 2) Pre-activation of hidden layer: z1 = W1 · Xb
        z1 = self.W1 @ Xb

        # 3) Hidden layer activation: a1 = sigmoid(z1) → (hidden_size, batch_size)
        a1 = self.sigmoid(z1)

        # 4) Add bias neuron to a1: (hidden_size+1, batch_size)
        a1b = self._append_bias_row(a1)

        # 5) Pre-activation of output layer: z2 = W2 · a1b
        z2 = self.W2 @ a1b

        # 6) Output activation: a2 = sigmoid(z2) → (output_size, batch_size)
        a2 = self.sigmoid(z2)

        # Save for backpropagation
        self.cache = {
            "X": X, "Xb": Xb,
            "z1": z1, "a1": a1, "a1b": a1b,
            "z2": z2, "a2": a2
        }
        return a2


## Test

In [4]:
# Layer widths for the smoke test: 784 inputs, 30 hidden units, 10 outputs.
input_size = 784
hidden_size = 30
output_size = 10
B = 16  # batch size (number of columns in X)

# The 0.01 weight scale is a value chosen outside the reference sources.
nn = NeuralNetwork(
    input_size,
    hidden_size,
    output_size,
    weight_scale=0.01,
)

# Random mini-batch with one sample per column: (784, 16)
X = np.random.rand(input_size, B)

Yhat = nn.forward(X)

# Weight matrices carry one extra column for the bias unit.
assert nn.W1.shape == (hidden_size, input_size + 1)
assert nn.W2.shape == (output_size, hidden_size + 1)

# Cached intermediates: bias-augmented input, hidden activations with and without bias row.
assert nn.cache["Xb"].shape == (input_size + 1, B)
assert nn.cache["a1"].shape == (hidden_size, B)
assert nn.cache["a1b"].shape == (hidden_size + 1, B)

# Network output: one column of predictions per sample.
assert Yhat.shape == (output_size, B)

print("All shape checks passed. Forward OK.")


All shape checks passed. Forward OK.
