# Testing against other software libraries

### Check that the vector and batch implementations produce equal gradients
I have not had time to check this properly yet.



Even with the same seed, create_layers and create_layers_batch generate different weight tensors:

Vector uses W_vec with shape (out_dim, in_dim).
Batch uses W_bat with shape (in_dim, out_dim).

Resetting the RNG and sampling for a different shape produces different matrices (not transposes of each other). So you’re comparing gradients from different networks.

✅ Fix: Create one set of parameters (e.g., vector convention), then convert to the other convention by transposing each W. Do the same conversion for gradients when comparing.

In [None]:
def vec_to_batch_layers(layers_vec):
    """Convert layers from the vector convention to the batch convention.

    Each layer is a ``(W, b)`` pair. The weight matrix is transposed, going
    from ``(out, in)`` to ``(in, out)``; the bias ``(out,)`` keeps its
    orientation. Both arrays are copied, so the returned layers do not share
    memory with the input layers.

    Returns a new list of ``(W_bat, b)`` tuples in the same layer order.
    """
    converted = []
    for weights, bias in layers_vec:
        weights_bat = weights.T.copy()
        converted.append((weights_bat, bias.copy()))
    return converted

def batch_to_vec_grads(grads_bat):
    """Convert gradients from the batch convention to the vector convention.

    Each entry is a ``(dW, db)`` pair. The weight gradient is transposed,
    going from ``(in, out)`` to ``(out, in)``; the bias gradient ``(out,)``
    keeps its orientation. Both arrays are copied, so the result does not
    share memory with the input gradients.

    Returns a new list of ``(dW_vec, db)`` tuples in the same layer order.
    """
    converted = []
    for weight_grad, bias_grad in grads_bat:
        weight_grad_vec = weight_grad.T.copy()
        converted.append((weight_grad_vec, bias_grad.copy()))
    return converted



# ---- Setup ----
# Compare gradients from the single-vector and the batched backpropagation
# on the SAME network and the SAME single example.
network_input_size = 2
layer_output_sizes = [3, 4]
activation_funcs = [sigmoid, ReLU]
activation_ders = [sigmoid_der, ReLU_der]

# Same single example in both forms: a (1, n) batch and its only row as a vector.
# NOTE: the RNG is re-seeded before every draw, so target_batch starts with the
# same random values as x_batch. That is harmless for a gradient comparison.
np.random.seed(2024)
x_batch = np.random.rand(1, network_input_size)
np.random.seed(2024)
# The target width is taken from the last layer's output size instead of being
# hard-coded, so it stays correct when layer_output_sizes is edited.
target_batch = np.random.rand(1, layer_output_sizes[-1])
x_vector = x_batch[0]
target_vector = target_batch[0]

# Create ONE parameter set in vector convention (assumed: W_vec is (out, in))
np.random.seed(2024)
layers_vector = create_layers(network_input_size, layer_output_sizes)

# Convert that SAME network to batch convention (each W transposed, b copied)
layers_batch = vec_to_batch_layers(layers_vector)

# ---- Run ----
vector_grads = backpropagation(x_vector, layers_vector, activation_funcs, target_vector, activation_ders)
batch_grads  = backpropagation_batch(x_batch, layers_batch, activation_funcs, target_batch, activation_ders)

print(vector_grads)
print(batch_grads)

# Convert batch grads back to vector orientation so shapes line up layer by layer
batch_grads_as_vector = batch_to_vec_grads(batch_grads)

# ---- Compare ----
# Report, per layer, whether the weight and bias gradients agree within 1e-8.
for i, ((dW_v, db_v), (dW_bv, db_bv)) in enumerate(zip(vector_grads, batch_grads_as_vector)):
    print(f"Layer {i}: dW match ->", np.allclose(dW_v, dW_bv, atol=1e-8))
    print(f"Layer {i}: db match ->", np.allclose(db_v, db_bv, atol=1e-8))