In [1]:
import torch

In [2]:
# EXPERIMENT 2: Constant Initialization
# Section banner: a blank line, then the title framed by two 60-character rules.
print(
    "\n" + "=" * 60,
    "EXPERIMENT 2: CONSTANT INITIALIZATION",
    "=" * 60,
    sep="\n",
)


EXPERIMENT 2: CONSTANT INITIALIZATION


In [3]:
# Input: one sample with two features. It is plain data, so autograd does not
# track it; only the parameters below are differentiated.
x = torch.tensor([[1.0, 2.0]])

# Fresh leaf parameters for this experiment. Every weight starts at the same
# constant (0.5) and every bias at 0, so all hidden neurons begin identical.
W1_const = torch.full((2, 3), 0.5, requires_grad=True)  # (inputs, hidden neurons)
b1_const = torch.zeros(3, requires_grad=True)

W2_const = torch.full((3, 1), 0.5, requires_grad=True)  # (hidden neurons, outputs)
b2_const = torch.zeros(1, requires_grad=True)

print("\nInitial W1:")
print(W1_const)
print("\nNotice: All 3 neurons have IDENTICAL weights [0.5, 0.5]")



Initial W1:
tensor([[0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000]], requires_grad=True)

Notice: All 3 neurons have IDENTICAL weights [0.5, 0.5]


In [4]:
# Manual forward pass, spelled out step by step so every intermediate is visible.
print("\n--- FORWARD PASS ---")

# Regression target for the single sample; the loss below compares against it.
target = torch.tensor([[1.0]])

# Hidden layer: affine map only.
z1 = torch.matmul(x, W1_const) + b1_const
print(f"Hidden pre-activation (x @ W1 + b1):\n{z1}")
print(f"Notice: All 3 values are IDENTICAL: {z1[0, 0].item():.2f}")

# Identity "activation": the hidden activations are the pre-activations unchanged.
a1 = z1
print(f"\nHidden activations (after relu/identity):\n{a1}")

# Output layer: another affine map; its result is the network output.
z2 = torch.matmul(a1, W2_const) + b2_const
output = z2
print(f"\nOutput:\n{output}")
print(f"Output value: {output.item():.4f}")

print(f"\nTarget: {target.item()}")

# Mean squared error between prediction and target.
loss = (output - target).pow(2).mean()
print(f"Loss (MSE): {loss.item():.4f}")


--- FORWARD PASS ---
Hidden pre-activation (x @ W1 + b1):
tensor([[1.5000, 1.5000, 1.5000]], grad_fn=<AddBackward0>)
Notice: All 3 values are IDENTICAL: 1.50

Hidden activations (after relu/identity):
tensor([[1.5000, 1.5000, 1.5000]], grad_fn=<AddBackward0>)

Output:
tensor([[2.2500]], grad_fn=<AddBackward0>)
Output value: 2.2500

Target: 1.0
Loss (MSE): 1.5625


In [5]:
# Backward pass: backpropagate the loss and inspect the parameter gradients.
print("\n--- BACKWARD PASS ---")

# autograd ACCUMULATES into .grad. Clear any gradients left over from a
# previous run so that re-running the forward + backward cells shows the
# gradient of this loss only, not a running sum.
for param in (W1_const, b1_const, W2_const, b2_const):
    param.grad = None

# NOTE: backward() frees the graph by default, so re-running this cell alone
# requires re-running the forward-pass cell first to rebuild `loss`.
loss.backward()

print("\nGradients for W1 (∂L/∂W1):")
print(W1_const.grad)

# W1 is laid out as (inputs, neurons), so the weights feeding hidden neuron k
# are COLUMN k of W1 (and of its gradient), not row k.
print()
for neuron_idx in range(W1_const.shape[1]):
    print(f"Column {neuron_idx} (neuron {neuron_idx} weights): ", W1_const.grad[:, neuron_idx])

print("\nGradients for W2 (∂L/∂W2):")
print(W2_const.grad)


--- BACKWARD PASS ---

Gradients for W1 (∂L/∂W1):
tensor([[1.2500, 1.2500, 1.2500],
        [2.5000, 2.5000, 2.5000]])

Row 0 (neuron 0 weights):  tensor([1.2500, 2.5000])
Row 1 (neuron 1 weights):  tensor([1.2500, 2.5000])
Row 2 (neuron 2 weights):  tensor([1.2500, 2.5000])

Gradients for W2 (∂L/∂W2):
tensor([[3.7500],
        [3.7500],
        [3.7500]])
