## Derivative of SoftMax

In [3]:
import numpy as np
import torch

# ----- Step 1: Softmax -----
def softmax(z, axis=None):
    """Compute a numerically stable softmax of ``z``.

    The maximum is subtracted before exponentiating; this leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing on large
    scores.

    Args:
        z: Array-like of scores.
        axis: Axis (or tuple of axes) along which to normalize. The default
            ``None`` normalizes over all elements, which for a 1-D input is
            the ordinary softmax. Pass e.g. ``axis=-1`` to normalize each row
            of a batch independently.

    Returns:
        Array of the same shape as ``z`` whose entries sum to 1 along
        ``axis``.
    """
    z = np.asarray(z)
    # keepdims=True keeps the reduced axis so max/sum broadcast back against z.
    e_z = np.exp(z - np.max(z, axis=axis, keepdims=True))
    return e_z / np.sum(e_z, axis=axis, keepdims=True)

# ----- Step 2: Derivative of Softmax (Jacobian) using for-loop -----
def softmax_jacobian_loop(s):
    """Build the softmax Jacobian entry by entry from the softmax output ``s``.

    Entry ``(i, j)`` is ``s[i] * (1 - s[i])`` on the diagonal and
    ``-s[i] * s[j]`` everywhere else.

    Args:
        s: 1-D sequence of softmax outputs.

    Returns:
        A ``(len(s), len(s))`` NumPy array holding the Jacobian.
    """
    size = len(s)
    jac = np.zeros((size, size))
    for row, s_row in enumerate(s):
        for col, s_col in enumerate(s):
            # Diagonal and off-diagonal entries use different formulas.
            jac[row, col] = s_row * (1 - s_row) if row == col else -s_row * s_col
    return jac

# ----- Step 3: Manual Computation -----
# Example scores, their softmax, and the loop-built Jacobian.
z_np = np.array([1.0, 2.0, 3.0])
s_np = softmax(z_np)
jac_np = softmax_jacobian_loop(s_np)

# sep="\n" prints each array on the line below its label.
print("Manual softmax output:", s_np, sep="\n")
print("\nManual Jacobian:", jac_np, sep="\n")

# ----- Step 4: PyTorch autograd -----
# Float32 tensor built from the NumPy scores, then flagged for autograd tracking.
z_torch = torch.tensor(z_np, dtype=torch.float32).requires_grad_(True)

def torch_softmax(z):
    """Return the softmax of tensor ``z`` taken along dimension 0."""
    return z.softmax(dim=0)

# Jacobian of torch_softmax at z_torch, obtained from autograd.
jac_torch = torch.autograd.functional.jacobian(func=torch_softmax, inputs=z_torch)

# sep="\n" prints the matrix on the line below its label.
print("\nPyTorch Jacobian:", jac_torch.detach().numpy(), sep="\n")

# ----- Step 5: Difference -----
# Element-wise gap between the loop-built Jacobian and the autograd one.
diff = jac_np - jac_torch.detach().numpy()
# Fail loudly on "Run All" if the two Jacobians disagree, rather than relying
# on someone eyeballing the printed matrix. The tolerance is absolute and loose
# enough for the float32 autograd result compared against float64 NumPy values.
np.testing.assert_allclose(
    diff, 0.0, rtol=0, atol=1e-6,
    err_msg="manual and autograd softmax Jacobians disagree",
)
print("\nDifference (Manual - PyTorch):")
print(diff)


Manual softmax output:
[0.09003057 0.24472847 0.66524096]

Manual Jacobian:
[[ 0.08192507 -0.02203304 -0.05989202]
 [-0.02203304  0.18483645 -0.1628034 ]
 [-0.05989202 -0.1628034   0.22269543]]

PyTorch Jacobian:
[[ 0.08192507 -0.02203305 -0.05989202]
 [-0.02203305  0.18483646 -0.1628034 ]
 [-0.05989202 -0.1628034   0.22269543]]

Difference (Manual - PyTorch):
[[-2.71092000e-09  5.48208919e-10  3.00065944e-10]
 [ 5.48208919e-10 -1.56301046e-08 -5.40720094e-09]
 [ 3.00065944e-10 -5.40720094e-09  1.38184478e-09]]
