In [3]:
import torch


In [None]:

d = 3
X = torch.arange(1, 10).reshape(d, d)
h = torch.tensor([1, 2, 3])

result = X - h
print("Result of X - h:\n", result)

# Explicitly broadcast h to match X's shape
h_broadcasted = h.unsqueeze(0).expand(d, -1)
result = X - h_broadcasted

print("h broadcasted:\n", h_broadcasted)
print("Result of X - h:\n", result)


In [8]:
def grad(X: torch.Tensor, h: torch.Tensor) -> torch.Tensor:
    epsilon = 1e-8
    diff = X - h
    norm = torch.norm(diff, dim=1)
    
    # Check if the norm is close to zero to avoid division by zero
    safe_norm = torch.where(
        torch.isclose(norm, torch.zeros_like(norm), atol=epsilon),
        torch.ones_like(norm),  # Use ones to avoid division by zero
        norm
    )

    # Compute the gradient and sum over the batch
    # 'ij,i->j' means take 'ij' from diff, 'i' from -1/safe_norm, and sum over 'i' to produce 'j'
    print(safe_norm.size())
    grad = torch.einsum('ij,i->j', diff, -1 / safe_norm)

    return grad

# Example usage
n, d = 5, 3  # Example dimensions
X = torch.randn(n, d)  # Example batch of input data
h = torch.randn(d)  # Example parameter vector

# Calculate summed gradient for the batch
summed_gradients = grad(X, h)
print("Summed Gradients:\n", summed_gradients)


torch.Size([5])
Summed Gradients:
 tensor([0.4223, 1.9742, 3.1068])


In [8]:
M = torch.randn(5, 5)
print(M[:-1, -1].size())
print(M)

torch.Size([4])
tensor([[ 0.6877,  0.4684, -1.2936, -1.2275,  0.8056],
        [-1.5666,  0.1937, -0.3837, -0.2455, -1.4928],
        [-1.0429,  1.4055, -0.1048,  1.7292,  1.1662],
        [ 0.7462, -0.2710,  0.6413, -1.3080, -0.4603],
        [ 2.1570, -0.3662,  1.4468,  0.4635,  1.4983]])


In [9]:
M[-1, :-1] = M[:-1, -1]
print(M)

tensor([[ 0.6877,  0.4684, -1.2936, -1.2275,  0.8056],
        [-1.5666,  0.1937, -0.3837, -0.2455, -1.4928],
        [-1.0429,  1.4055, -0.1048,  1.7292,  1.1662],
        [ 0.7462, -0.2710,  0.6413, -1.3080, -0.4603],
        [ 0.8056, -1.4928,  1.1662, -0.4603,  1.4983]])
