## Code Snippets

In [None]:
## get the norm of the gradient
#g_grad_norm = Variable(Tensor(1).fill_(0.0), requires_grad=True)
#for p in generator.parameters():
#    g_grad_norm += p.grad.data.norm(2).item()**2
#g_grad_norm = g_grad_norm **(0.5)

In [None]:
## look at 1d measures of UM
## look at raw MNIST data
## double check code
## look into top-subspace

Here's how to compute the Hessian-vector product, i.e. $\sum_j H_{ij} v_j$, where $H_{ij} = \partial_i \partial_j f$ is the Hessian of $f$ and $v$ is an arbitrary vector.

The approach comes from this PyTorch forum post: https://discuss.pytorch.org/t/calculating-hessian-vector-product/11240/4

In [None]:
## f(x) = 3*x0^2 + 4*x0*x1 + x1^2 has the constant Hessian H = [[6, 4], [4, 2]];
## at x = (1, 1) its gradient is (10, 6)

## first, let v be an arbitrary vector:
v = torch.tensor([1.0, 1.0])
x = torch.tensor([1.0, 1.0], requires_grad=True)
f = 3*x[0]**2 + 4*x[0]*x[1] + x[1]**2
## create_graph=True keeps the graph of the gradient so it can be differentiated again
grad_f, = torch.autograd.grad(f, x, create_graph=True)
z = grad_f @ v
z.backward()
print(x.grad)  ## H @ v = tensor([10., 6.])

## now, let v be x - note that now the answer changes because the gradient also hits v,
## giving H @ x + grad_f instead of the pure Hessian vector product
x = torch.tensor([1.0, 1.0], requires_grad=True)
v = x
f = 3*x[0]**2 + 4*x[0]*x[1] + x[1]**2
grad_f, = torch.autograd.grad(f, x, create_graph=True)
z = grad_f @ v
z.backward()
print(x.grad)  ## tensor([20., 12.])

## lastly, detach a copy of x as v so that the derivative does not hit it,
## even though it is related to x
x = torch.tensor([1.0, 1.0], requires_grad=True)
v = x.detach().clone()
f = 3*x[0]**2 + 4*x[0]*x[1] + x[1]**2
grad_f, = torch.autograd.grad(f, x, create_graph=True)
z = grad_f @ v
z.backward()
print(x.grad)  ## H @ v = tensor([10., 6.])

In [None]:
## use the gradient itself as the vector v: a detached copy of grad_f is contracted with
## grad_f, so the derivative does not hit v and backward() yields H @ grad_f
x = torch.tensor([1.0, 1.0], requires_grad=True)
f = 3*x[0]**2 + 4*x[0]*x[1] + x[1]**2
## create_graph=True keeps the graph of the gradient so it can be differentiated again
grad_f, = torch.autograd.grad(f, x, create_graph=True)
v = grad_f.detach().clone()
z = grad_f @ v
z.backward()
print(x.grad)  ## [[6, 4], [4, 2]] @ [10, 6] = tensor([84., 52.])

In [None]:
## draw a standard-normal latent sample of shape (imgs.shape[0], latent_dim), push it through
## the generator and score the result with the discriminator against the `valid` targets
latent_noise = np.random.normal(0, 1, (imgs.shape[0], latent_dim))
z = Variable(Tensor(latent_noise))
gen_imgs = generator(z)
d_out = discriminator(gen_imgs)
g_loss = adversarial_loss(d_out, valid)

## flatten every generator parameter and join the pieces into a single 1-d vector
theta_g_tmp = [param.view(-1) for param in generator.parameters()]
theta_g = torch.cat(theta_g_tmp)

In [None]:
## theta_g is a copy assembled by torch.cat that g_loss was never computed from, so autograd
## cannot differentiate g_loss against it directly; differentiate with respect to the
## generator parameters themselves and flatten the result into one vector laid out like theta_g
## create_graph=True keeps the graph of the gradient so it can be differentiated again
grad_g = torch.cat([
    g.reshape(-1)
    for g in torch.autograd.grad(g_loss, list(generator.parameters()), create_graph=True)
])

In [None]:

## view each generator parameter as 1-d, then concatenate the views into one flat vector
theta_g_tmp = [param.view(-1) for param in generator.parameters()]
theta_g = torch.cat(theta_g_tmp)

In [None]:
## display the flattened generator parameter vector (torch.cat of every param.view(-1))
theta_g

In [None]:
## torch.autograd.grad returns a tuple of tensors; flatten it if it has not been already
grad_g_flat = grad_g if torch.is_tensor(grad_g) else torch.cat([g.reshape(-1) for g in grad_g])
## contract the gradient with theta_g; theta_g is not detached, so the derivative hits it
## too and backward() produces H @ theta_g + grad_g rather than the pure H @ theta_g
z = grad_g_flat @ theta_g
z.backward()
## theta_g is the output of torch.cat (a non-leaf tensor), so theta_g.grad is never populated;
## backward() accumulates into the .grad of each generator parameter, so gather those instead
## (.grad accumulates - zero the generator's gradients first if they already hold values)
print(torch.cat([p.grad.reshape(-1) for p in generator.parameters()]))

In [None]:
## a single linear layer; the summed output is squared because a loss that is linear in the
## parameters has an identically zero Hessian, which leaves nothing to differentiate twice
linear = nn.Linear(10, 20)
input = torch.randn(1, 10)
out = linear(input).sum()**2
## create_graph=True keeps the graph of the gradient so it can be differentiated again
grads = torch.autograd.grad([out], list(linear.parameters()), create_graph=True)
flatten = torch.cat([g.reshape(-1) for g in grads if g is not None])
## random direction to contract the Hessian with
x = torch.randn_like(flatten)
print(flatten.shape)
## detached leaf copy of the gradient; it has no path back to linear's parameters, so it must
## not be used for the Hessian vector product (autograd would return None for every parameter)
flatten2 = flatten.detach().requires_grad_(True)
## Hessian vector product: differentiate the scalar flatten @ x through the retained graph
hvps = torch.autograd.grad([flatten @ x], list(linear.parameters()), allow_unused=True)

In [None]:
## display the tuple returned by torch.autograd.grad (one entry per parameter, possibly None)
hvps

In [None]:
## display the flattened first-order gradient vector
flatten

In [None]:
## random direction with the same shape as the flattened gradient
x = torch.randn_like(flatten)
print(flatten.shape) ## torch.Size([1792])
## leaf tensor that shares x's values and has gradient tracking switched on
x2 = x.detach().requires_grad_(True)

In [None]:
## contract the flattened gradient with x2, then differentiate that scalar with respect to
## the conv parameters; allow_unused=True yields None for parameters it does not depend on
hvp_scalar = torch.matmul(flatten, x2)
hvps = torch.autograd.grad(
    outputs=[hvp_scalar],
    inputs=conv.parameters(),
    allow_unused=True,
)

In [None]:
## display the per-parameter results of the conv Hessian vector product
hvps

In [None]:
## the second entry is None: with allow_unused=True autograd reports parameters that the
## differentiated scalar does not depend on as None
## (presumably this entry is the conv bias - TODO confirm against the cell that defines conv)
print(hvps[1]) ## None

In [None]:
## keep only the Hessian vector products that exist (skip None entries), flatten and join them
used_hvps = [g.reshape(-1) for g in hvps if g is not None]
flatten2 = torch.cat(used_hvps)
print(flatten2.shape) ## torch.Size([1728])

In [None]:
## a simple neural network; the summed output is squared because a loss that is linear in the
## parameters has an identically zero Hessian, which leaves nothing to back-propagate through
linear = nn.Linear(10, 20)
x = torch.randn(1, 10)
y = linear(x).sum()**2

## compute the gradient (torch.autograd.grad returns one tensor per parameter, so flatten
## them into a single vector) and make a copy that is detached from the graph
grad = torch.cat([
    g.reshape(-1)
    for g in torch.autograd.grad(y, list(linear.parameters()), create_graph=True)
])
v = grad.detach().clone()

## compute the Hessian vector product; backward() leaves the pieces of H @ v in the .grad
## attribute of each parameter of linear
z = grad @ v
z.backward()

In [None]:
## detach a copy of x as v so that the derivative does not hit it, even though it is related to x
x = torch.tensor([1.0, 1.0], requires_grad=True)
v = x.detach().clone()
f = 3*x[0]**2 + 4*x[0]*x[1] + x[1]**2
## create_graph=True keeps the graph of the gradient so it can be differentiated again
grad_f, = torch.autograd.grad(f, x, create_graph=True)
z = grad_f @ v
## leaves the Hessian vector product [[6, 4], [4, 2]] @ v = [10, 6] in x.grad
z.backward()

In [None]:
## NOTE(review): this only displays the bound `view` method of grad without calling it -
## looks like leftover debugging; it needs grad to be a tensor (a tuple has no .view)
grad.view

In [None]:
## contract flatten2 with x and differentiate that scalar with respect to linear's parameters;
## allow_unused=True yields None for any parameter the scalar does not depend on
hvps = torch.autograd.grad(
    outputs=[torch.matmul(flatten2, x)],
    inputs=linear.parameters(),
    allow_unused=True,
)

In [None]:
## flatten the per-parameter gradients into one vector and draw a random direction x
flatten = torch.cat([g.reshape(-1) for g in grads if g is not None])
x = torch.randn_like(flatten)
print(flatten.shape)
## leaf tensor sharing flatten's values but cut off from the graph that produced grads
flatten2 = flatten.detach().requires_grad_(True)
## NOTE(review): flatten2 has no path back to linear's parameters, so with allow_unused=True
## every entry of hvps comes back as None - confirm whether flatten @ x was intended instead
hvps = torch.autograd.grad([flatten2 @ x], linear.parameters(), allow_unused=True)

In [None]:
## detach a copy of x as v so that the derivative does not hit it, even though it is related to x
x = torch.tensor([1.0, 1.0], requires_grad=True)
v = x.detach().clone()
f = 3*x[0]**2 + 4*x[0]*x[1] + x[1]**2
## create_graph=True keeps the graph of the gradient so it can be differentiated again
grad_f, = torch.autograd.grad(f, x, create_graph=True)
z = grad_f @ v
z.backward()
print(x.grad)  ## [[6, 4], [4, 2]] @ v = tensor([10., 6.])

In [None]:
## display the scalar contraction of the gradient grad_f with the vector v
grad_f @ v

In [None]:
## display the vector v used in the contraction
v

In [None]:
## zero-initialised leaf vector with gradient tracking enabled
## (220 presumably = 10*20 weights + 20 biases of nn.Linear(10, 20) - TODO confirm)
grad = torch.zeros(220, requires_grad=True)

In [None]:
linear = nn.Linear(10, 20)
x = torch.randn(1, 10)

## gather all parameters of linear into one flat leaf vector that autograd can differentiate
## against (a detached copy, so theta is independent of linear's own parameters)
theta = torch.cat([p.detach().reshape(-1) for p in linear.parameters()]).requires_grad_(True)

## rebuild the layer output from theta so that L really is a function of theta:
## the leading entries are the weight matrix, the remaining ones the bias
n_weight = linear.weight.numel()
weight = theta[:n_weight].view_as(linear.weight)
bias = theta[n_weight:]
L = (x @ weight.t() + bias).sum()

## gradient of L with respect to the flat parameter vector (a 1-tuple holding one tensor)
grads = torch.autograd.grad(L, theta, create_graph=True)

#gnorm = Variable(torch.zeros(1), requires_grad=True)[0].sum()
#for g in grads:
#    gnorm = gnorm + g.pow(2).sum()

In [None]:
## re-create theta as a fresh leaf that tracks gradients; calling torch.tensor on an existing
## tensor copy-constructs it and warns, detach().clone() is the recommended way to copy
theta = theta.detach().clone().requires_grad_(True)

In [None]:
## attempt at a second derivative: differentiate the first-order gradients with respect to theta
## NOTE(review): grads comes from torch.autograd.grad, i.e. it is a tuple of tensors rather
## than a scalar, and no grad_outputs is passed here - confirm this call runs as intended
## NOTE(review): theta is re-created in the preceding cell, so it may no longer be the tensor
## that grads was computed from - confirm
torch.autograd.grad(grads, theta, create_graph=True)

In [None]:
import torch

## one linear layer with a squared, summed output so that second derivatives are non-zero
linear = torch.nn.Linear(10, 20)
x = torch.randn(1, 10)
L = linear(x).sum().pow(2)

## first-order gradients, keeping their graph so they can be differentiated again
grad = torch.autograd.grad(outputs=L, inputs=linear.parameters(), create_graph=True)

## squared L2 norm of the full gradient, summed over every parameter tensor
gnorm = sum(torch.sum(g ** 2) for g in grad)

## gradient of the squared gradient norm with respect to the parameters
grad2 = torch.autograd.grad(outputs=gnorm, inputs=linear.parameters(), create_graph=True)
grad2

In [None]:
## display the current value of x
x