In [28]:
import numpy as np
from scipy.stats import norm
import torch
from torch.distributions.normal import Normal

# Gradients/Hessians, taking the derivative wrt $\beta$ first

In [29]:
# Build the autograd graph for log(pi) and differentiate the analytical
# beta-gradient once more, so PyTorch yields the second derivatives.

# --- Random scalar inputs -------------------------------------------------
# The draw order (w, x, alpha, beta) is kept so the RNG stream is consumed
# in the same sequence.
zero_tensor, one_tensor = torch.tensor(0.), torch.tensor(1.)
w_torch = torch.normal(zero_tensor, one_tensor)
x_torch = torch.normal(zero_tensor, one_tensor)
y_torch = torch.tensor(1.)
# alpha and beta are the leaf tensors that gradients are accumulated into.
alpha_torch = torch.normal(zero_tensor, one_tensor).requires_grad_()
beta_torch = torch.normal(zero_tensor, one_tensor).requires_grad_()

# --- Location and scale ---------------------------------------------------
# mu and sigma are non-leaf tensors; retain_grad() keeps their .grad
# available after a backward pass.
mu_torch = x_torch * beta_torch
mu_torch.retain_grad()
sigma_torch = torch.exp(w_torch * alpha_torch)
sigma_torch.retain_grad()

# --- Standardised bounds log(y + 1) and log(y) ----------------------------
z_bar = (torch.log(y_torch + 1) - mu_torch) / sigma_torch
z_underbar = (torch.log(y_torch) - mu_torch) / sigma_torch

# --- Standard normal CDF / PDF at both bounds -----------------------------
normal_dist = Normal(loc=0, scale=1)
Phi_bar, Phi_underbar = normal_dist.cdf(z_bar), normal_dist.cdf(z_underbar)
# pi is the standard normal mass between z_underbar and z_bar.
pi = Phi_bar - Phi_underbar
log_pi = torch.log(pi)
phi_bar = torch.exp(normal_dist.log_prob(z_bar))
phi_underbar = torch.exp(normal_dist.log_prob(z_underbar))

# --- kappa_k = (z_bar^k * phi_bar - z_underbar^k * phi_underbar) / pi ------
kappa_0 = (phi_bar - phi_underbar) / pi
kappa_1 = (z_bar * phi_bar - z_underbar * phi_underbar) / pi
kappa_2 = (z_bar**2 * phi_bar - z_underbar**2 * phi_underbar) / pi
kappa_3 = (z_bar**3 * phi_bar - z_underbar**3 * phi_underbar) / pi

# Analytical gradient of log(pi) with respect to beta.
grad_beta = -kappa_0*x_torch/sigma_torch

# Optional sanity check of grad_beta against autograd. backward() accumulates
# into .grad, so run this instead of (not in addition to) the call below.
# log_pi.backward()
# print(f"analytical: {grad_beta}")
# print(f"torch: {beta_torch.grad}")

# Differentiating grad_beta leaves d(grad_beta)/d(beta) in beta_torch.grad
# and d(grad_beta)/d(alpha) in alpha_torch.grad.
grad_beta.backward()

In [30]:
# Second derivative wrt beta: autograd value (left in beta_torch.grad by
# grad_beta.backward()) next to the closed-form expression.
print(f"beta^2 torch: {beta_torch.grad}")
beta_curvature_scale = -x_torch**2/sigma_torch**2
hessian_beta = beta_curvature_scale * (kappa_0**2 + kappa_1)
print(f"beta^2 analytical: {hessian_beta}")

beta^2 torch: -0.2558543086051941
beta^2 analytical: -0.25585436820983887


In [31]:
# Cross derivative (beta first, then alpha): autograd value (left in
# alpha_torch.grad by grad_beta.backward()) next to the closed-form expression.
print(f"beta alpha torch: {alpha_torch.grad}")
neg_inv_sigma = -1/sigma_torch
cross_kappa_term = kappa_2 + kappa_0 * (kappa_1 - 1)
hessian_beta_alpha = neg_inv_sigma * x_torch * w_torch * cross_kappa_term
print(f"beta alpha analytical: {hessian_beta_alpha}")

beta alpha torch: -0.0679616630077362
beta alpha analytical: -0.06796164810657501


# Gradients/Hessians, taking the derivative wrt $\alpha$ first

In [23]:
# Rebuild the autograd graph from fresh random draws and differentiate the
# analytical alpha-gradient once more, so PyTorch yields d^2 log(pi)/d alpha^2.

# --- Random scalar inputs -------------------------------------------------
# The draw order (w, x, alpha, beta) is kept so the RNG stream is consumed
# in the same sequence.
zero_tensor, one_tensor = torch.tensor(0.), torch.tensor(1.)
w_torch = torch.normal(zero_tensor, one_tensor)
x_torch = torch.normal(zero_tensor, one_tensor)
y_torch = torch.tensor(1.)
# alpha and beta are the leaf tensors that gradients are accumulated into.
alpha_torch = torch.normal(zero_tensor, one_tensor).requires_grad_()
beta_torch = torch.normal(zero_tensor, one_tensor).requires_grad_()

# --- Location and scale ---------------------------------------------------
# mu and sigma are non-leaf tensors; retain_grad() keeps their .grad
# available after a backward pass.
mu_torch = x_torch * beta_torch
mu_torch.retain_grad()
sigma_torch = torch.exp(w_torch * alpha_torch)
sigma_torch.retain_grad()

# --- Standardised bounds log(y + 1) and log(y) ----------------------------
z_bar = (torch.log(y_torch + 1) - mu_torch) / sigma_torch
z_underbar = (torch.log(y_torch) - mu_torch) / sigma_torch

# --- Standard normal CDF / PDF at both bounds -----------------------------
normal_dist = Normal(loc=0, scale=1)
Phi_bar, Phi_underbar = normal_dist.cdf(z_bar), normal_dist.cdf(z_underbar)
# pi is the standard normal mass between z_underbar and z_bar.
pi = Phi_bar - Phi_underbar
log_pi = torch.log(pi)
phi_bar = torch.exp(normal_dist.log_prob(z_bar))
phi_underbar = torch.exp(normal_dist.log_prob(z_underbar))

# --- kappa_k = (z_bar^k * phi_bar - z_underbar^k * phi_underbar) / pi ------
kappa_0 = (phi_bar - phi_underbar) / pi
kappa_1 = (z_bar * phi_bar - z_underbar * phi_underbar) / pi
kappa_2 = (z_bar**2 * phi_bar - z_underbar**2 * phi_underbar) / pi
kappa_3 = (z_bar**3 * phi_bar - z_underbar**3 * phi_underbar) / pi

# Analytical gradient of log(pi) with respect to alpha.
grad_alpha = -kappa_1*w_torch

# Optional sanity check of grad_alpha against autograd (gradient of log(pi)
# wrt alpha). backward() accumulates into .grad, so run this instead of
# (not in addition to) the call below.
# log_pi.backward()
# print(f"analytical: {grad_alpha}")
# print(f"torch: {alpha_torch.grad}")

# Differentiating grad_alpha leaves d(grad_alpha)/d(alpha) in alpha_torch.grad.
grad_alpha.backward()

# Autograd second derivative first, then the closed-form expression.
print(alpha_torch.grad)
alpha_curvature = kappa_1 * (kappa_1 - 1) + kappa_3
hessian_alpha = -alpha_curvature * w_torch**2
print(hessian_alpha)

tensor(-5.3592)
tensor(-5.3592, grad_fn=<MulBackward0>)
