In [20]:
import numpy as np
from scipy.stats import norm
import torch
from torch.distributions.normal import Normal

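# Gradients

Model: $\mu = x\beta$, $\sigma = \exp(w\alpha)$, and $\pi = \Phi(\bar z) - \Phi(\underline z)$ with $\bar z = (\log(y+1) - \mu)/\sigma$ and $\underline z = (\log y - \mu)/\sigma$. The cells below compute $\partial \log\pi / \partial\beta$ and $\partial \log\pi / \partial\alpha$ three ways (PyTorch autograd, finite differences, closed form) and should agree.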

In [21]:
# Reference gradient of log(prob) with respect to alpha and beta, via PyTorch autograd
zero_tensor = torch.tensor(0.)
one_tensor = torch.tensor(1.)

# Random scalar covariates and a fixed observation.
# The draws stay in the order w, x, alpha, beta so the RNG stream is consumed identically.
w_torch = torch.normal(zero_tensor, one_tensor)
x_torch = torch.normal(zero_tensor, one_tensor)
y_torch = torch.tensor(1.)

# Parameters: leaf tensors tracked by autograd
alpha_torch = torch.normal(zero_tensor, one_tensor).requires_grad_(True)
beta_torch = torch.normal(zero_tensor, one_tensor).requires_grad_(True)

# Location and scale of the normal distribution
mu_torch = x_torch * beta_torch
sigma_torch = torch.exp(w_torch * alpha_torch)
normal_dist = Normal(loc=mu_torch, scale=sigma_torch)

# Probability that the normal variable lies between log(y) and log(y + 1)
prob = normal_dist.cdf(torch.log(y_torch + 1)) - normal_dist.cdf(torch.log(y_torch))
log_prob = torch.log(prob)

# Back-propagate log(prob) into alpha_torch.grad and beta_torch.grad
log_prob.backward()
print(f"beta: {beta_torch.grad}")
print(f"alpha: {alpha_torch.grad}")

beta: 0.056196969002485275
alpha: 1.3264514207839966


In [22]:
# Numerical derivative
# Detach every torch scalar from the autograd graph and expose it as a NumPy value.
w, x, y, alpha, beta, mu, sigma = (
    tensor.detach().numpy()
    for tensor in (
        w_torch,
        x_torch,
        y_torch,
        alpha_torch,
        beta_torch,
        mu_torch,
        sigma_torch,
    )
)

def log_pmf(y, mu, sigma):
    """Log-probability that a Normal(mu, sigma) variable lies in [log(y), log(y + 1)].

    Computes ``log(Phi(z_hi) - Phi(z_lo))`` with
    ``z_hi = (log(y + 1) - mu) / sigma`` and ``z_lo = (log(y) - mu) / sigma``.

    Parameters
    ----------
    y : float or array_like
        Non-negative value; ``y = 0`` is allowed (the lower bound becomes -inf).
    mu : float or array_like
        Location of the normal distribution.
    sigma : float or array_like
        Scale of the normal distribution.

    Returns
    -------
    float or ndarray
        Log of the interval probability, broadcast over the inputs.
    """
    # log(0) = -inf is the intended lower bound for y == 0, so silence only
    # that divide-by-zero warning.
    with np.errstate(divide="ignore"):
        z_lo = (np.log(y) - mu) / sigma
    z_hi = (np.log(y + 1) - mu) / sigma

    # When the whole interval sits above the mean, both CDFs are close to 1 and
    # their difference cancels (to exactly 0 far in the tail). The survival
    # function gives the same probability without the cancellation:
    #   Phi(z_hi) - Phi(z_lo) == sf(z_lo) - sf(z_hi)
    prob = np.where(
        z_lo > 0,
        norm.sf(z_lo) - norm.sf(z_hi),
        norm.cdf(z_hi) - norm.cdf(z_lo),
    )
    log_prob = np.log(prob)
    return log_prob

# Forward finite differences of log_pmf.
# A step in beta moves mu by x * dbeta; a step in alpha rescales sigma by exp(w * dalpha).
dbeta = 1e-3
mu_shifted = mu + x*dbeta
deriv_beta_numeric = (log_pmf(y, mu_shifted, sigma) - log_pmf(y, mu, sigma)) / dbeta
print(f"beta: {deriv_beta_numeric}")

dalpha = 1e-3
sigma_shifted = sigma * np.exp(w * dalpha)
deriv_alpha_numeric = (log_pmf(y, mu, sigma_shifted) - log_pmf(y, mu, sigma)) / dalpha
print(f"alpha: {deriv_alpha_numeric}")

beta: 0.05613741642518377
alpha: 1.3264086067601433


In [23]:
# Analytical gradient
# Standardised interval bounds and the density / mass terms shared by both derivatives.
z_bar = (np.log(y + 1) - mu) / sigma
z_underbar = (np.log(y) - mu) / sigma
pdf_bar, pdf_underbar = norm.pdf(z_bar), norm.pdf(z_underbar)
interval_mass = norm.cdf(z_bar) - norm.cdf(z_underbar)

# d log(prob) / d beta
beta_num = pdf_underbar - pdf_bar
beta_den = interval_mass
beta_grad = beta_num / beta_den * x / sigma
print(f"beta: {beta_grad}")

# Simple analytical gradient: d log(prob) / d alpha
alpha_num = z_underbar * pdf_underbar - z_bar * pdf_bar
alpha_den = interval_mass
alpha_grad = alpha_num / alpha_den * w
print(f"alpha: {alpha_grad}")

beta: 0.05619716554683066
alpha: 1.3264510479330736


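# Hessians

With $\phi$ the standard normal density, $\bar\phi = \phi(\bar z)$, $\underline\phi = \phi(\underline z)$ and $\pi = \Phi(\bar z) - \Phi(\underline z)$, define $\gamma = (\bar\phi - \underline\phi)/\pi$, $\delta = (\bar z\bar\phi - \underline z\,\underline\phi)/\pi$, $\lambda = (\bar z^2\bar\phi - \underline z^2\underline\phi)/\pi$ and $\kappa = (\bar z(\bar z^2-1)\bar\phi - \underline z(\underline z^2-1)\underline\phi)/\pi$. The scores are $\partial\log\pi/\partial\beta = -\gamma x/\sigma$ and $\partial\log\pi/\partial\alpha = -\delta w$. The cells below differentiate these with autograd and compare against the closed forms $-\tfrac{x^2}{\sigma^2}(\gamma^2+\delta)$, $-\tfrac{xw}{\sigma}\,(\lambda+\gamma(\delta-1))$ and $-w^2(\kappa+\delta^2)$.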

In [24]:
# Hessian check, beta row: build the closed-form score grad_beta = d log(pi) / d beta
# as a differentiable torch expression, then back-propagate it so that
# beta_torch.grad and alpha_torch.grad hold its derivatives with respect to beta and alpha.
zero_tensor = torch.tensor(0.)
one_tensor = torch.tensor(1.)
# Fresh random scalars; these overwrite the tensors defined in the gradient section.
w_torch = torch.normal(zero_tensor, one_tensor)
x_torch = torch.normal(zero_tensor, one_tensor)
y_torch = torch.tensor(1.)
alpha_torch = torch.normal(zero_tensor, one_tensor)
alpha_torch.requires_grad = True
beta_torch = torch.normal(zero_tensor, one_tensor)
beta_torch.requires_grad = True
mu_torch = x_torch * beta_torch
# mu and sigma are non-leaf tensors; retain_grad() keeps their .grad after backward(),
# which the optional checks below print.
mu_torch.retain_grad()
sigma_torch = (w_torch * alpha_torch).exp()
sigma_torch.retain_grad()
# Standardised upper and lower interval bounds.
z_bar = ((y_torch+1).log() - mu_torch) / sigma_torch
z_underbar = ((y_torch).log() - mu_torch) / sigma_torch
normal_dist = Normal(loc=0, scale=1)
# Standard normal CDF at each bound; pi is the interval probability.
Phi_bar = normal_dist.cdf(z_bar)
Phi_underbar = normal_dist.cdf(z_underbar)
pi = Phi_bar - Phi_underbar
log_pi = pi.log()
# Standard normal density at each bound, obtained as exp(log_prob).
phi_bar = normal_dist.log_prob(z_bar).exp()
phi_underbar = normal_dist.log_prob(z_underbar).exp()
# Density differences weighted by z^0, z^1 and z^2, each divided by pi.
gamma = (phi_bar - phi_underbar) / pi
delta = (z_bar * phi_bar - z_underbar * phi_underbar) / pi
lambda_ = (z_bar**2 * phi_bar - z_underbar**2 * phi_underbar) / pi
# Closed-form d log(pi) / d beta, kept on the autograd graph.
grad_beta = -gamma*x_torch/sigma_torch

# The blocks below are mutually exclusive sanity checks: enable one at a time and
# comment out the final grad_beta.backward() when doing so.

# Uncomment to check gradient of pi wrt mu
# pi.backward()
# print(mu_torch.grad)
# print(-1/sigma_torch * (phi_bar - phi_underbar))

# Uncomment to check gradient of log(pi) wrt beta
# log_pi.backward()
# print(f"analytical: {grad_beta}")
# print(f"torch: {beta_torch.grad}")

# Uncomment to check gradient of phi_bar - phi_underbar wrt mu
# num = phi_bar - phi_underbar
# num.backward()
# print(mu_torch.grad)
# print((z_bar * phi_bar - z_underbar * phi_underbar)/sigma_torch)
# print(1/sigma_torch * gamma**2 - gamma)

# Uncomment to check gradient of gamma wrt mu
# gamma.backward()
# print(mu_torch.grad)
# print(1/sigma_torch * (delta + gamma**2))

# Uncomment to check gradient of phi_bar wrt sigma
# phi_bar.backward()
# print(sigma_torch.grad)
# print(phi_bar * z_bar**2 / sigma_torch)

# Uncomment to check gradient of Phi_bar wrt sigma
# Phi_bar.backward()
# print(sigma_torch.grad)
# print(-phi_bar * z_bar / sigma_torch)

# Propagate gradient (comment if checking gradient)
# The following cells read beta_torch.grad and alpha_torch.grad as the Hessian entries.
grad_beta.backward()

In [28]:
# Second derivative of log(pi) in beta: autograd result first, then the closed form.
print(f"beta^2 torch: {beta_torch.grad}")
neg_sq_ratio = -x_torch**2/sigma_torch**2
curvature = gamma**2 + delta
hessian_beta = neg_sq_ratio * curvature
print(f"beta^2 analytical: {hessian_beta}")

beta^2 torch: -0.9477335214614868
beta^2 analytical: -0.9477336406707764


In [29]:
# Cross derivative of log(pi) in beta and alpha: autograd result first, then the closed form.
print(f"beta alpha torch: {alpha_torch.grad}")
cross_term = lambda_ + gamma * (delta - 1)
hessian_beta_alpha = -1/sigma_torch * x_torch * w_torch * cross_term
print(f"beta alpha analytical: {hessian_beta_alpha}")

beta alpha torch: -1.0424295663833618
beta alpha analytical: -1.0424295663833618


In [30]:
# Hessian check, alpha/alpha entry: build the closed-form score grad_alpha = d log(pi) / d alpha
# as a differentiable torch expression, back-propagate it, and compare alpha_torch.grad
# with the closed-form second derivative.
zero_tensor = torch.tensor(0.)
one_tensor = torch.tensor(1.)
# Fresh random scalars; these overwrite the tensors used by the previous Hessian cells.
w_torch = torch.normal(zero_tensor, one_tensor)
x_torch = torch.normal(zero_tensor, one_tensor)
y_torch = torch.tensor(1.)
alpha_torch = torch.normal(zero_tensor, one_tensor)
alpha_torch.requires_grad = True
beta_torch = torch.normal(zero_tensor, one_tensor)
beta_torch.requires_grad = True
mu_torch = x_torch * beta_torch
# mu and sigma are non-leaf tensors; retain_grad() keeps their .grad after backward(),
# which the optional check below prints.
mu_torch.retain_grad()
sigma_torch = (w_torch * alpha_torch).exp()
sigma_torch.retain_grad()
# Standardised upper and lower interval bounds.
z_bar = ((y_torch+1).log() - mu_torch) / sigma_torch
z_underbar = ((y_torch).log() - mu_torch) / sigma_torch
normal_dist = Normal(loc=0, scale=1)
# Standard normal CDF at each bound; pi is the interval probability.
Phi_bar = normal_dist.cdf(z_bar)
Phi_underbar = normal_dist.cdf(z_underbar)
pi = Phi_bar - Phi_underbar
log_pi = pi.log()
# Standard normal density at each bound, obtained as exp(log_prob).
phi_bar = normal_dist.log_prob(z_bar).exp()
phi_underbar = normal_dist.log_prob(z_underbar).exp()
# Density differences weighted by z^0, z^1, z^2 and z*(z^2 - 1), each divided by pi.
gamma = (phi_bar - phi_underbar) / pi
delta = (z_bar * phi_bar - z_underbar * phi_underbar) / pi
lambda_ = (z_bar**2 * phi_bar - z_underbar**2 * phi_underbar) / pi
kappa = (z_bar * (z_bar**2 - 1) * phi_bar - z_underbar*(z_underbar**2-1) * phi_underbar) / pi
# Closed-form d log(pi) / d alpha, kept on the autograd graph.
grad_alpha = -delta*w_torch

# Optional sanity check: enable it and comment out grad_alpha.backward() below.
# Uncomment to check gradient of z_bar * phi_bar wrt sigma
# z_bar_phi_bar = z_bar * phi_bar
# z_bar_phi_bar.backward()
# print(sigma_torch.grad)
# print(z_bar * phi_bar / sigma_torch * (z_bar**2 - 1))

# Propagate gradient (comment if checking gradient)
grad_alpha.backward()

# Autograd second derivative in alpha, followed by the closed form; the two should match.
print(alpha_torch.grad)
print(-(kappa + delta**2) * w_torch**2)

tensor(-0.1520)
tensor(-0.1520, grad_fn=<MulBackward0>)
