In [1]:
import numpy as np
from scipy.stats import norm
import torch
from torch.distributions.normal import Normal

# Gradients

In [2]:
# Gradient from Pytorch
zero_tensor = torch.tensor(0.)
one_tensor = torch.tensor(1.)
w_torch = torch.normal(zero_tensor, one_tensor)
x_torch = torch.normal(zero_tensor, one_tensor)
y_torch = torch.tensor(1.)
alpha_torch = torch.normal(zero_tensor, one_tensor)
alpha_torch.requires_grad = True
beta_torch = torch.normal(zero_tensor, one_tensor)
beta_torch.requires_grad = True
mu_torch = x_torch * beta_torch
sigma_torch = (w_torch * alpha_torch).exp()
normal_dist = Normal(loc=mu_torch, scale=sigma_torch)
prob = normal_dist.cdf((y_torch+1).log()) - normal_dist.cdf(y_torch.log())
log_prob = prob.log()
log_prob.backward()
print(f"beta: {beta_torch.grad}")
print(f"alpha: {alpha_torch.grad}")

beta: -0.2405138909816742
alpha: -0.8877367973327637


In [3]:
# Numerical derivative
w = w_torch.detach().numpy()
x = x_torch.detach().numpy()
y = y_torch.detach().numpy()
alpha = alpha_torch.detach().numpy()
beta = beta_torch.detach().numpy()
mu = mu_torch.detach().numpy()
sigma = sigma_torch.detach().numpy()

def log_pmf(y, mu, sigma):
    Phi_right = norm.cdf(x=np.log(y+1), loc=mu, scale=sigma)
    Phi_left = norm.cdf(x=np.log(y), loc=mu, scale=sigma)
    prob = Phi_right - Phi_left
    log_prob = np.log(prob)
    return log_prob

dbeta = 1e-3
deriv_beta_numeric = (log_pmf(y, mu + x*dbeta, sigma) - log_pmf(y, mu, sigma)) / dbeta
print(f"beta: {deriv_beta_numeric}")

dalpha = 1e-3
deriv_alpha_numeric = (log_pmf(y, mu, sigma * np.exp(w * dalpha)) - log_pmf(y, mu, sigma)) / dalpha
print(f"alpha: {deriv_alpha_numeric}")

beta: -0.2406954875702194
alpha: -0.8879392923399898


In [4]:
# Analytical gradient
z_bar = (np.log(y+1)-mu)/sigma
z_underbar = (np.log(y)-mu)/sigma
beta_num = norm.pdf(z_underbar) - norm.pdf(z_bar)
beta_den = norm.cdf(z_bar) - norm.cdf(z_underbar)
beta_grad = beta_num / beta_den * x / sigma
print(f"beta: {beta_grad}")

# Simple analytical gradient
alpha_num = z_underbar * norm.pdf(z_underbar) - z_bar * norm.pdf(z_bar)
alpha_den = norm.cdf(z_bar) - norm.cdf(z_underbar)
alpha_grad = alpha_num / alpha_den * w
print(f"alpha: {alpha_grad}")

beta: -0.240513931713416
alpha: -0.8877369317303965
