## Validity check for derivatives

In [3]:
import numpy as np
import helpers as hlp
import math
from random import random, seed
from numpy.linalg import multi_dot

In [4]:
# p is the number of beta coefficients in the last hidden layer
p = 10

# m is number of variational parameters, which is 
# p plus the variational parameter for the prior on lambda
m = p + 1

# number of factors in the factored covariance representation
k = 3

# B is a lower triangle m x k matrix and is the first component of the 
# covariance matrix
B = np.tril(np.random.rand(m,k))

# D is a diagonal matrix of dimension m x m and is the second component of the 
# covariance matrix
D = np.diag(np.random.rand(m,))

# Lambda is a diagonal matrix of dimension p
Lambda = np.diag(np.random.rand(p,))

seed(679305)
tau = random()

# Set iteration counter to 0
t = 0

In [5]:
B_zeta_path = '../../bdd100k_test_data/extracted_coefficients/11082020/B_zeta_predictions.csv'
beta_path = '../../bdd100k_test_data/extracted_coefficients/11082020/beta.csv'
z_path = '../../bdd100k_test_data/extracted_coefficients/11082020/z_predictions.csv'

# B_zeta is a n x q matrix
B_zeta = np.genfromtxt(B_zeta_path, delimiter=',')
beta = np.genfromtxt(beta_path, delimiter=',')
z = np.genfromtxt(z_path, delimiter=',')

n = B_zeta.shape[0]

In [15]:
# S(x, theta) is of dimension n x n
S = np.diag([(1 + ((B_zeta[i,:].T).dot(Lambda)).dot(B_zeta[i,:]))**(-1/2) for i in range(0,n)])

## Derivatives

In [16]:
def grad_theta_h_lambda_j(Lambda, S, beta, B_zeta, z, tau,j):
    s_diag = np.diag(S)
    psi_j = B_zeta[:,j]
    lambda_j_sq = np.diag(Lambda)[j]
    A = np.diag((B_zeta[:,j]**2)*lambda_j_sq)
    #a = (1/2)*lambda_j_sq*(psi_j**2).dot((s_diag**2)-(z**2))
    a = - (1/2)*lambda_j_sq*sum((psi_j**2)*(z**2))
    b = (1/2)*lambda_j_sq*sum((psi_j**2)*(s_diag**2))
    
    c = (1/2)*multi_dot([beta.T, B_zeta.T, A, S, z])
    d = (beta[j]**2)/(2*lambda_j_sq)
    e = - (lambda_j_sq/(tau**2))/(1+lambda_j_sq/(tau**2)) + 1/2
    return(a+b+c+d+e)
#delta_log_lambda_sq = [grad_theta_h_lambda_j(Lambda, S, beta, B_zeta, z, tau, j_i) for j_i in range(0,p)]

In [17]:
def delta_theta_h_beta(z, S, B_zeta, Lambda):
    return (z.T).dot(np.linalg.inv(S)).dot(B_zeta) - 2*(beta.T).dot(B_zeta.T).dot(B_zeta) + ((np.linalg.inv(Lambda))).dot(beta)
delta_beta = delta_theta_h_beta(z, S, B_zeta, Lambda)

In [18]:
def delta_tau(p, Lambda, tau):
    tau_sq = tau**2
    return -(p-1) + 2*sum([(Lambda[i,i]/tau_sq)*((1+(Lambda[i,i]/tau_sq))**(-1)) for i in range(0,Lambda.shape[0])]) - 2*(tau_sq/(1+tau_sq))   

## True log-density

In [19]:
def log_density(z, beta, S, B_zeta, Lambda, p, tau):
    a = - n*np.log(2*math.pi)  - (1/2)*sum(np.log(np.diag(S**2))) - multi_dot([(1/2)*(z - multi_dot([S, B_zeta, beta])).T, np.linalg.inv(S.dot(S)), (z - multi_dot([S, B_zeta, beta]))])
    b = -(1/2)*sum(np.log(np.diag(Lambda))) - (1/2)*sum([beta[i]**2/(Lambda[i,i]) for i in range(0,p)])
    c = -(p-1)*math.log(tau) - sum(np.log([1+Lambda[i,i]/(tau**2) for i in range(0,Lambda.shape[0])])) - (p-1)*math.log(1+tau**2)                                                   
    return a + b +  c   

In [20]:
## Numerical derivative w.r.t beta

In [21]:
j = 0
def d_fun_beta_j(z, beta, S, B_zeta,j, tau ):
    h = 1e-5 #in theory h is an infinitesimal
    beta_new = beta.copy()
    beta_new[j] =  beta_new[j] + h
    return (log_density(z, beta_new, S, B_zeta, Lambda, p,tau)-log_density(z, beta, S, B_zeta, Lambda, p,tau))/h
print('True derivative: ' + str(delta_beta))
print('vs.')
print('Numerical derivative: ' + str([d_fun_beta_j(z, beta, S, B_zeta,j_i, tau) for j_i in range(0,len(beta))]))
# maybe look over this again as it always seems 2000 too much (in absolute terms)

True derivative: [ -8964.50578201  22194.63423351 -51054.46191743  -6906.85197079
 -15264.66719193 -42038.76275591  50540.31749979 -59886.77023252
  21676.20983574  -5701.46403624]
vs.
Numerical derivative: [-10701.65021228604, 25149.41970002837, -56057.6941265026, -8777.469221968204, -18439.805440721102, -48493.68929280899, 55832.5400634203, -68621.74329289701, 23638.338808086697, -7572.918260120786]


In [22]:
## Numerical derivative w.r.t lambda_j

In [23]:
def d_fun_lambda_j(z, beta, S, B_zeta,j, Lambda, tau ):
    h = 1e-5 #in theory h is an infinitesimal
    Lambda_new = Lambda.copy()
    Lambda_new[j,j] =  Lambda_new[j,j] + h
    S_new = np.diag([(1 + ((B_zeta[i,:].T).dot(Lambda_new)).dot(B_zeta[i,:]))**(-1/2) for i in range(0,n)])
    return (log_density(z, beta, S_new, B_zeta, Lambda_new, p, tau)-log_density(z, beta, S, B_zeta, Lambda, p, tau))/h
print('True derivative: ' + str([(grad_theta_h_lambda_j(Lambda, S, beta, B_zeta, z, tau,j_i)/np.sqrt(Lambda[j_i,j_i]))*2 for j_i in range(0,len(beta))]))
print('vs.')
print('Numerical derivative: ' + str([d_fun_lambda_j(z, beta, S, B_zeta,j_i, Lambda, tau) for j_i in range(0,len(beta))]))

True derivative: [-18042.466282752615, -89934.17344570265, -189809.8966059878, -36279.33566477528, -22419.30673875748, -52558.57148886552, -49117.09098568077, -49411.756355062214, -115388.37006651584, -23442.79442615554]
vs.
Numerical derivative: [-19472.275793668814, -115715.39000142364, -118095.39821697398, -22394.298555445854, -19706.614760798402, -63646.63293643388, -93619.79222157969, -105267.35539315267, -59641.768931760445, -43130.21662237588]


## Numerical derivative w.r.t tau

In [24]:
def d_fun_tau_j(z, beta, S, B_zeta, Lambda, p, tau):
    h = 1e-7 #in theory h is an infinitesimal
    tau_new = tau
    tau_new =  tau_new + h
    return (log_density(z, beta, S, B_zeta, Lambda, p, tau_new) - log_density(z, beta, S, B_zeta, Lambda, p, tau))/h
print('True derivative: ' + str(delta_tau(p, Lambda, tau)/tau))
print('vs.')
print('Numerical derivative: ' + str(d_fun_tau_j(z, beta, S, B_zeta, Lambda, p, tau)))
# not so good but also cannot finde any more mistakes...

True derivative: 11.497585375919817
vs.
Numerical derivative: 6.836489774286747
