In [1]:
import numpy as np
import time
from utils import model
from inference import log_marginal_likelihood
from inference import zstates_old_method as zs
from utils import hyperparameters

# True hyperparameters used for the simulation. They stay as module-level
# names because the derivative checks further down read them directly.
pi = 0.1
mu = 0.001
sigmabg = 0.001
sigma = 0.01
tau = 1 / (0.005 * 0.005)

# Simulate a data set from these hyperparameters (all passed by keyword).
simulation_kwargs = {
    "pi": pi,
    "mu": mu,
    "sigmabg": sigmabg,
    "sigma": sigma,
    "tau": tau,
}
x, y, csnps, v = model.simulate(**simulation_kwargs)

# First axis of x is used as the variant axis, second as the sample axis.
nvar, nsample = x.shape[:2]

# Parameter order matters: later cells perturb entries by position
# (0 = pi, 1 = mu, 2 = sigma, 3 = sigmabg, 4 = tau).
params = np.array([pi, mu, sigma, sigmabg, tau])
scaledparams = hyperparameters.scale(params)

# NOTE(review): the meaning of the positional arguments 2, nvar and 0.98 is
# defined in inference.zstates_old_method.create -- confirm there.
zstates = zs.create(scaledparams, x, y, 2, nvar, 0.98)

In [2]:
# Time the full evaluation of the log marginal likelihood.
# time.perf_counter() is used instead of time.time(): it is monotonic and
# high-resolution, so the measured interval cannot be distorted by system
# clock adjustments.
start_time = time.perf_counter()
l, m = log_marginal_likelihood.full(scaledparams, x, y, zstates)
print ("Log marginal likelihood from full calculation: {:f}".format(m))
print("Calculated in {:f} seconds ---\n".format(time.perf_counter() - start_time))

# Time the iterative-inverse evaluation. This call overwrites `m` and also
# provides `der`, `pz`, `BZinvlist` and `Sinvlist`, which later cells reuse;
# `l` from the full calculation above is reused later as well.
start_time = time.perf_counter()
m, der, pz, BZinvlist, Sinvlist = log_marginal_likelihood.iterative_inverse(scaledparams, x, y, zstates)
print ("Log marginal likelihood from fast calculation: {:f}".format(m))
print("Calculated in {:f} seconds ---\n".format(time.perf_counter() - start_time))

Log marginal likelihood from full calculation: -326.949697
Calculated in 1.842703 seconds ---

Log marginal likelihood from fast calculation: -326.949697
Calculated in 1.999472 seconds ---



In [3]:
# Derivative of pi
# Forward finite difference: perturb entry 0 of a fresh copy of the scaled
# parameters and re-evaluate the marginal likelihood without gradients.
delta = 0.00001
newparams = hyperparameters.scale(params)
newparams[0] += delta
perturbed = log_marginal_likelihood.iterative_inverse(newparams, x, y, zstates, grad = False)
newm = perturbed[0]
pi_grad = (newm - m) / delta
print ("Derivative of pi by brute force is {:f}".format(pi_grad))
print ("Derivative of pi from equation is {:f}".format(der[0]))

# Recompute the same derivative by hand as a pz-weighted sum over the states.
pi_grad = 0
for i, z in enumerate(zstates):
    nz = len(z)
    n_remaining = nvar - nz
    pi_grad += pz[i] * (nz / pi - n_remaining / (1 - pi))

# NOTE(review): the factor pi * (1 - pi) and the sign flip in the print below
# presumably convert to the scaled parameter used by `der`; confirm against
# hyperparameters.scale.
pi_grad = params[0] * (1 - params[0]) * pi_grad

print ("Derivative of pi from equation is {:f}".format(-pi_grad))

Derivative of pi by brute force is 18.000090
Derivative of pi from equation is 18.000000
Derivative of pi from equation is 18.000000


In [4]:
# Derivative of mu
# Forward finite difference on entry 1 of the scaled parameter vector.
delta = 0.00001
newparams = hyperparameters.scale(params)
newparams[1] += delta
# The second return value of log_marginal_likelihood.full(newparams, x, y,
# zstates) is an alternative source for the perturbed value.
perturbed = log_marginal_likelihood.iterative_inverse(newparams, x, y, zstates, grad = False)
newm = perturbed[0]
mu_grad = (newm - m) / delta
print ("Derivative of mu by brute force is {:f}".format(mu_grad))
print ("Derivative of mu from equation is {:f}".format(der[1]))

def mat3mul(A, B, C):
    """Return the triple product A . B . C, evaluated right to left.

    The product B . C is formed first and then multiplied by A from the left,
    using np.dot for both steps.
    """
    right_product = np.dot(B, C)
    return np.dot(A, right_product)

# Analytic derivative with respect to mu, accumulated over all states and
# weighted by pz. Sinvlist comes from the iterative_inverse call in the
# timing cell. Variables this computation never read (sigmabg2, sigma2, h,
# sigz0, BZinv) are no longer assigned here.
mu_grad = 0

for i, z in enumerate(zstates):
    Sinv = Sinvlist[i]

    # Sum of the rows of x selected by this state; an empty state gives zeros.
    zidx = np.asarray(z, dtype=int)
    zTx = x[zidx, :].sum(axis=0)

    # mu * zTx is what gets subtracted from y for this state.
    y_minus_M = y - mu * zTx
    mucomp = np.dot(np.dot(zTx, Sinv), y_minus_M)
    mu_grad += pz[i] * mucomp

# NOTE(review): the division by 100 and the sign flip below presumably map the
# derivative onto the scaled mu used by `der[1]`; confirm against
# hyperparameters.scale.
mu_grad = mu_grad / 100
print ("Derivative of mu from equation is {:f}".format(-mu_grad))

Derivative of mu by brute force is -0.099584
Derivative of mu from equation is -0.099594
Derivative of mu from equation is -0.099594


In [None]:
# Derivative of sigma
# Forward finite difference on entry 2 of the scaled parameter vector.
# NOTE(review): this brute-force value and der[2] are taken in the scaled
# parameter space, whereas sigma_grad5 / sigma_grad6 below differentiate with
# respect to the raw sigma -- a chain-rule factor may be needed to compare
# them; confirm against hyperparameters.scale.
delta = 0.0001
newparams = hyperparameters.scale(params)
newparams[2] += delta
newm = log_marginal_likelihood.iterative_inverse(newparams, x, y, zstates, grad = False)[0]
sigma_grad = (newm - m) / delta
print ("Derivative of sigma by brute force is {:f}".format(sigma_grad))
print ("Derivative of sigma from equation is {:f}".format(der[2]))

sigmabg2 = sigmabg * sigmabg
sigma2 = sigma * sigma
# Change in the diagonal of B when a variant switches from the background
# variance to the causal variance.
h = 1/sigma2 - 1/sigmabg2
sigz0 = np.repeat(sigmabg2, nvar)

# B0 belongs to the empty state and does not depend on z, so it is inverted
# once here. Previously it was rebuilt inside each loop and only became
# available if the empty state happened to be visited first.
B0 = np.diag(1.0 / sigz0) + tau * np.dot(x, x.T)
B0inv = np.linalg.inv(B0)

sigma_grad5 = 0
start_time = time.perf_counter()
for i, z in enumerate(zstates):
    if len(z) == 0:
        # Without causal variants nothing here depends on sigma.
        continue
    zidx = np.asarray(z, dtype=int)

    # One rank-one (Sherman-Morrison) update per causal variant, so states
    # with several variants are handled instead of using only z[0].
    BZinv = B0inv
    for zpos in zidx:
        mod = h / (1 + h * BZinv[zpos, zpos])
        BZinv = BZinv - mod * np.outer(BZinv[:, zpos], BZinv[zpos, :])
    Sinv = - tau * tau * np.dot(np.dot(x.T, BZinv), x)
    Sinv[np.diag_indices_from(Sinv)] += tau

    xz = x[zidx, :]
    y_minus_M = y - mu * xz.sum(axis=0)

    # dS/dsigma = 2 * sigma * sum_k outer(x[k], x[k]) over the causal variants.
    xT_diag2sig_x = 2 * sigma * np.dot(xz.T, xz)
    Sinv_xT_diag2sig_x = np.dot(Sinv, xT_diag2sig_x)
    dlogdetS_dsigma = np.trace(Sinv_xT_diag2sig_x)
    dSinv_dsigma = - np.dot(Sinv_xT_diag2sig_x, Sinv)
    term1 = - 0.5 * dlogdetS_dsigma
    term2 = - 0.5 * np.dot(y_minus_M, np.dot(dSinv_dsigma, y_minus_M))

    sigma_grad5 += pz[i] * (term1 + term2)

print ("Derivative from equation 5 is {:f}. Calculated in {:f} seconds".format(sigma_grad5, time.perf_counter() - start_time))

sigma_grad6 = 0
start_time = time.perf_counter()
for i, z in enumerate(zstates):
    if len(z) == 0:
        # Without causal variants nothing here depends on sigma.
        continue
    zidx = np.asarray(z, dtype=int)

    BZinv = B0inv
    for zpos in zidx:
        mod = h / (1 + h * BZinv[zpos, zpos])
        BZinv = BZinv - mod * np.outer(BZinv[:, zpos], BZinv[zpos, :])
    Sinv = - tau * tau * np.dot(np.dot(x.T, BZinv), x)
    Sinv[np.diag_indices_from(Sinv)] += tau

    y_minus_M = y - mu * x[zidx, :].sum(axis=0)

    # Same derivative expressed through BZinv instead of Sinv; each causal
    # variant contributes one term.
    BZinv_zz = BZinv[zidx, zidx]
    dlogdetS_dsigma = 2 * np.sum(1 - BZinv_zz / sigma2) / sigma
    BZinv_diag2bysig3_BZinv = 2 * np.dot(BZinv[:, zidx], BZinv[zidx, :]) / sigma2 / sigma
    dSinv_dsigma = - tau * tau * np.dot(x.T, np.dot(BZinv_diag2bysig3_BZinv, x))
    term1 = - 0.5 * dlogdetS_dsigma
    term2 = - 0.5 * np.dot(y_minus_M, np.dot(dSinv_dsigma, y_minus_M))

    sigma_grad6 += pz[i] * (term1 + term2)

print ("Derivative from equation 6 is {:f}. Calculated in {:f} seconds".format(sigma_grad6, time.perf_counter() - start_time))

In [None]:
def mat3mul(A, B, C):
    """Multiply three array-likes as A . (B . C) using np.dot.

    The inner product B . C is computed first; the result is then
    left-multiplied by A.
    """
    inner = np.dot(B, C)
    return np.dot(A, inner)

# Derivative of sigbg
# Forward finite difference with respect to the raw sigmabg: perturb the raw
# value, then scale, because iterative_inverse is called with scaled
# parameters everywhere else in this notebook. Element [0] of its result is
# the log marginal likelihood; [1] is the gradient vector.
delta = 0.00000001
newparams = hyperparameters.scale(np.array([pi, mu, sigma, sigmabg + delta, tau]))
newm = log_marginal_likelihood.iterative_inverse(newparams, x, y, zstates, grad = False)[0]
sigbg_grad = (newm - m) / delta
print ("Derivative of sigbg by brute force is {:f}".format(sigbg_grad))

# State weights rebuilt from the per-state values `l` and the total `m`.
pz = np.exp(l - m)
sigmabg2 = sigmabg * sigmabg
sigma2 = sigma * sigma
h = 1/sigma2 - 1/sigmabg2
sigz0 = np.repeat(sigmabg2, nvar)

# B0 belongs to the empty state and does not depend on z, so it is inverted
# once here instead of relying on the empty state being visited first.
B0 = np.diag(1.0 / sigz0) + tau * np.dot(x, x.T)
B0inv = np.linalg.inv(B0)

sigbg_grad5 = 0
start_time = time.perf_counter()
for i, z in enumerate(zstates):
    zidx = np.asarray(z, dtype=int)

    # One rank-one (Sherman-Morrison) update per causal variant; for the
    # empty state BZinv is simply B0inv.
    BZinv = B0inv
    for zpos in zidx:
        mod = h / (1 + h * BZinv[zpos, zpos])
        BZinv = BZinv - mod * np.outer(BZinv[:, zpos], BZinv[zpos, :])
    Sinv = - tau * tau * np.dot(np.dot(x.T, BZinv), x)
    Sinv[np.diag_indices_from(Sinv)] += tau

    # Defined for every state (equals y for the empty state). The empty-state
    # branch used to read this name before it was assigned in this cell.
    y_minus_M = y - mu * x[zidx, :].sum(axis=0)

    # Diagonal of d(Lambda)/d(sigmabg): 2 * sigmabg for background variants,
    # zero for causal ones. Scaling the columns of x.T by it is equivalent to
    # multiplying with the corresponding diagonal matrix.
    dlam_dsigbg = np.repeat(2 * sigmabg, nvar)
    dlam_dsigbg[zidx] = 0
    dS_dsigbg = np.dot(x.T * dlam_dsigbg, x)
    Sinv_dS_dsigbg = np.dot(Sinv, dS_dsigbg)
    dlogdetS_dsigbg = np.trace(Sinv_dS_dsigbg)

    dSinv_dsigbg = - np.dot(Sinv_dS_dsigbg, Sinv)
    term1 = - 0.5 * dlogdetS_dsigbg
    term2 = - 0.5 * np.dot(y_minus_M, np.dot(dSinv_dsigbg, y_minus_M))

    sigbg_grad5 += pz[i] * (term1 + term2)

print ("Derivative from equation 5 is {:f}. Calculated in {:f} seconds".format(sigbg_grad5, time.perf_counter() - start_time))

sigbg_grad6 = 0
start_time = time.perf_counter()
for i, z in enumerate(zstates):
    zidx = np.asarray(z, dtype=int)

    BZinv = B0inv
    for zpos in zidx:
        mod = h / (1 + h * BZinv[zpos, zpos])
        BZinv = BZinv - mod * np.outer(BZinv[:, zpos], BZinv[zpos, :])
    Sinv = - tau * tau * np.dot(np.dot(x.T, BZinv), x)
    Sinv[np.diag_indices_from(Sinv)] += tau

    y_minus_M = y - mu * x[zidx, :].sum(axis=0)

    # Only background (non-causal) variants depend on sigmabg.
    is_background = np.ones(nvar, dtype=bool)
    is_background[zidx] = False

    dlogdetS_dsigbg = 2 * np.sum(sigmabg2 - np.diag(BZinv)[is_background]) / sigmabg2 / sigmabg

    # Diagonal of dBZ/d(sigmabg): -2 / sigmabg**3 on background entries.
    dBZ_dsigbg = np.where(is_background, - 2.0 / sigmabg2 / sigmabg, 0.0)
    dBZinv_dsigbg = - np.dot(BZinv * dBZ_dsigbg, BZinv)
    dSinv_dsigbg = - tau * tau * np.dot(x.T, np.dot(dBZinv_dsigbg, x))
    term1 = - 0.5 * dlogdetS_dsigbg
    term2 = - 0.5 * np.dot(y_minus_M, np.dot(dSinv_dsigbg, y_minus_M))

    sigbg_grad6 += pz[i] * (term1 + term2)

print ("Derivative from equation 6 is {:f}. Calculated in {:f} seconds".format(sigbg_grad6, time.perf_counter() - start_time))

In [None]:
# Derivative of tau
# Forward finite difference with respect to the raw tau: perturb the raw
# value, then scale, because iterative_inverse is called with scaled
# parameters everywhere else in this notebook. Element [0] of its result is
# the log marginal likelihood; [1] is the gradient vector.
delta = 0.0001
newparams = hyperparameters.scale(np.array([pi, mu, sigma, sigmabg, tau + delta]))
newm = log_marginal_likelihood.iterative_inverse(newparams, x, y, zstates, grad = False)[0]
tau_grad = (newm - m) / delta
print ("Derivative of tau by brute force is {:f}".format(tau_grad))

# State weights rebuilt from the per-state values `l` and the total `m`.
pz = np.exp(l - m)
tau_grad = 0
sigmabg2 = sigmabg * sigmabg
sigma2 = sigma * sigma
h = 1/sigma2 - 1/sigmabg2
sigz0 = np.repeat(sigmabg2, nvar)

# B0 belongs to the empty state and does not depend on z, so it is inverted
# once here instead of relying on the empty state being visited first.
B0 = np.diag(1.0 / sigz0) + tau * np.dot(x, x.T)
B0inv = np.linalg.inv(B0)

start_time = time.perf_counter()
for i, z in enumerate(zstates):
    zidx = np.asarray(z, dtype=int)

    # One rank-one (Sherman-Morrison) update per causal variant; for the
    # empty state BZinv is simply B0inv.
    BZinv = B0inv
    for zpos in zidx:
        mod = h / (1 + h * BZinv[zpos, zpos])
        BZinv = BZinv - mod * np.outer(BZinv[:, zpos], BZinv[zpos, :])
    Sinv = - tau * tau * np.dot(np.dot(x.T, BZinv), x)
    Sinv[np.diag_indices_from(Sinv)] += tau

    # Equals y for the empty state.
    y_minus_M = y - mu * x[zidx, :].sum(axis=0)

    # tau enters S only through the identity term, so dS/dtau = -I / tau**2.
    # The same expressions therefore apply to every state.
    dlogdetS_dtau = np.trace(- Sinv) / tau / tau
    dSinv_dtau = np.dot(Sinv, Sinv) / tau / tau
    term1 = - 0.5 * dlogdetS_dtau
    term2 = - 0.5 * np.dot(y_minus_M, np.dot(dSinv_dtau, y_minus_M))
    tau_grad += pz[i] * (term1 + term2)

print ("Derivative of tau is {:f}. Calculated in {:f} seconds".format(tau_grad, time.perf_counter() - start_time))