In [4]:
import pandas as pd

import scipy as sc
from scipy.io import loadmat

import numpy as np

from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics.pairwise import polynomial_kernel

np.random.seed(123)

# Read Data

In [5]:
# splits
# images
# distances matrix
DATA_DIR = "../data"

# DATASET1: 17 Category Flowers
# 'val1', 'val2', 'val3', 'trn1', 'trn2', 'trn3', 'tst3', 'tst2', 'tst1'
'''splits = loadmat("%s/cat_flower/datasplits.mat" %DATA_DIR)

# 'D_siftbdy', 'D_siftint', 'D_hsv', 'D_hog',
distances = loadmat("%s/cat_flower/distancematrices17itfeat08.mat" %DATA_DIR)

imlist = loadmat("%s/cat_flower/trimaps/imlist.mat" %DATA_DIR)
'''

# DATASET2: 102 Category Flowers
# 'val1', 'val2', 'val3', 'trn1', 'trn2', 'trn3', 'tst3', 'tst2', 'tst1'
ds2_splits = loadmat("%s/102_cat_flower/setid.mat" %DATA_DIR)

# 'D_siftbdy', 'D_siftint', 'D_hsv', 'D_hog',
ds2_distances = loadmat("%s/102_cat_flower/distancematrices102.mat" %DATA_DIR)

ds2_labels = loadmat("%s/102_cat_flower/imagelabels.mat" %DATA_DIR)

# Prepare

In [6]:
'''KERNELS = {
    'gaussian': get_gaussian_kernel,
    'poly': get_ploynomial_kernel,
    'dist': get_distances_to_kernel
}'''

def get_gaussian_kernel(X, width=.2):
    return rbf_kernel(X, gamma=1.0/width)

def get_ploynomial_kernel(X, deg=2):
    return polynomial_kernel(X, degree=deg)

def get_distances_to_kernel(X):
    return (-X/X.mean())

def get_kernels(X, poly=False, gauss=False, distk=False):
    kernels = []
    
    if poly:
        degrees = [1,2,3]
        
        for deg in degrees:
            kernels.append(get_ploynomial_kernel(X, deg=deg))
            
            
    if gauss:
        widths = map(lambda x: 2**x, np.arange(-3,6))
        
        for w in widths:
            kernels.append(get_gaussian_kernel(X, width=w))
            
            
    if distk:
        kernels.append(get_distances_to_kernel(X))
        
    return kernels
    
    

# Dummy

In [30]:
x1 = np.random.normal(0,1,100)
y1 = np.repeat(1,100)
x2 = np.random.normal(50,1,100)
y2 = np.repeat(-1,100)

N = 200
x = np.concatenate((x1,x2))
y = np.concatenate((y1,y2))

K = [get_ploynomial_kernel(x.reshape((-1,1)), i) for i in range(3)]
print K[0].shape

P = 3

(200, 200)


# BEMKL

In [51]:
# Helper Functions
log = np.log
gamma = sc.special.gamma
digamma = sc.special.digamma
det = np.linalg.det
diag = np.diag
outer = np.outer
tr = np.trace
dot =  np.dot
concat = np.concatenate
normal = np.random.normal
arr = np.array


# Hyper Parameters (prior)
sparse = False

alpha_lambda, beta_lambda = 1.0,1.0
alpha_gamma, beta_gamma = 1.0,1.0
alpha_omega, beta_omega = 1.0,1.0

if sparse:
    alpha_omega, beta_omega = 10.0**-10, 10.0**10

rv_lambda = np.random.gamma(alpha_lambda, 1.0/beta_lambda, N) 
a = np.array([np.random.normal(0, 1.0/rv_lambda[i], 1) for i in range(N)])

rv_gamma = np.random.gamma(alpha_gamma, 1.0/beta_gamma, 1) 
rv_b = np.random.normal(0, 1.0/rv_gamma, 1)

rv_omega = np.random.gamma(alpha_omega, 1.0/beta_omega, P) 
rv_e = np.array([np.random.normal(0, 1.0/rv_omega[i], 1) for i in range(P)])

# The conditionals parameters
p_alpha_lambda = np.repeat(alpha_lambda, N)
p_beta_lambda = np.repeat(beta_lambda, N)
p_alpha_gamma = alpha_gamma
p_beta_gamma = beta_gamma
p_alpha_omega = np.repeat(alpha_omega, P)
p_beta_omega = np.repeat(alpha_omega, P)

mu_a = np.repeat(0, N)
mu_b = 0
mu_e = np.repeat(0, P)
cov_a = diag(1.0/rv_lambda)
cov_e = np.diag(1.0 / rv_omega)

mu_g = np.vstack([dot(a.T, K[i]) for i in range(P)])
print mu_g.shape
G = np.array([[np.random.normal(mu_g[i,j],1, 1)[0] for j in range(N)] for i in range(P)])
print 'G', G.shape
cov_g = [(outer(G[:,i],G[:,i])- outer(mu_g[:,i], mu_g[:,i])) 
                  for i in range(N)]

mu_b_e = np.concatenate(([mu_b],mu_e))
cov_b_e = np.vstack(([rv_b if i==0 else 0 for i in range(P+1)],
                np.hstack((np.zeros((P,1)),cov_e))))
print 'COV_b_e', cov_b_e.shape

omega = np.random.gamma(alpha_omega, 1.0/beta_omega, P)
e = concat([normal(0,omega[i],1) for i in range(P)])
print 'e', e.shape
mu_f = np.array([dot(e,G[:,i])+ rv_b for i in range(N)])

E_a2 = np.diag(cov_a) + mu_a**2 
E_b2 = 1.0 / rv_gamma + mu_b
E_e2 = np.diag(cov_e) + mu_e**2

thresh = 1.0 * 10**-4
ELBO_init = 0

K2 = np.sum([dot(K[i], K[i].T) for i in range(P)])


while True: 
    # Update the parameters:
    ########################
    p_alpha_lambda = np.repeat(alpha_lambda + 0.5, N)
    p_beta_lambda = (beta_lambda**-1 + E_a2/2.0)**-1
    p_alpha_gamma = alpha_gamma + 0.5
    p_beta_gamma = (beta_gamma**-1 + E_b2/2.0)**-1
    p_alpha_omega = np.repeat(alpha_omega + 0.5, P)
    p_beta_omega = (beta_omega**-1 + E_e2/2.0)**-1
    
    E_lambda = p_alpha_lambda/p_beta_lambda
    E_gamma = p_alpha_gamma/p_beta_gamma
    E_omega = p_alpha_omega/p_beta_omega

    mu_a = dot(cov_a, np.sum([dot(K[i], mu_g[i,:]) for i in range(P)]))
    cov_a = diag(E_lambda) + K2
    
    mu_g = np.array([dot(cov_g[i],(dot([K[j][i,:] for j in range(P)], mu_a) + mu_f[i]* mu_e + mu_b_e))
                for i in range(N)])
    cov_g = (np.identity(P) + cov_e + np.outer(mu_e,mu_e))**-1
    
    mu_b_e = dot(cov_b_e, [mu_f.sum() if i==0 else dot(mu_g,mu_f) for i in range(P+1)])
    cov_b_e[0,0] = E_gamma + N
    cov_b_e[0,1:] = mu_g.T.sum(axis=1)
    cov_b_e[1:,0] = mu_g.sum(axis=1)
    cov_b_e[1:,1:] = diag(E_omega) + dot(G,G.T)
    cov_b_e = cov_b_e**-1
    
    mu_f = np.array([dot(mu_b_e[1:], mu_g[:,i]) for i in range(N)])
    
    # Calculate the ELBO
    ###################
    # E(log(p(lambda)))
    E_log_lambda = np.array([log(np.random.gamma(p_alpha_lambda[i], p_beta_lambda[i], 100)).mean()
                             for i in range(N)])
    E_lp_lambda = ((alpha_lambda - 1) * E_log_lambda - 1.0/beta_lambda * E_lambda - log(gamma(alph_lambda))
            - alpha_lambda * log(beta_lambda)).sum()

    # E(log(p(a|lambda)))
    E_lp_a_lambda = ( - 0.05 * (diag(E_lambda) * Cov_a).diag.sum() - 0.5 * N *log(2.0*np.pi) 
                     + 0.05 * log(np.prod(E_lambda)))

    # E(log(p(G|a,K)))
    k_i = np.array([K[j][i,:] for j in range(P)])
    E_lp_G = np.sum([-.5 * (diag(cov_g[i])+mu_g[:,i]**2).sum() 
                     + np.trace(dot(k_i.T, dot(mu_g[:,i],mu_a)))
                     - 0.5 * np.trace( (k_i**2).sum() * (cov_a + outer(mu_a,mu_a) )) 
                     for i in range(N)] )

    # E(log(p(gamma)))
    E_lp_gamma = ((alpha_gamma - 1) * E_log_gamma - E_gamma/beta_gamma - log_gamma_alph_gamma 
                  - alpha_gamma * log(beta_gamma))

    # E(log(p(b|gamma)))
    E_lp_b = (- 0.5 * E_gamma * E_b2 - 0.5 *log(2.0*np.pi) + 0.5 * log(E_gamma))

    # E(log(p(omega)))
    E_lp_omega = ((alpha_omega - 1) * E_log_omega - E_omega/beta_omega - log_gamma_alph_omega 
                  - alpha_omega * log(beta_omega) ).sum()

    # E(log(p(e|omega)))
    E_lp_e = ( -0.5 * (diag(E_omega) * Cov_e).diag.sum() - 0.5 * P * log(2.0*np.pi)
             + 0.05 * log(np.prod(E_omega)))

    # E(log(p(f|e,b,G)))
    E_lp_f = (-0.5 * E_f2 + (dot(E_e,E_G) + b) * E_f - 0.5 * ([dot(Cov_e,Cov_g).diag.sum() for i in range(N)]
            + 2 * dot(E_b_e, E_G) + E_b2) - 0.5* log(2.0*np.pi)).sum()


    # E(log(q(lambda)))
    E_lq_lambda = (-p_alpha_lambda - log(p_beta_lambda) - log(gamma(p_alpha_lambda)) 
                   - (1-p_alpha_lambda) * digamma(p_alpha_lambda)).sum()

    # E(log(q(a)))
    E_lq_a = -0.5 * N * (log(2.0*np.pi)+1) - 0.5 * log(det(cov_a))

    # E(log(q(G)))
    E_lq_G = (-0.5 * P *(log(2.0*np.pi)+1) - 0.5 * np.array([log(det(cov_g[i])) for i in range(N)])).sum()

    # E(log(q(gamma)))
    E_lq_gamma = (-p_alpha_gamma - log(p_beta_gamma) - log(gamma(p_alpha_gamma)) 
                   - (1-p_alpha_gamma) * digamma(p_alpha_gamma))

    # E(log(q(omega)))
    E_lq_omega = (-p_alpha_omega - log(p_beta_omega) - log(gamma(p_alpha_omega)) 
                   - (1-p_alpha_omega) * digamma(p_alpha_omega)).sum()

    # E(log(q(b,e)))
    E_lq_b_e = (-0.5 * (P+1) *(log(2.0*np.pi)+1) - 0.5 * log(det(cov_b_e)) )

    # E(log(q(f)))
    E_lq_f = (-0.5 * (log(2.0*np.pi) + cov_f) - log(Z)).sum()


    ELBO = (E_lp_lambda + E_lp_a_lambda + E_lp_G + E_lp_gamma + E_lp_b + E_lp_omega + E_lp_e + E_lp_f
            - E_lq_lambda - E_lq_a - E_lq_G - E_lq_gamma - E_lq_omega - E_lq_b_e - E_lq_f)
    
    if np.abs(ELBO - ELBO_init) < thresh:
        print 'Convergence'
        break
        
    ELBO_init = ELBO

    
    

(3, 200)
G (3, 200)
COV_b_e (4, 4)
e (3,)


ValueError: operands could not be broadcast together with shapes (3,200) (3,) 

In [47]:
np.hstack((np.zeros((P,1)),cov_e))

array([[ 0.        ,  0.2110523 ,  0.        ,  0.        ],
       [ 0.        ,  0.        , 11.3719084 ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.70242415]])

In [46]:
cov_e.shape
np.zeros((P)).shape

(3,)