In [7]:
import numpy as np
from scipy.spatial.distance import pdist, squareform
import numpy.matlib as nm

In [4]:
class SVGD():
    """Particle-based sampler (SVGD) driven by an RBF kernel and an AdaGrad-style step.

    Particles are stored as the rows of a 2-D array; `update` moves them
    using the gradient of the target log-density combined with a kernel
    repulsion term computed by `svgd_kernel`.
    """

    def __init__(self):
        pass

    def svgd_kernel(self, theta, h = -1):
        """Compute the RBF kernel matrix between particles and its repulsion term.

        Parameters
        ----------
        theta : ndarray of shape (n_particles, n_dims)
            Current particle positions, one particle per row.
        h : float, optional
            Kernel bandwidth. A negative value (the default) selects the
            median heuristic: h = sqrt(0.5 * median(d^2) / log(n_particles + 1)),
            where d^2 are the squared pairwise Euclidean distances.

        Returns
        -------
        (Kxy, dxkxy) : tuple of ndarray
            Kxy has shape (n_particles, n_particles) and holds
            exp(-d^2 / (2 h^2)). dxkxy has shape (n_particles, n_dims) and
            equals (theta * rowsum(Kxy) - Kxy @ theta) / h^2.
        """
        # Squared Euclidean distances between every pair of particles.
        pairwise_dists = squareform(pdist(theta)) ** 2
        if h < 0:
            # Median trick for choosing the bandwidth.
            h = np.median(pairwise_dists)
            h = np.sqrt(0.5 * h / np.log(theta.shape[0] + 1))
            if h == 0:
                # All (or most) particles coincide, so the median distance is
                # zero. Fall back to a unit bandwidth instead of dividing by
                # zero and filling the kernel with NaNs.
                h = 1.0

        # RBF kernel matrix.
        Kxy = np.exp(-pairwise_dists / h**2 / 2)

        # Repulsion term, computed for all coordinates at once via broadcasting.
        sumkxy = np.sum(Kxy, axis=1)
        dxkxy = (-np.matmul(Kxy, theta) + theta * sumkxy[:, None]) / (h**2)
        return (Kxy, dxkxy)

    def update(self, x0, lnprob, n_iter = 1000, stepsize = 1e-3, bandwidth = -1, alpha = 0.9, debug = False):
        """Run the particle updates and return the final particle positions.

        Parameters
        ----------
        x0 : array-like of shape (n_particles, n_dims)
            Initial particles. The input is copied and never modified.
        lnprob : callable
            Called as lnprob(theta); its result is used as the gradient of
            the log-density at each particle, so it must have the same shape
            as theta.
        n_iter : int, optional
            Number of update iterations.
        stepsize : float, optional
            Step size multiplying the adjusted gradient.
        bandwidth : float, optional
            Kernel bandwidth forwarded to `svgd_kernel`; a negative value
            (the default) selects the median heuristic at every iteration.
        alpha : float, optional
            Decay factor of the running average of squared gradients.
        debug : bool, optional
            When true, prints a progress line every 1000 iterations.

        Returns
        -------
        ndarray of shape (n_particles, n_dims)
            Particle positions after n_iter updates.

        Raises
        ------
        ValueError
            If x0 or lnprob is None.
        """
        # Check input
        if x0 is None or lnprob is None:
            raise ValueError('x0 or lnprob cannot be None!')

        theta = np.copy(x0)
        n_particles = theta.shape[0]

        # adagrad with momentum
        fudge_factor = 1e-6
        historical_grad = 0
        for it in range(n_iter):
            if debug and (it + 1) % 1000 == 0:
                print('iter ' + str(it + 1))

            lnpgrad = lnprob(theta)
            # Kernel matrix and repulsion term; honour the caller's bandwidth
            # instead of always forcing the median heuristic.
            kxy, dxkxy = self.svgd_kernel(theta, h = bandwidth)
            grad_theta = (np.matmul(kxy, lnpgrad) + dxkxy) / n_particles

            # The first iteration seeds the accumulator with the raw squared
            # gradient; later iterations use an exponential moving average.
            if it == 0:
                historical_grad = historical_grad + grad_theta ** 2
            else:
                historical_grad = alpha * historical_grad + (1 - alpha) * (grad_theta ** 2)
            adj_grad = np.divide(grad_theta, fudge_factor + np.sqrt(historical_grad))
            theta = theta + stepsize * adj_grad

        return theta

In [8]:
class MVN:
    """Multivariate normal distribution exposing the gradient of its log-density.

    Parameters
    ----------
    mu : array-like of shape (n_dims,)
        Mean vector.
    A : array-like of shape (n_dims, n_dims)
        Inverse of the covariance matrix.
    """

    def __init__(self, mu, A):
        self.mu = mu
        self.A = A  # inverse of the covariance matrix

    def dlnprob(self, theta):
        """Return -(theta - mu) @ A evaluated for every row of theta.

        Parameters
        ----------
        theta : ndarray of shape (n_points, n_dims)
            Points at which to evaluate, one per row.

        Returns
        -------
        ndarray of shape (n_points, n_dims)
        """
        # Broadcasting subtracts mu from every row, so no explicit tiling
        # (numpy.matlib.repmat) is needed.
        return -np.matmul(theta - self.mu, self.A)
    
if __name__ == '__main__':
    # Demo: fit 10 particles to a 2-D Gaussian and compare the particle mean
    # with the true mean. A is passed to MVN as the inverse covariance matrix.
    A = np.array([[0.2260, 0.1652], [0.1652, 0.6779]])
    mu = np.array([-0.6871, 0.8010])

    model = MVN(mu, A)

    # Seeded generator so the initial particles, and therefore the printed
    # result, are reproducible across kernel restarts.
    rng = np.random.default_rng(0)
    x0 = rng.normal(0, 1, (10, 2))
    theta = SVGD().update(x0, model.dlnprob, n_iter=1000, stepsize=0.01)

    print("ground truth: ", mu)
    print("svgd: ", np.mean(theta, axis=0))

ground truth:  [-0.6871  0.801 ]
svgd:  [-0.6879661   0.79851612]


In [23]:
# Load the array stored in 'gamma.npy' (path relative to the current working
# directory) and show its shape as the cell output.
gamma = np.load('gamma.npy')
gamma.shape

(40, 7, 3)

In [24]:
# Flatten everything after the leading axis so each leading-axis entry of
# gamma becomes one row (one particle). -1 lets NumPy infer the row length
# instead of hard-coding the array's dimensions.
x0 = gamma.reshape(gamma.shape[0], -1)

In [25]:
# Zero-mean target with identity inverse covariance, sized to match the
# number of columns of the particle array rather than a hard-coded constant.
n_dims = x0.shape[1]
A = np.eye(n_dims)
mu = np.zeros(n_dims)

model = MVN(mu, A)

theta = SVGD().update(x0, model.dlnprob, n_iter=1000, stepsize=0.01)

In [27]:
# Shape of the particle array returned by SVGD().update.
theta.shape

(40, 21)

In [28]:
# Mean of each coordinate taken across the rows (particles) of theta.
np.mean(theta, axis=0)

array([-2.65748908e-03,  3.31524480e-03, -9.84353857e-04,  3.88034738e-04,
       -1.12625615e-03, -1.02235103e-05, -3.87751725e-03, -6.36146787e-03,
       -1.31551192e-03,  9.78806257e-04,  3.45802849e-03,  2.08531253e-03,
       -7.15610405e-04,  1.98173440e-04,  3.37135675e-03,  4.36028007e-03,
       -2.92087608e-03,  1.86053416e-03, -4.25770097e-03, -2.12042776e-03,
        5.10532445e-03])

In [31]:
# Top-left 3x3 corner of the sample covariance of the particles; theta is
# transposed because np.cov treats each row of its input as one variable.
# NOTE(review): the target fitted in the preceding cell has identity
# covariance, yet the recorded diagonal is about 0.17 - worth explaining in
# prose (possibly too few particles for this dimension; confirm).
np.cov(theta.T)[:3,:3]

array([[ 0.16983038, -0.00749789,  0.00324275],
       [-0.00749789,  0.16577164,  0.00060032],
       [ 0.00324275,  0.00060032,  0.16763988]])

In [12]:
# Scratch check of what nm.repmat produces: mu stacked as two identical rows.
# NOTE(review): this cell was executed out of order (In [12]); the recorded
# output shows the 2-element mu from the MVN demo cell, whereas a
# top-to-bottom run would use the 21-element zero vector assigned later.
nm.repmat(mu, 2, 1)

array([[-0.6871,  0.801 ],
       [-0.6871,  0.801 ]])