# Gibbs sampling on random multivariate Gaussians.

In [1]:
from copy import copy
from functools import partial
import numpy as np
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as scs
from scipy.interpolate import griddata

# Plotting functions.

In [2]:
def get_mesh_bounds(models, num_sigma=3):
    """Return how far from the origin a plotting mesh should extend.

    The largest single entry found in any model's covariance matrix is
    treated as the worst-case variance; the result is ``num_sigma`` standard
    deviations of that variance.
    """
    largest_variance = np.max([m.cov for m in models])
    return num_sigma * np.sqrt(largest_variance)

def get_mesh_xy_and_positions(models, res=.01, num_sigma=3, plot_bounds=None):
    """Build a square grid and the stacked (x, y) positions on it.

    When ``plot_bounds`` is a ``(lower, upper)`` pair the grid spans exactly
    that range on both axes.  Otherwise the grid is symmetric around zero and
    reaches ``get_mesh_bounds(models, num_sigma) + res``.

    Returns:
        x, y: the two coordinate arrays produced by ``np.mgrid`` (step ``res``).
        pos: float array of shape ``x.shape + (2,)`` with ``pos[..., 0] == x``
            and ``pos[..., 1] == y``.
    """
    if plot_bounds is None:
        upper = get_mesh_bounds(models, num_sigma=num_sigma) + res
        lower = -upper
    else:
        lower, upper = plot_bounds
    # The same slice is used for both axes, so the grid is always square.
    axis = slice(lower, upper, res)
    x, y = np.mgrid[axis, axis]
    pos = np.dstack((x, y)).astype(np.float64)
    return x, y, pos

def plot_contours2d(models, ax=None, alpha=0.3, plot_bounds=None):
    """Draw one density contour per 2-D model on ``ax``.

    Args:
        models: sequence of objects exposing ``mean``, ``cov`` and
            ``pdf(positions)``.
        ax: axes to draw on; a new figure is created when omitted.
        alpha: transparency passed to ``ax.contour``.
        plot_bounds: optional ``(lower, upper)`` range used for both axes of
            the evaluation mesh.  When omitted the mesh is sized from the
            models' covariances (three standard deviations).

    Returns:
        ``(ax, bounds)`` where ``bounds`` is ``[min, max]`` of the mesh's
        x-coordinates.

    Raises:
        ValueError: if the first model is not two-dimensional.
    """
    if len(models[0].mean) != 2:
        raise ValueError('plot_contours2d only supports 2-dimensional models.')
    if ax is None:
        plt.figure()
        ax = plt.gca()
    # get_mesh_xy_and_positions already falls back to a covariance-sized mesh
    # when plot_bounds is None, so one call covers both cases.
    x, y, pos = get_mesh_xy_and_positions(models, num_sigma=3,
                                          plot_bounds=plot_bounds)

    # A single contour level (density 0.01) is drawn for every model.
    levels = [0.01]
    cmaps = ['viridis', 'cool', 'hot']
    for i, model in enumerate(models):
        # Repeated contour calls accumulate on the same axes by default.
        # Axes.hold() was deprecated in matplotlib 2.0 and removed in 3.0,
        # so it must not be called here.
        ax.contour(x, y, model.pdf(pos), levels, cmap=cmaps[i % len(cmaps)],
                   label='model %d' % i, alpha=alpha)
    ax.set_aspect('equal')
    ax.tick_params(labelsize='xx-small')
    bounds = [np.min(x), np.max(x)]
    return ax, bounds

def plot_samples(ax, samples, alpha=0.01, s=2, plot_bounds=(-4, 4), 
                 title_str=None):
    """Scatter 2-D samples on ``ax`` and clamp both axes to ``plot_bounds``.

    ``samples`` is converted with ``np.asarray``; column 0 is plotted against
    column 1.  ``title_str`` is only applied when it is not None.

    Returns:
        ``(ax, plot_bounds)`` -- the same objects that were passed in.
    """
    points = np.asarray(samples)
    xs, ys = points[:, 0], points[:, 1]
    ax.scatter(xs, ys, s=s, zorder=1, alpha=alpha)
    ax.set_aspect('equal')
    if title_str is not None:
        ax.set_title(title_str, fontsize='xx-small')
    ax.tick_params(labelsize='xx-small')
    # Same range on both axes, x first and then y.
    lower, upper = plot_bounds[0], plot_bounds[1]
    ax.set_xbound(lower, upper)
    ax.set_ybound(lower, upper)
    return ax, plot_bounds

# Make model functions.

In [3]:
# Make random covariances using the wishart distribution
def make_random_models(num_models=2, ndim=2, df=2, high_cov_prior=False):
    """Create zero-mean Gaussian models with Wishart-distributed covariances.

    Args:
        num_models: number of models to create.
        ndim: dimensionality of every model.
        df: Wishart degrees of freedom; raised to ``ndim`` (with a printed
            warning) when smaller.
        high_cov_prior: when False the Wishart scale is ``I / df``.  When
            True the scale alternates between a matrix with off-diagonal 0.9
            (even indices) and one with off-diagonal -0.8 (odd indices), both
            divided by ``df``; this mode is only defined for ``ndim == 2``.

    Returns:
        A list of ``num_models`` frozen ``scipy.stats.multivariate_normal``
        objects.

    Raises:
        ValueError: if ``high_cov_prior`` is requested with ``ndim != 2``.
    """
    if df < ndim:
        print('WARNING: df (degrees of freedom) should be at least ndim, '
              'currently %d < %d. Automatically updated to df == ndim'
              % (df, ndim))
        df = ndim

    if not high_cov_prior:
        draws = scs.wishart.rvs(df=df, scale=1. / df * np.eye(ndim),
                                size=num_models)
        # rvs() squeezes singleton axes (a single draw comes back as one
        # matrix, 1-D draws as scalars); restore one matrix per model.
        covs = np.reshape(draws, (num_models, ndim, ndim))
    else:
        if ndim != 2:
            raise ValueError('high_cov_prior is only defined for ndim == 2, '
                             'got ndim == %d' % ndim)
        scales = (np.array([[1, 0.9], [0.9, 1]]),
                  np.array([[1, -0.8], [-0.8, 1]]))
        covs = [scs.wishart.rvs(df=df, scale=1. / df * scales[i % 2], size=1)
                for i in range(num_models)]
    return [scs.multivariate_normal(np.zeros(ndim), cov) for cov in covs]

In [4]:
scs.wishart.rvs?

In [23]:
# Explore how the Wishart parameters shape the sampled covariances.
# Figure rows: high_cov_prior=True (top) and False (bottom).
# Figure columns: the first `ntrials` panels use df=2, the next `ntrials`
# panels use df=100; every panel shows a fresh draw of 3 models.
ntrials = 3
fig, axes = plt.subplots(2, ntrials*2)
for j, high_cov_prior in enumerate([True, False]):
    for i in range(ntrials):
        for k, df in enumerate([2, 100]):
            models = make_random_models(num_models=3, high_cov_prior=high_cov_prior, df=df)
            # Column = block k (one block per df value) + trial i inside it.
            _, bounds = plot_contours2d(models, axes[j, k*ntrials + i])
plt.tight_layout()

<IPython.core.display.Javascript object>

In [6]:
# Sanity check: draw one 3x3 Wishart matrix with df=n and scale=I/n and look
# at its SVD.  The expected value of such a draw is df * scale = I, so for a
# large n the singular values should all be close to 1.
n = 10000
mat = scs.wishart.rvs(n, 1./n*np.eye(3))
np.linalg.svd(mat)

(array([[-0.5748447 ,  0.63928088, -0.5107578 ],
        [ 0.55649158, -0.15218512, -0.81679668],
        [-0.59989224, -0.75376365, -0.26827162]]),
 array([ 1.03550371,  1.00436225,  0.98451592]),
 array([[-0.5748447 ,  0.55649158, -0.59989224],
        [ 0.63928088, -0.15218512, -0.75376365],
        [-0.5107578 , -0.81679668, -0.26827162]]))

# Compute conditionals

In [7]:
def compute_cond_moments(model, cond_val=None, cond_ax=None):
    """Mean and variance of one coordinate of a 2-D Gaussian given the other.

    With [x, y]^T ~ N([a, b]^T, [[A, C], [C^T, B]]) the conditional is
        x | y ~ N(a + C B^-1 (y - b), A - C B^-1 C^T).

    Args:
        model: object with a length-2 ``mean`` and a 2x2 ``cov``.
        cond_val: observed value of the conditioned-on coordinate.
        cond_ax: index (0 or 1) of the conditioned-on coordinate.

    Returns:
        ``(mean, variance)`` of the remaining coordinate.
    """
    # Only the bivariate case is handled.
    assert len(model.mean) == 2
    free_ax = abs(1 - cond_ax)
    cross = model.cov[1, 0]
    # Regression coefficient of the free coordinate on the observed one.
    gain = cross * 1. / model.cov[cond_ax, cond_ax]
    offset = cond_val - model.mean[cond_ax]
    return (model.mean[free_ax] + gain * offset,
            model.cov[free_ax, free_ax] - gain * cross)
def get_cond_norm(model, **kwargs):
    """Return the conditional distribution of ``model`` as a frozen Gaussian.

    ``kwargs`` are forwarded to ``compute_cond_moments`` (``cond_val`` and
    ``cond_ax``); the returned object is a frozen
    ``scipy.stats.multivariate_normal`` built from the resulting moments.
    """
    mean, cov = compute_cond_moments(model, **kwargs)
    # Only the `scs` alias is imported; the bare name `multivariate_normal`
    # does not exist in this file and would raise NameError.
    return scs.multivariate_normal(mean, cov)
def sample_cond_norm(model, **kwargs):
    """Draw one value from the conditional distribution of ``model``.

    ``kwargs`` (``cond_val``, ``cond_ax``) are forwarded to
    ``compute_cond_moments``.
    """
    cond_mean, cond_var = compute_cond_moments(model, **kwargs)
    return scs.multivariate_normal.rvs(mean=cond_mean, cov=cond_var)

In [8]:
np.linalg.solve?

# Mixture, product of Gaussians

In [9]:
def sample_mixture_gaussian(ax, models, nsamples=1000, plot_bounds=None):
    """Sample from the equal-weight mixture of ``models`` and scatter-plot it.

    Each sample picks one model uniformly at random and draws from it.  The
    samples are plotted on ``ax`` via ``plot_samples``; when ``plot_bounds``
    is given, both axes are then clamped to that ``(lower, upper)`` range.

    Returns:
        Array holding the ``nsamples`` draws.
    """
    def draw_one():
        # Choose the mixture component first, then sample from it.
        component = models[np.random.choice(len(models), size=1)[0]]
        return scs.multivariate_normal.rvs(component.mean, component.cov)

    samples = np.asarray([draw_one() for _ in range(nsamples)])
    plot_samples(ax, samples)
    ax.set_title('mixture of Gauss', fontsize='small')
    ax.tick_params(labelsize='small')
    if plot_bounds is None:
        return samples
    lower, upper = plot_bounds[0], plot_bounds[1]
    ax.set_xbound(lower, upper)
    ax.set_ybound(lower, upper)
    return samples

In [10]:
# Check the mixture of Gaussians: for each trial draw 3 random models, plot
# their contours, and overlay 5000 mixture samples in the same panel.
ntrials = 3
fig, axes = plt.subplots(1, ntrials)
#axes = np.ravel(axes)
for i in range(ntrials):
    models = make_random_models(num_models=3)
    # The bounds of the contour mesh are reused so the scatter shares its range.
    ax, ref_plt_bds = plot_contours2d(models, axes[i])
    samples = sample_mixture_gaussian(ax, models, nsamples=5000, plot_bounds=ref_plt_bds)
plt.show()

<IPython.core.display.Javascript object>

In [11]:
# Product of gaussians, ref roweis's notes on gaussid
def gauss_prod(models):
    """Return the normalised product of Gaussian densities as a frozen model.

    The product of N(a_i, A_i) densities is proportional to N(c, C) with
        C = inv(sum_i inv(A_i))
        c = C . sum_i inv(A_i) a_i
    so the precisions and precision-weighted means are accumulated once per
    model instead of re-inverting the running covariance at every step.

    Args:
        models: non-empty sequence of objects exposing ``mean`` and ``cov``.

    Returns:
        A frozen ``scipy.stats.multivariate_normal`` for the product.

    Raises:
        ValueError: if ``models`` is empty.
    """
    if len(models) == 0:
        raise ValueError('gauss_prod needs at least one model.')
    print('# of models %d' % len(models))
    precision = 0
    weighted_mean = 0
    for model in models:
        model_precision = np.linalg.inv(model.cov)
        precision = precision + model_precision
        weighted_mean = weighted_mean + np.dot(model_precision, model.mean)
    cov = np.linalg.inv(precision)
    return scs.multivariate_normal(np.dot(cov, weighted_mean), cov)

In [12]:
# Check: iteratively add models and check how the product updates.
# Panel layout (one row, 2 * num_models panels):
#   0: all models together        1: model 0 alone
#   2*i: model i alone            2*i + 1: product of models 0..i   (i >= 1)
num_models = 3
fig, axes = plt.subplots(1, num_models*2)
axes = np.ravel(axes)
models = make_random_models(num_models=num_models)
# The bounds of the first (all-models) mesh are reused for every other panel.
ax, ref_plt_bds = plot_contours2d(models, axes[0])
ax, _ = plot_contours2d([models[0]], axes[1], plot_bounds=ref_plt_bds)
ax.set_title('model 0', fontsize='x-small')
for i in range(1, num_models):
    ax, _ = plot_contours2d([models[i]], axes[i*2], plot_bounds=ref_plt_bds)
    ax.set_title('model %d' % i, fontsize='x-small')
    # Product of the first i+1 models.
    prod = gauss_prod(models[:(i+1)])
    ax, _ = plot_contours2d([prod], axes[i*2 + 1], plot_bounds=ref_plt_bds)
    ax.set_title('prod uptill %d' % i, fontsize='x-small')
plt.show()

<IPython.core.display.Javascript object>

# of models 2
# of models 3


In [13]:
# Check the product of different sets of random Gaussians.
# Top row: the 3 individual models of each trial; bottom row: their product,
# drawn on the same bounds as the panel above it.
ntrials = 3
fig, axes = plt.subplots(2, ntrials)
#axes = np.ravel(axes)
for i in range(ntrials):
    models = make_random_models(num_models=3)
    ax, plot_bounds = plot_contours2d(models, axes[0, i])
    prod = gauss_prod(models)
    ax, plot_bounds = plot_contours2d([prod], axes[1, i], 
                                      plot_bounds=plot_bounds)

plt.show()

<IPython.core.display.Javascript object>

# of models 3
# of models 3
# of models 3


# MH sampling

In [14]:
def poe_pdf(models, x):
    """Return the geometric mean of the models' densities at ``x``.

    This is the product of the individual densities raised to
    ``1 / len(models)``.  It is evaluated in log space: multiplying the raw
    densities underflows to zero in the tails once several models are
    combined, even though the geometric mean itself is still representable.

    Args:
        models: non-empty sequence of objects exposing ``logpdf(x)``.
        x: a point, or an array of points, accepted by ``logpdf``.

    Raises:
        ValueError: if ``models`` is empty.
    """
    if len(models) == 0:
        raise ValueError('poe_pdf needs at least one model.')
    log_pdfs = [model.logpdf(x) for model in models]
    return np.exp(np.mean(log_pdfs, axis=0))

def stoachastic_poe_pdf(x, models_to_subsample=None, subsample_count=None, 
                        preselected_models=None):
    """Evaluate ``poe_pdf`` at ``x`` on a chosen or randomly drawn model subset.

    Exactly one source of models must be supplied:

    * ``preselected_models``: use these models as they are
      (``subsample_count`` and ``models_to_subsample`` must both be None).
    * ``models_to_subsample``: use all of them when ``subsample_count`` is
      None, otherwise draw ``subsample_count`` distinct models at random.

    Returns:
        ``(pdf_value, models_used)`` so a caller can re-evaluate another
        point on the very same subset.

    Raises:
        ValueError: on an inconsistent argument combination, or when
            ``subsample_count`` is zero.
    """
    if preselected_models is not None:
        if subsample_count is not None:
            raise ValueError('If preselected models are given, assumes all would be used and not subsampled')
        if models_to_subsample is not None:
            raise ValueError('If preselected models are given, assumes would not need another set of models to choose from.')
        poe_models = preselected_models
    elif models_to_subsample is None:
        raise ValueError('Not models to choose from.')
    elif subsample_count is None:
        poe_models = models_to_subsample
    elif subsample_count == 0:
        # An empty subset has no defined product of experts.
        raise ValueError('subsample_count can not be zero.  Need to at least sample one model.')
    else:
        inds = np.random.choice(len(models_to_subsample), subsample_count, replace=False)
        poe_models = [models_to_subsample[ind] for ind in inds]
    return poe_pdf(poe_models, x), poe_models

def mh_product_of_gaussians(models, axes, model_subsample_count=None, sigma2=0.1, init_val=None,
                            num_samples=1000, burnin=500, plot_bounds=None):
    """Metropolis-Hastings sampling of a (sub-sampled) product of Gaussians.

    At every step a proposal is drawn from an isotropic Gaussian centred on
    the current state.  The acceptance ratio compares proposal and current
    state under ``poe_pdf`` of ``model_subsample_count`` randomly chosen
    models, with the same subset used for both evaluations.

    Args:
        models: sequence of 2-D models exposing ``mean``, ``cov`` and ``pdf``.
        axes: a single axes object or a list/tuple/array of them.  The first
            receives the samples after burn-in; a second, if present,
            receives the full chain.
        model_subsample_count: number of models used per step; defaults to
            all of them.
        sigma2: variance of the isotropic proposal distribution.
        init_val: starting point; drawn from a randomly chosen model when
            omitted.
        num_samples: number of samples to keep after burn-in.
        burnin: number of initial steps dropped from the first plot.
        plot_bounds: ``(lower, upper)`` axis range for the scatter plots;
            defaults to the bounds of the contour mesh.

    Returns:
        The full chain (initial value included) as a list of points.
    """
    if isinstance(axes, np.ndarray):
        axes = list(axes.ravel())
    elif not isinstance(axes, (list, tuple)):
        axes = [axes]
    num_samples = num_samples + burnin
    if model_subsample_count is None:
        model_subsample_count = len(models)
    # Plot background
    contour_bounds = None
    for ax in axes:
        _, contour_bounds = plot_contours2d(models, ax)
    if plot_bounds is None:
        # plot_samples indexes plot_bounds, so None must not reach it.
        plot_bounds = contour_bounds
    if init_val is None:
        model = models[np.random.randint(len(models))]
        init_val = scs.multivariate_normal.rvs(model.mean, model.cov)
    ndim = models[0].mean.size
    proposal_cov = sigma2 * np.identity(ndim)
    samples = [init_val]

    for _ in range(num_samples):
        # Propose sample
        sample = scs.multivariate_normal.rvs(samples[-1], proposal_cov)
        current_pdf, preselected_models = stoachastic_poe_pdf(
            sample, models_to_subsample=models,
            subsample_count=model_subsample_count)
        # Score the current state on the same subset as the proposal.
        previous_pdf, _ = stoachastic_poe_pdf(
            samples[-1], preselected_models=preselected_models)
        ratio = current_pdf / previous_pdf
        accept_prob = np.minimum(1, ratio)
        alpha = np.random.random_sample()
        if alpha <= accept_prob:
            samples.append(sample)
        else:
            samples.append(samples[-1])
    init_str = 'init_val=(%.2f,%.2f)' % (init_val[0], init_val[1])
    title_prefix = '%d-model ' % model_subsample_count
    plot_samples(axes[0], samples[burnin:], plot_bounds=plot_bounds,
                 title_str=title_prefix + 'MH, prop var=%.4f, w/o burn-in, %s' % (sigma2, init_str))
    if len(axes) > 1:
        plot_samples(axes[1], samples, plot_bounds=plot_bounds,
                     title_str=title_prefix + 'MH, prop var=%.4f, %s' % (sigma2, init_str))
    return samples

In [15]:
# Test MH on one set of 3 strongly correlated models, using 4 panels:
#   axes[0]: model contours, product contour and exact samples of the product
#   axes[1]: MH using all models at every step
#   axes[2]: MH using 2 randomly chosen models per step
#   axes[3]: MH using 1 randomly chosen model per step, with mixture samples
#            drawn on top of it
num_models=3
models = make_random_models(num_models=num_models, high_cov_prior=True) 
fig, axes = plt.subplots(1, 4)
ax, ref_plt_bds = plot_contours2d(models, axes[0])
nsamples=5000
nburnin=500

# plot product of Gaussians
prod = gauss_prod(models)
ax, _ = plot_contours2d([prod], axes[0], plot_bounds=ref_plt_bds)
samples = scs.multivariate_normal.rvs(prod.mean, prod.cov, 
                                      size=nsamples)
print '# of samples', samples.shape
ax, _ = plot_samples(samples=samples, ax=axes[0], plot_bounds=ref_plt_bds)

# MH on full product
sigma2=0.2
samples = mh_product_of_gaussians(
                models, axes[1], model_subsample_count=None, 
                sigma2=sigma2, init_val=None, num_samples=nsamples, burnin=nburnin,
                plot_bounds=ref_plt_bds)

# MH on two stochastically sampled models at a time
sigma2=0.2
samples = mh_product_of_gaussians(
                models, axes[2], model_subsample_count=2, 
                sigma2=sigma2, init_val=None, num_samples=nsamples, burnin=nburnin,
                plot_bounds=ref_plt_bds)

# MH on stochastically sampled one model at a time
sigma2=0.2
samples = mh_product_of_gaussians(
                models, axes[3], model_subsample_count=1, 
                sigma2=sigma2, init_val=None, num_samples=nsamples, burnin=nburnin,
                plot_bounds=ref_plt_bds)
# Samples from mixture (drawn into the same panel as the single-model MH run)
ax, _ = plot_contours2d(models, axes[3], plot_bounds=ref_plt_bds)
samples = sample_mixture_gaussian(ax=ax, models=models, nsamples=nsamples, plot_bounds=ref_plt_bds)
        
plt.show()

<IPython.core.display.Javascript object>

# of models 3
# of samples (5000, 2)




# Gibbs sampling

In [16]:
def random_pick(models=models, **kwargs):
    """Return one entry of ``models`` chosen uniformly at random.

    Any further keyword arguments (such as ``previous_model``) are accepted
    and ignored.
    """
    return models[np.random.randint(len(models))]

def fixed_scan_0(models=models, previous_model=None, **kwargs):
    """Cycle through ``models`` in list order, starting at index 0.

    With no ``previous_model`` the first model is returned; otherwise the
    model following ``previous_model`` in the list, wrapping around at the
    end.  Extra keyword arguments are ignored.
    """
    next_idx = 0 if previous_model is None else models.index(previous_model) + 1
    return models[next_idx % len(models)]

def fixed_scan_1(models=models, previous_model=None, **kwargs):
    """Cycle through ``models`` in list order, starting at index 1.

    With no ``previous_model`` the model at index 1 (modulo the list length)
    is returned; otherwise the model following ``previous_model`` in the
    list, wrapping around at the end.  Extra keyword arguments are ignored.
    """
    next_idx = 1 if previous_model is None else models.index(previous_model) + 1
    return models[next_idx % len(models)]

def run_gibbs(ax, models, model_pick_fn, init_val=None, 
              nsamples=5000, nburnin=1000, plot_bounds=None):
    """Run a Gibbs-style sampler over several 2-D Gaussians and plot it.

    Every iteration picks a model with ``model_pick_fn``, picks one of the
    two coordinates at random and resamples it from that model's conditional
    distribution given the other coordinate.

    Args:
        ax: axes to draw the model contours and the samples on.
        models: sequence of 2-D models exposing ``mean``, ``cov`` and ``pdf``.
        model_pick_fn: callable invoked as
            ``model_pick_fn(models=models, previous_model=last_model)`` that
            returns the model to use next (``previous_model`` is None on the
            first iteration).
        init_val: starting point; drawn uniformly from [0, 1)^2 when omitted.
        nsamples: number of iterations after burn-in.
        nburnin: number of leading samples discarded before plotting.
        plot_bounds: ``(lower, upper)`` range for both axes; derived from the
            models when omitted.

    Returns:
        Array with the samples that remain after dropping the first
        ``nburnin`` entries of the chain.

    Raises:
        ValueError: if the models are not two-dimensional.
    """
    import sys

    # Plot background
    ax, plot_bounds = plot_contours2d(models, ax, plot_bounds=plot_bounds)
    # Initialization
    ndim = models[0].mean.size
    if ndim != 2:
        raise ValueError('run_gibbs only supports 2-dimensional models.')
    if init_val is None:
        init_val = np.random.random(ndim)
    else:
        # Honour the caller's starting point; copy so it is never mutated.
        init_val = np.array(init_val, dtype=float)
    title_str = 'init=' + ''.join('%.2f, ' % val for val in init_val)
    samples = [init_val]
    model = None
    for i in range(nsamples + nburnin):
        if i % 5000 == 0:
            # Progress marker, kept on one line.
            sys.stdout.write('sample %d ' % i)
        cur_sample = samples[-1].copy()
        model = model_pick_fn(models=models, previous_model=model)
        # Resample one randomly chosen coordinate given the other one.
        sampled_ax = np.random.randint(ndim)
        cond_ax = 1 - sampled_ax
        cur_sample[sampled_ax] = sample_cond_norm(
            model, cond_val=cur_sample[cond_ax], cond_ax=cond_ax)
        samples.append(cur_sample)

    samples = np.asarray(samples)[nburnin:]
    # plot_samples already fixes the aspect ratio and the axis bounds.
    plot_samples(ax, samples, plot_bounds=plot_bounds)
    ax.set_title(title_str, fontsize='small')
    ax.tick_params(labelsize='small')
    return samples

In [17]:
def run_gibbs_job_all(ntrails=1, num_models=3, nsamples=5000, nburnin=1000, trial_models=None):
    """Compare every sampler side by side, one figure column per trial.

    Rows of the figure:
        0. ensemble (contours of the individual experts)
        1. contour of the exact product
        2. exact samples from the product
        3. Gibbs, random scan
        4. Gibbs, fixed scan starting at model 0
        5. Gibbs, fixed scan starting at model 1
        6. mixture samples
        7. MH on the full product
        8. MH on two randomly chosen models per step
        9. MH on one randomly chosen model per step

    Args:
        ntrails: number of trials to generate when ``trial_models`` is None.
        num_models: number of models per generated trial.
        nsamples: samples per sampler (after burn-in where applicable).
        nburnin: burn-in length for the Gibbs and MH samplers.
        trial_models: optional list with one list of models per trial; its
            length then determines the number of columns.

    Returns:
        The list of per-trial model lists that was plotted.
    """
    if trial_models is None:
        trial_models = [make_random_models(num_models=num_models)
                        for _ in range(ntrails)]
    # The column count follows the data actually plotted rather than a
    # module-level variable; squeeze=False keeps `axes` two-dimensional even
    # for a single trial.
    num_trials = len(trial_models)
    _, axes = plt.subplots(10, num_trials, figsize=(20, 12), squeeze=False)
    gibbs_rows = ((3, random_pick), (4, fixed_scan_0), (5, fixed_scan_1))
    # (row, number of models used per MH step); None means all models.
    mh_rows = ((7, None), (8, 2), (9, 1))
    sigma2 = 0.2
    for i, models in enumerate(trial_models):
        ax, ref_plt_bds = plot_contours2d(models, axes[0, i])

        # plot product of Gaussians
        prod = gauss_prod(models)
        ax, _ = plot_contours2d([prod], axes[1, i], plot_bounds=ref_plt_bds)
        samples = scs.multivariate_normal.rvs(prod.mean, prod.cov,
                                              size=nsamples)
        print('# of samples %s' % (samples.shape,))
        ax, _ = plot_samples(samples=samples, ax=axes[2, i], plot_bounds=ref_plt_bds)

        # Gibbs samples: random scan, then the two fixed-scan orderings
        for row, pick_fn in gibbs_rows:
            run_gibbs(axes[row, i], models=models, model_pick_fn=pick_fn,
                      nsamples=nsamples, nburnin=nburnin)

        # Samples from mixture
        ax, _ = plot_contours2d(models, axes[6, i], plot_bounds=ref_plt_bds)
        samples = sample_mixture_gaussian(ax=ax, models=models, nsamples=nsamples,
                                          plot_bounds=ref_plt_bds)

        # MH on the full product, on pairs of models and on single models
        for row, subsample_count in mh_rows:
            samples = mh_product_of_gaussians(
                models, axes[row, i], model_subsample_count=subsample_count,
                sigma2=sigma2, init_val=None, num_samples=nsamples, burnin=nburnin,
                plot_bounds=ref_plt_bds)

    plt.tight_layout()
    plt.show()
    return trial_models

In [18]:
def run_gibbs_job(ntrails=1, num_models=3, nsamples=5000, nburnin=1000, trial_models=None):
    """Compare the main samplers side by side, one figure column per trial.

    Rows of the figure:
        0. ensemble (contours of the individual experts)
        1. contour of the exact product
        2. exact samples from the product
        3. Gibbs, random scan
        4. mixture samples
        5. MH on the full product
        6. MH on two randomly chosen models per step
        7. MH on one randomly chosen model per step

    Args:
        ntrails: number of trials to generate when ``trial_models`` is None.
        num_models: number of models per generated trial.
        nsamples: samples per sampler (after burn-in where applicable).
        nburnin: burn-in length for the Gibbs and MH samplers.
        trial_models: optional list with one list of models per trial; its
            length then determines the number of columns.

    Returns:
        The list of per-trial model lists that was plotted.
    """
    if trial_models is None:
        trial_models = [make_random_models(num_models=num_models)
                        for _ in range(ntrails)]
    # The column count follows the data actually plotted rather than a
    # module-level variable; squeeze=False keeps `axes` two-dimensional even
    # for a single trial.
    num_trials = len(trial_models)
    _, axes = plt.subplots(8, num_trials, figsize=(9, 24), squeeze=False)
    # (row, number of models used per MH step); None means all models.
    mh_rows = ((5, None), (6, 2), (7, 1))
    sigma2 = 0.2
    for i, models in enumerate(trial_models):
        ax, ref_plt_bds = plot_contours2d(models, axes[0, i])

        # plot product of Gaussians
        prod = gauss_prod(models)
        ax, _ = plot_contours2d([prod], axes[1, i], plot_bounds=ref_plt_bds)
        samples = scs.multivariate_normal.rvs(prod.mean, prod.cov,
                                              size=nsamples)
        print('# of samples %s' % (samples.shape,))
        ax, _ = plot_samples(samples=samples, ax=axes[2, i], plot_bounds=ref_plt_bds)

        # plot gibbs samples
        run_gibbs(axes[3, i], models=models, model_pick_fn=random_pick,
                  nsamples=nsamples, nburnin=nburnin)

        # Samples from mixture
        ax, _ = plot_contours2d(models, axes[4, i], plot_bounds=ref_plt_bds)
        samples = sample_mixture_gaussian(ax=ax, models=models, nsamples=nsamples,
                                          plot_bounds=ref_plt_bds)

        # MH on the full product, on pairs of models and on single models
        for row, subsample_count in mh_rows:
            samples = mh_product_of_gaussians(
                models, axes[row, i], model_subsample_count=subsample_count,
                sigma2=sigma2, init_val=None, num_samples=nsamples, burnin=nburnin,
                plot_bounds=ref_plt_bds)

    plt.tight_layout()
    plt.show()
    return trial_models

In [24]:
# Draw 3 independent sets of 3 random models (identity-scaled Wishart
# covariances) and run the reduced sampler comparison on them.
ntrails = 3
num_models = 3
trial_models = [make_random_models(num_models=num_models) for i in range(ntrails)]
trial_models = run_gibbs_job(trial_models=trial_models)

<IPython.core.display.Javascript object>

# of models 3
# of samples (5000, 2)
sample 0 sample 5000 # of models 3
# of samples (5000, 2)
sample 0 sample 5000



 # of models 3
# of samples (5000, 2)
sample 0 sample 5000


In [25]:
# Draw 3 fresh sets of 3 random models and run the full sampler comparison,
# which also includes the two fixed-scan Gibbs variants.
ntrails = 3
num_models = 3
trial_models = [make_random_models(num_models=num_models) for i in range(ntrails)]
trial_models = run_gibbs_job_all(trial_models=trial_models)

<IPython.core.display.Javascript object>

# of models 3
# of samples (5000, 2)
sample 0 sample 5000 sample 0 sample 5000 sample 0 sample 5000 # of models 3
# of samples (5000, 2)
sample 0 sample 5000 sample 0 sample 5000 sample 0 sample 5000 # of models 3
# of samples (5000, 2)
sample 0 sample 5000 sample 0 sample 5000 sample 0 sample 5000


In [26]:
# Same reduced comparison, but with models drawn from the strongly
# correlated prior (high_cov_prior=True).
ntrails = 3
num_models = 3
trial_models = [make_random_models(num_models=num_models, high_cov_prior=True) for i in range(ntrails)]
trial_models = run_gibbs_job(trial_models=trial_models)

<IPython.core.display.Javascript object>

# of models 3
# of samples (5000, 2)
sample 0 sample 5000 # of models 3
# of samples (5000, 2)
sample 0 sample 5000



 # of models 3
# of samples (5000, 2)
sample 0 sample 5000
