In [None]:
import matplotlib.pyplot as plt
import numpy as np

from bayesmixpy import run_mcmc

In [None]:
import os

# Path (relative to this notebook) of the compiled `run_mcmc` binary.
# Presumably bayesmixpy reads BAYESMIX_EXE to locate the executable -- verify
# against the bayesmixpy documentation.
BAYESMIX_EXE_PATH = "../../build/run_mcmc"
os.environ["BAYESMIX_EXE"] = BAYESMIX_EXE_PATH

# Load the Old Faithful dataset

In [None]:
# Read the comma-separated observations into a NumPy array
# (one row per observation; later cells use columns 0 and 1).
faithful_csv = "../../resources/datasets/faithful.csv"
data = np.loadtxt(faithful_csv, delimiter=",")

## Modelling choices

We consider a mixture of bivariate normals, with unknown mean and covariance matrix. As a prior for the mixing measure we consider either a Dirichlet or a Pitman-Yor process with fixed parameters.

The centering measure is a normal-inverse-Wishart distribution, whose parameters are chosen via an empirical Bayes procedure.

In [None]:
# Column-wise sample mean of the observations (one entry per data column).
mu0 = data.mean(axis=0)
mu0

In [None]:
# Configuration strings handed verbatim to `run_mcmc` in a later cell.
# They look like protobuf text-format messages -- TODO confirm against the
# bayesmix proto definitions.

# Dirichlet process mixing prior with a fixed total mass of 1.0.
# NOTE(review): the key here is `fixed_value` (singular) while `py_params`
# uses `fixed_values` (plural); presumably this mirrors the respective
# message definitions -- confirm before "harmonising" the two.
dp_params = """
fixed_value {
    totalmass: 1.0
}
"""

# Pitman-Yor mixing prior with fixed strength 1.0 and discount 0.2.
py_params = """
fixed_values {
    strength: 1.0
    discount: 0.2
}
"""


# Fixed hyperparameters of the centering measure (described in the text above
# as normal-inverse-Wishart): a length-2 mean vector, a variance scaling of
# 0.01, 5 degrees of freedom and a 2x2 identity scale matrix.
# NOTE(review): the mean is hard-coded and the `mu0` computed in the previous
# cell is not used here -- confirm that these values match the data mean.
g0_params = """
fixed_values {
    mean {
        size: 2
        data: 3.484
        data: 3.487
    }
    var_scaling: 0.01
    deg_free: 5
    scale {
        rows: 2
        cols: 2
        data: 1.0
        data: 0.0
        data: 0.0
        data: 1.0
        rowmajor: false
    }
}
"""

# Sampler settings: algorithm "Neal2", a fixed RNG seed for reproducibility,
# 2000 iterations, a burn-in of 1000 and 3 initial clusters.
neal2_algo = """
algo_id: "Neal2"
rng_seed: 20201124
iterations: 2000
burnin: 1000
init_num_clusters: 3
"""

In [None]:
# 50 equally spaced points on [0, 6], used for both coordinates.
grid_axis = np.linspace(0, 6, 50)

# 50x50 coordinate matrices covering the square [0, 6] x [0, 6].
xgrid, ygrid = np.meshgrid(grid_axis, grid_axis)

# Flatten the mesh into a (2500, 2) array holding one (x, y) point per row;
# this is the set of locations at which the density is evaluated.
dens_grid = np.column_stack([xgrid.ravel(), ygrid.ravel()])

In [None]:
# Output flags shared by both runs: skip the cluster chains and the
# number-of-clusters chain, keep only the "best" clustering.
mcmc_flags = dict(
    return_clusters=False,
    return_num_clusters=False,
    return_best_clus=True,
)

# Same hierarchy ("NNW"), data, centering-measure parameters, algorithm
# settings and evaluation grid for both runs; only the mixing prior differs.
# `run_mcmc` returns a 4-tuple, of which the first and last entries are kept.

# Dirichlet process mixing prior.
eval_dens_dp, _, _, best_clus_dp = run_mcmc(
    "NNW", "DP", data, g0_params, dp_params, neal2_algo, dens_grid,
    **mcmc_flags)

# Pitman-Yor process mixing prior.
eval_dens_py, _, _, best_clus_py = run_mcmc(
    "NNW", "PY", data, g0_params, py_params, neal2_algo, dens_grid,
    **mcmc_flags)

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(11, 5))

# Average the density evaluations over the first axis (presumably the MCMC
# iterations -- TODO confirm) to get one value per grid point.
# NOTE(review): if `run_mcmc` returns log-densities, these are mean
# log-densities and the contours are on the log scale -- confirm.
mean_dens_dp = np.mean(eval_dens_dp, axis=0)
mean_dens_py = np.mean(eval_dens_py, axis=0)

# One panel per mixing prior. Both panels are drawn by the same loop so that
# each one uses its OWN density estimate and its OWN clustering; the previous
# copy-pasted code masked the Pitman-Yor panel with `best_clus_dp`.
panels = [
    (axes[0], "Dirichlet Process", mean_dens_dp, best_clus_dp),
    (axes[1], "Pitman-Yor Process", mean_dens_py, best_clus_py),
]
for ax, title, mean_dens, best_clus in panels:
    ax.set_title(title, fontsize=16)
    # Contour lines of the averaged density on the evaluation grid.
    ax.contour(xgrid, ygrid, mean_dens.reshape(xgrid.shape))
    # One scatter call per cluster so that each cluster gets its own colour.
    for c in np.unique(best_clus):
        currdata = data[best_clus == c, :]
        ax.scatter(currdata[:, 0], currdata[:, 1])