In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Seed NumPy's global random generator so the simulated data drawn with
# np.random.normal further down is the same on every run.
np.random.seed(0)

from bayesmixpy import build_bayesmix, run_mcmc

In [None]:
# Build step to run before sampling: the next cell points BAYESMIX_EXE at an
# executable path taken from the output of this call.
# NOTE(review): the argument 4 is presumably the number of parallel build
# processes -- confirm against the bayesmixpy documentation.
build_bayesmix(4)

In [None]:
import os

# Point BAYESMIX_EXE at the executable reported in the output of
# "build_bayesmix"; edit the path below if the build printed another location.
os.environ.update(BAYESMIX_EXE="../../build/run_mcmc")

# Univariate Example

In [None]:
# Simulated sample: 100 draws from a unit-scale normal centred at +3 followed
# by 100 draws from one centred at -3 (the order of the draws is preserved).
data = np.concatenate(
    [np.random.normal(loc=center, scale=1, size=100) for center in (3, -3)]
)

# Text-format settings handed to run_mcmc as the mixing parameters.
dp_params = """
fixed_value {
    totalmass: 1.0
}
"""

# Text-format settings handed to run_mcmc as the hierarchy parameters.
g0_params = """
fixed_values {
    mean: 0.0
    var_scaling: 0.1
    shape: 2.0
    scale: 2.0
}
"""

# Sampler settings: algorithm id, RNG seed, chain length, burn-in and the
# initial number of clusters.
algo_params = """
    algo_id: "Neal2"
    rng_seed: 20201124
    iterations: 2000
    burnin: 1000
    init_num_clusters: 3
"""

In [None]:
# 1000 equally spaced evaluation points on [-6, 6] for the density estimate.
dens_grid = np.linspace(start=-6, stop=6, num=1000)

# Run the sampler on the univariate data with the "NNIG" / "DP" identifiers and
# unpack its four outputs. All three optional outputs are requested here.
# NOTE(review): what each output contains is inferred from the names used in
# the plotting cell (log density on dens_grid, cluster-count chain, cluster
# chain, best clustering) -- confirm against the bayesmixpy documentation.
log_dens, numcluschain, cluschain, bestclus = run_mcmc(
    "NNIG",
    "DP",
    data,
    g0_params,
    dp_params,
    algo_params,
    dens_grid=dens_grid,
    return_clusters=True,
    return_num_clusters=True,
    return_best_clus=True,
)

In [None]:
import matplotlib.pyplot as plt

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Left panel: histogram of the data, the points coloured by their cluster in
# `bestclus` (drawn just above the x axis), and the estimated density.
axes[0].hist(data, alpha=0.2, density=True)
for c in np.unique(bestclus):
    data_in_clus = data[bestclus == c]
    axes[0].scatter(data_in_clus, np.zeros_like(data_in_clus) + 0.01,
                    label="Cluster {0}".format(int(c) + 1))
# Average the sampled densities themselves over the first axis of log_dens.
# Exponentiating the mean of the log densities would give their pointwise
# geometric mean, which lies below the arithmetic mean and is not a density.
axes[0].plot(dens_grid, np.mean(np.exp(log_dens), axis=0), color="red", lw=3)
axes[0].legend(fontsize=12, ncol=2, loc=1)
axes[0].set_ylim(0, 0.3)
axes[0].set_title("Density estimate and best clustering")

# Middle panel: relative frequency of each number of clusters in the chain.
x, y = np.unique(numcluschain, return_counts=True)
axes[1].bar(x, y / y.sum())
axes[1].set_xticks(x)
axes[1].set_title("Distribution of the number of clusters")

# Right panel: number of clusters at each position of the chain, drawn as
# short vertical ticks.
axes[2].vlines(np.arange(len(numcluschain)), numcluschain-0.3, numcluschain+0.3)
axes[2].set_title("Number of clusters along the chain")
plt.show()

# Bivariate Example

In [None]:
# Hierarchy settings for the bivariate example; they are passed to run_mcmc
# together with the "NNW" hierarchy further down. This rebinds the name
# `g0_params` used by the univariate example. The block specifies a length-2
# mean vector and a 2 x 2 scale matrix whose entries are those of the identity.
g0_params = """
fixed_values {
    mean {
        size: 2
        data: [3.484, 3.487]
    }
    var_scaling: 0.01
    deg_free: 5
    scale {
        rows: 2
        cols: 2
        data: [1.0, 0.0, 0.0, 1.0]
        rowmajor: false
    }
}
"""

In [None]:
# Read the comma-separated dataset for the bivariate example; this rebinds the
# name `data` used by the univariate example.
faithful_csv = "../../resources/datasets/faithful.csv"
data = np.loadtxt(faithful_csv, delimiter=',')

In [None]:
# 50 x 50 evaluation grid over [0, 6] x [0, 6]. xgrid and ygrid keep the 2-D
# meshgrid layout (used for contour plotting); dens_grid lists the same points
# one per row as (x, y) pairs.
grid_ticks = np.linspace(0, 6, 50)
xgrid, ygrid = np.meshgrid(grid_ticks, grid_ticks)
dens_grid = np.column_stack([xgrid.ravel(), ygrid.ravel()])

# Run the sampler on the bivariate data with the "NNW" / "DP" identifiers,
# reusing dp_params and algo_params from the univariate example. The cluster
# chain is not requested (return_clusters=False), so the third output is
# discarded.
log_dens, numcluschain, _, best_clus_dp = run_mcmc(
    "NNW",
    "DP",
    data,
    g0_params,
    dp_params,
    algo_params,
    dens_grid,
    return_clusters=False,
    return_num_clusters=True,
    return_best_clus=True,
)

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Average on the density scale over the first axis of log_dens. Averaging
# log_dens directly would contour a mean log density rather than the mean
# density that the variable name promises.
mean_dens_dp = np.mean(np.exp(log_dens), axis=0)

# Left panel: contours of the estimated density on the (xgrid, ygrid) mesh,
# with the observations coloured by their cluster in `best_clus_dp`.
axes[0].contour(xgrid, ygrid, mean_dens_dp.reshape(xgrid.shape))
for c in np.unique(best_clus_dp):
    currdata = data[best_clus_dp == c, :]
    axes[0].scatter(currdata[:, 0], currdata[:, 1])
axes[0].set_title("Density estimate and best clustering")

# Middle panel: relative frequency of each number of clusters in the chain.
x, y = np.unique(numcluschain, return_counts=True)
axes[1].bar(x, y / y.sum())
axes[1].set_xticks(x)
axes[1].set_title("Distribution of the number of clusters")

# Right panel: number of clusters at each position of the chain, drawn as
# short vertical ticks.
axes[2].vlines(np.arange(len(numcluschain)), numcluschain-0.3, numcluschain+0.3)
axes[2].set_title("Number of clusters along the chain")
plt.show()