In [None]:
import matplotlib.pyplot as plt
import numpy as np

from bayesmixpy import run_mcmc

In [None]:
# Generate data: 100 draws from N(3, 1) followed by 100 draws from N(-3, 1),
# concatenated into a single 1-D array of 200 observations.
# A dedicated, explicitly seeded generator is used so that
# "Restart Kernel -> Run All" always reproduces exactly the same sample.
rng = np.random.default_rng(20201124)
data = np.concatenate([
    rng.normal(loc=3, scale=1, size=100),
    rng.normal(loc=-3, scale=1, size=100),
])

# Quick visual check of the sample: histogram with matplotlib's default binning
_, hist_ax = plt.subplots()
hist_ax.hist(data)
plt.show()

In [None]:
# Hierarchy settings: a text block with fixed values for mean, var, shape and
# scale. It is passed to run_mcmc as-is, so its content must not be reformatted.
hier_params = """
fixed_values {
  mean: 0.0
  var: 10.0
  shape: 2.0
  scale: 2.0
}
"""

# Mixing settings: a text block fixing totalmass to 1.0.
# It is passed to run_mcmc as-is.
mix_params = """
fixed_value {
    totalmass: 1.0
}
"""

# Algorithm settings: sampler id, RNG seed, total iterations, burn-in length,
# initial number of clusters and the neal8_n_aux setting.
# It is passed to run_mcmc as-is.
algo_params = """
algo_id: "Neal8"
rng_seed: 20201124
iterations: 2000
burnin: 1000
init_num_clusters: 3
neal8_n_aux: 3
"""

# Evaluation grid: 1000 equally spaced points covering [-6.5, 6.5]
# (both endpoints included).
dens_grid = np.linspace(start=-6.5, stop=6.5, num=1000)

In [None]:
# Fit the model with bayesmixpy.
# "NNxIG" and "DP" presumably select the hierarchy and the mixing that
# hier_params / mix_params configure (verify against the bayesmixpy docs).
# The three return_* flags request the number of clusters, the cluster
# allocations and the best clustering; the fifth returned value is not used.
mcmc_output = run_mcmc(
    "NNxIG", "DP", data,
    hier_params, mix_params, algo_params,
    dens_grid,
    return_num_clusters=True,
    return_clusters=True,
    return_best_clus=True,
)
eval_dens, n_clus, clus_chain, best_clus, _ = mcmc_output

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: relative frequency of each distinct value found in n_clus,
# i.e. the posterior distribution of the number of clusters.
n_values, n_counts = np.unique(n_clus, return_counts=True)
axes[0].bar(n_values, n_counts / n_counts.sum())
axes[0].set_xticks(n_values)
axes[0].set_title("Posterior distribution of the number of clusters")

# Right panel: posterior mean density on dens_grid.
# eval_dens is exponentiated before plotting, so it is presumably stored on
# the log scale with one row per MCMC iteration (TODO confirm against the
# bayesmixpy docs). The average must be taken on the density scale:
# mean(exp(.)) is the posterior mean density, whereas exp(mean(.)) would be
# the geometric mean of the per-iteration densities, which is never larger
# and does not integrate to one.
mean_density = np.mean(np.exp(eval_dens), axis=0)
axes[1].plot(dens_grid, mean_density)
axes[1].hist(data, alpha=0.3, density=True)

# Rug of the observations just above the x-axis, one scatter call (and hence
# one colour) per label in best_clus.
for c in np.unique(best_clus):
    data_in_clus = data[best_clus == c]
    axes[1].scatter(data_in_clus, np.full_like(data_in_clus, 0.01))
axes[1].set_title("Posterior density estimate")

# Show results
plt.show()