# Comparison with package ```BNPmix```

In [None]:
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import arviz as az

# Relative locations of the input CSVs, the output CSVs and the recordio directory
CSVIN, CSVOUT, RECORDIO = (
    '../resources/csv/in/thesis',
    '../resources/csv/out/thesis',
    '../resources/recordio',
)

## ```galaxy``` dataset

In [None]:
# Load the galaxy observations and the grid the density estimates are plotted on
galaxy_data_path = f"{CSVIN}/galaxy.csv"
galaxy_grid_path = f"{CSVIN}/galaxy_grid.csv"
galaxy_data = np.genfromtxt(galaxy_data_path)
galaxy_grid = np.genfromtxt(galaxy_grid_path)

In [None]:
# Mixing identifier used in the bayesmix file names, and the algorithm names
# of the two groups being compared (algos2 are the BNPmix runs)
mix = 'PY'
algos1 = ['Neal2', 'Neal3', 'Neal8']
algos2 = ['mar', 'ics']

In [None]:
# Load the density evaluations, one comma-separated file per algorithm.
# The first group's file names also carry the mixing identifier.
galaxy_dens_1 = {
    name: np.genfromtxt(f"{CSVOUT}/galaxy_dens_{name}_{mix}.csv", delimiter=',')
    for name in algos1
}
galaxy_dens_2 = {
    name: np.genfromtxt(f"{CSVOUT}/galaxy_dens_bnpmix_{name}.csv", delimiter=',')
    for name in algos2
}

In [None]:
# Plot the estimated densities of every algorithm over a histogram of the data
fig = plt.figure(figsize=(10,6))
plt.hist(galaxy_data, density=True, color='lightgray')
# Means use 1-in-2 thinning (every other row of each density matrix is kept)
# NOTE(review): the first group is averaged and then exponentiated, whereas the
# BNPmix group is a plain average; if the first group's files hold log-densities
# this compares a geometric mean with an arithmetic mean -- confirm this is intended.
for algo in algos1:
    dens = np.exp(np.mean(galaxy_dens_1[algo][0::2], axis=0))
    plt.plot(galaxy_grid, dens, label=algo)
for algo in algos2:
    dens = np.mean(galaxy_dens_2[algo][0::2], axis=0)
    plt.plot(galaxy_grid, dens, linestyle='--', label=algo)
plt.title("Pitman-Yor density estimation with bayesmix (solid) and bnpmix (dashed)")
# Each curve carries its own label, so the legend does not depend on drawing order
plt.legend()

In [None]:
# Effective sample size of the chain stored in each "nclu" file
# (presumably the number of clusters per iteration), keyed by mixing and algorithm
galaxy_ESS1 = {
    mix: {
        name: az.ess(np.genfromtxt(f"{CSVOUT}/galaxy_nclu_{name}_{mix}.csv"))
        for name in algos1
    }
}
galaxy_ESS2 = {
    mix: {
        name: az.ess(np.genfromtxt(f"{CSVOUT}/galaxy_nclu_bnpmix_{name}.csv"))
        for name in algos2
    }
}
# Show ESSs
print("galaxy ESS:")
print(galaxy_ESS1)
print(galaxy_ESS2)

In [None]:
# Execution times per algorithm, entered by hand
# (presumably seconds, given the ESS-per-second ratios computed from them)
galaxy_times1 = {"Neal2": 1.371, "Neal3": 2.126, "Neal8": 2.047}
galaxy_times2 = {"mar": 2.88611, "ics": 2.98552}

In [None]:
# Sampling efficiency: effective sample size divided by execution time
galaxy_ratio1 = {
    name: galaxy_ESS1[mix][name] / galaxy_times1[name] for name in algos1
}
galaxy_ratio2 = {
    name: galaxy_ESS2[mix][name] / galaxy_times2[name] for name in algos2
}
print("galaxy ratios:")
print(galaxy_ratio1)
print(galaxy_ratio2)

## ```faithful``` dataset

In [None]:
# Load the faithful observations and the grid the density estimates are drawn on
faithful_data_path = f"{CSVIN}/faithful.csv"
faithful_grid_path = f"{CSVIN}/faithful_grid.csv"
faithful_data = np.genfromtxt(faithful_data_path)
faithful_grid = np.genfromtxt(faithful_grid_path)

In [None]:
# Mixing identifier used in the bayesmix file names, and the algorithm names
# of the two groups being compared (algos2 are the BNPmix runs)
mix = 'PY'
algos1 = ['Neal2', 'Neal3', 'Neal8']
algos2 = ['mar', 'ics']

In [None]:
# Load the density evaluations, one comma-separated file per algorithm.
# The first group's file names also carry the mixing identifier.
faithful_dens_1 = {
    name: np.genfromtxt(f"{CSVOUT}/faithful_dens_{name}_{mix}.csv", delimiter=',')
    for name in algos1
}
faithful_dens_2 = {
    name: np.genfromtxt(f"{CSVOUT}/faithful_dens_bnpmix_{name}.csv", delimiter=',')
    for name in algos2
}

In [None]:
# Plot densities (contours in log-scale), one panel per algorithm
fig = plt.figure(figsize=(14,18))
fig.suptitle(7*" " + "Pitman-Yor log-density estimation", y=0.92, size='xx-large')
# Panels are filled in row-major order on a 3x2 grid (only the first four are used),
# so each row shows a bayesmix algorithm next to a BNPmix one
algos_all = 'Neal8 mar Neal2 ics'.split()
for idx, algo in enumerate(algos_all, start=1):
    # Means use 1-in-2 thinning (every other row of each density matrix is kept)
    # NOTE(review): bayesmix values are averaged as stored, while BNPmix values are
    # averaged and then logged; if the bayesmix files hold log-densities this is a
    # mean of logs versus a log of means -- confirm this is intended.
    if algo in faithful_dens_1:
        # bayesmix (library picked by where the algorithm was loaded from,
        # not by its position in algos_all)
        dens = np.mean(faithful_dens_1[algo][0::2], axis=0).reshape(-1,1)
        lib = "bayesmix"
    else:
        # BNPmix
        dens = np.log(np.mean(faithful_dens_2[algo][0::2], axis=0)).reshape(-1,1)
        lib = "BNPmix"
    # Attach the values to the grid coordinates, then pivot into a 2-D array
    # with one row per distinct y and one column per distinct x
    plot_data = pd.DataFrame(np.hstack([faithful_grid, dens]),
                             columns=["x", "y", "z"])
    Z = plot_data.pivot_table(index='x', columns='y', values='z').T.values
    X_unique = np.sort(plot_data.x.unique())
    Y_unique = np.sort(plot_data.y.unique())
    X, Y = np.meshgrid(X_unique, Y_unique)
    ax = fig.add_subplot(3, 2, idx)
    # Skip the contour when the pivoted values are all zero
    if Z.any():
        ax.contour(X, Y, Z)
    ax.set_title(f"{lib}'s {algo}")

In [None]:
# Effective sample size of the chain stored in each "nclu" file
# (presumably the number of clusters per iteration), keyed by mixing and algorithm
faithful_ESS1 = {
    mix: {
        name: az.ess(np.genfromtxt(f"{CSVOUT}/faithful_nclu_{name}_{mix}.csv"))
        for name in algos1
    }
}
faithful_ESS2 = {
    mix: {
        name: az.ess(np.genfromtxt(f"{CSVOUT}/faithful_nclu_bnpmix_{name}.csv"))
        for name in algos2
    }
}
# Show ESSs
print("faithful ESS:")
print(faithful_ESS1)
print(faithful_ESS2)

In [None]:
# Execution times per algorithm, entered by hand
# (presumably seconds, given the ESS-per-second ratios computed from them)
faithful_times1 = {"Neal2": 6.703, "Neal3": 45.815, "Neal8": 43.019}
faithful_times2 = {"mar": 11.7906, "ics": 8.82139}

In [None]:
# Sampling efficiency: effective sample size divided by execution time
faithful_ratio1 = {
    name: faithful_ESS1[mix][name] / faithful_times1[name] for name in algos1
}
faithful_ratio2 = {
    name: faithful_ESS2[mix][name] / faithful_times2[name] for name in algos2
}
print("faithful ratios:")
print(faithful_ratio1)
print(faithful_ratio2)