In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# KDE hyper-parameter sweep: plot mean NLL against the kernel bandwidth on
# log-log axes and highlight the best configuration.
data = pd.read_csv('./kde_hpo_results.csv')

fontsize = 16
plt.rcParams.update({'font.size': fontsize})

# Make sure both plotted columns are floats before drawing.
data['bandwidth'] = data['bandwidth'].astype(float)
data['nll'] = data['nll'].astype(float)

# Hard-coded optimum of the sweep.
# NOTE(review): presumably the lowest-NLL row of the CSV — confirm, or derive
# it from `data` instead of pasting the values.
best_bandwidth = 0.3520031472796679
best_nll = 1.255128493353645

fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(data['bandwidth'], data['nll'], label='Mean NLL vs. Bandwidth', color='steelblue')

# zorder=5 keeps the marker drawn on top of the curve.
ax.scatter(best_bandwidth, best_nll, color='darkred', label='Best Bandwidth (0.352)', zorder=5)
# Use '.3f' (three decimals); a bare '3f' is only a minimum-width spec and
# would print the default six decimals.
ax.text(best_bandwidth + 0.8, best_nll + 0.5, f'Best NLL: {best_nll:.3f}', fontsize=14,
        verticalalignment='bottom', horizontalalignment='right')

ax.set_xlabel('KDE Bandwidth')
ax.set_ylabel('Mean Negative Log-Likelihood (NLL)')
ax.set_yscale('log')
ax.set_xscale('log')
ax.legend()

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# DP-GMM hyper-parameter sweep: plot mean NLL against the weight
# concentration prior (log-scaled x axis) and highlight the best configuration.
data = pd.read_csv('./bgmm_hpo_results.csv')

fontsize = 16
plt.rcParams.update({'font.size': fontsize})

# Make sure both plotted columns are floats before drawing.
data['weight_concentration_prior'] = data['weight_concentration_prior'].astype(float)
data['nll'] = data['nll'].astype(float)

# Hard-coded optimum of the sweep.
# NOTE(review): presumably the lowest-NLL row of the CSV — confirm, or derive
# it from `data` instead of pasting the values.
best_weight_concentration_prior = 0.0009794696670695395
best_nll = 1.359173272199354

fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(data['weight_concentration_prior'], data['nll'],
        label='Mean NLL vs. Weight Concentration Prior', color='steelblue')

# zorder=5 keeps the marker drawn on top of the curve.
ax.scatter(best_weight_concentration_prior, best_nll, color='darkred',
           label='Best Weight Concentration Prior (9.8e-4)', zorder=5)
# Use '.3f' (three decimals); a bare '3f' is only a minimum-width spec and
# would print the default six decimals.
ax.text(best_weight_concentration_prior + 0.004, best_nll + 0.03, f'Best NLL: {best_nll:.3f}',
        fontsize=14, verticalalignment='bottom', horizontalalignment='right')

ax.set_xlabel('DP-GMM Weight Concentration Prior')
ax.set_ylabel('Mean Negative Log-Likelihood (NLL)')
ax.set_xscale('log')
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt


# Combined figure: KDE bandwidth sweep on the left panel, DP-GMM weight
# concentration prior sweep on the right panel.
data_kde = pd.read_csv('./kde_hpo_results.csv')
data_bgmm = pd.read_csv('./bgmm_hpo_results.csv')

fontsize = 16
plt.rcParams.update({'font.size': fontsize})

# Hard-coded best hyper-parameter of each sweep and the NLL reached there.
best_bandwidth = 0.3520031472796679
best_nll_kde = 1.255128493353645
best_weight_concentration_prior = 0.0009794696670695395
best_nll_bgmm = 1.359173272199354

# Element-wise cast of every plotted column to float.
data_kde['bandwidth'] = data_kde['bandwidth'].map(float)
data_kde['nll'] = data_kde['nll'].map(float)
data_bgmm['weight_concentration_prior'] = data_bgmm['weight_concentration_prior'].map(float)
data_bgmm['nll'] = data_bgmm['nll'].map(float)

# One row, two columns; unpack the panels into named handles.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(18, 6.5))
kde_ax, bgmm_ax = axs
nll_axis_label = 'Mean Negative Log-Likelihood (NLL)'

# --- Left panel: KDE (both axes logarithmic) ---
kde_ax.plot(
    data_kde['bandwidth'],
    data_kde['nll'],
    marker='.',
    color='steelblue',
    label='Mean NLL vs. Bandwidth',
)
kde_ax.scatter(
    best_bandwidth,
    best_nll_kde,
    zorder=5,  # draw the marker above the curve
    color='darkred',
    label='Best Bandwidth (0.352)',
)
kde_ax.text(
    best_bandwidth + 0.8,
    best_nll_kde + 0.5,
    f'Best NLL: {best_nll_kde:.3f}',
    fontsize=14,
    va='bottom',
    ha='right',
)
kde_ax.set_xlabel('KDE Bandwidth')
kde_ax.set_ylabel(nll_axis_label)
kde_ax.set_yscale('log')
kde_ax.set_xscale('log')
kde_ax.legend()

# --- Right panel: DP-GMM (only the x axis is logarithmic) ---
bgmm_ax.plot(
    data_bgmm['weight_concentration_prior'],
    data_bgmm['nll'],
    marker='.',
    color='steelblue',
    label='Mean NLL vs. Weight Concentration Prior',
)
bgmm_ax.scatter(
    best_weight_concentration_prior,
    best_nll_bgmm,
    zorder=5,  # draw the marker above the curve
    color='darkred',
    label='Best Weight Concentration Prior (9.8e-4)',
)
bgmm_ax.text(
    best_weight_concentration_prior + 0.004,
    best_nll_bgmm + 0.03,
    f'Best NLL: {best_nll_bgmm:.3f}',
    fontsize=14,
    va='bottom',
    ha='right',
)
bgmm_ax.set_xlabel('DP-GMM Weight Concentration Prior')
bgmm_ax.set_ylabel(nll_axis_label)
bgmm_ax.set_xscale('log')
bgmm_ax.legend()

plt.tight_layout()
plt.show()
