In [71]:
# Authors: Nicholas C. Firth <ncfirth87@gmail.com>
# License: TBC
from ebm import mixture_model
from ebm import mcmc
from ebm import plotting
from ebm import datasets
from ebm import distributions
from matplotlib import pyplot as plt

import warnings
import scipy.optimize as opt
from scipy.stats import kendalltau
import matplotlib as mpl
import json

# Select matplotlib's 'Agg' backend so the notebook can run in an
# environment without a GUI/display.
mpl.use('Agg')  # 'Agg' is a backend for non-GUI environments

# Silence two warning families. `module` is a regular expression matched
# against the start of the name of the module that issues the warning, so
# these rules cover RuntimeWarning from scipy.optimize and UserWarning from
# ebm.mcmc only; all other warnings are still shown.
warnings.filterwarnings('ignore', category=RuntimeWarning, module='scipy.optimize')
warnings.filterwarnings('ignore', category=UserWarning, module='ebm.mcmc')

In [72]:
# Synthetic-data grid: each (n, r) pair from these two lists identifies one
# synthetic input file.
ns = [50, 200, 500]
rs = [0.1, 0.25, 0.5]

# Results container, one sub-dictionary per data source. The evaluation
# functions fill these in with a Kendall's tau value per dataset key.
dic = {
    'synthetic': {},
    'chen_data': {},
}

In [73]:
def obtain_ml_order_bootstrap_sequence(n, r, dic, n_iter=2000,
                                       greedy_n_iter=10, greedy_n_init=2):
    """Score the top-ranked MCMC ordering for one synthetic dataset.

    Loads ``synthetic/{int(n*r)}|{n}_reformatted.csv``, fits one mixture
    model (two Gaussian components) per column of the data matrix, runs
    ``mcmc.mcmc`` and keeps the first sample after a descending sort.
    Kendall's tau between that sample's ordering and the sequence
    ``0, 1, ..., len(ordering) - 1`` is written into ``dic``.

    Parameters
    ----------
    n : int
        Second component of the dataset key and file name.
    r : float
        Multiplied by ``n`` and truncated with ``int`` to form the first
        component of the dataset key and file name.
    dic : dict
        Mutated in place: ``dic[f"{int(n*r)}|{n}"]`` is set to tau.
    n_iter, greedy_n_iter, greedy_n_init : int, optional
        Forwarded unchanged to ``mcmc.mcmc``. The defaults are the values
        that were previously hard-coded, so existing calls behave the same.

    Returns
    -------
    The tau statistic from ``scipy.stats.kendalltau`` (the same object that
    is stored in ``dic``).
    """
    comb_str = f"{int(n*r)}|{n}"
    data_dir = f"synthetic/{comb_str}_reformatted.csv"
    # The loader also returns two name fields that this function does not use.
    X, y, _bmname, _cname = datasets.load_synthetic(data_dir)

    # One independently fitted mixture model per column of X.
    mixture_models = []
    for col in range(X.shape[1]):
        gmm = mixture_model.MixtureModel(cn_comp=distributions.Gaussian(),
                                         ad_comp=distributions.Gaussian())
        gmm.fit(X[:, col], y)
        mixture_models.append(gmm)

    # Closes the current matplotlib figure; presumably something above leaves
    # one open -- TODO confirm against the ebm package.
    plt.close()

    samples = mcmc.mcmc(X, mixture_models, n_iter=n_iter,
                        greedy_n_iter=greedy_n_iter,
                        greedy_n_init=greedy_n_init)
    # Descending sort relies on the sample objects' own comparison operators;
    # presumably that ranks by likelihood (hence "ml_order") -- confirm in ebm.
    samples.sort(reverse=True)
    ml_order = samples[0].ordering

    # Compare against the identity sequence 0..len-1; the p-value is not used.
    tau, _ = kendalltau(ml_order, range(len(ml_order)))
    dic[comb_str] = tau
    return tau

In [74]:
def obtain_ml_order_bootstrap_sequence_chen_data(n, dic, n_iter=2000,
                                                 greedy_n_iter=10,
                                                 greedy_n_init=2):
    """Score the top-ranked MCMC ordering for one chen_data dataset.

    Loads ``chen_data/{n}_reformatted.csv`` (through
    ``datasets.load_synthetic``), fits one mixture model (two Gaussian
    components) per column of the data matrix, runs ``mcmc.mcmc`` and keeps
    the first sample after a descending sort. Kendall's tau between that
    sample's ordering and the sequence ``0, 1, ..., len(ordering) - 1`` is
    written into ``dic``.

    Parameters
    ----------
    n : int
        Identifies the input file and is used (as a string) as the result key.
    dic : dict
        Mutated in place: ``dic[f"{n}"]`` is set to tau.
    n_iter, greedy_n_iter, greedy_n_init : int, optional
        Forwarded unchanged to ``mcmc.mcmc``. The defaults are the values
        that were previously hard-coded, so existing calls behave the same.

    Returns
    -------
    The tau statistic from ``scipy.stats.kendalltau`` (the same object that
    is stored in ``dic``).
    """
    comb_str = f"{n}"
    data_dir = f"chen_data/{comb_str}_reformatted.csv"
    # The loader also returns two name fields that this function does not use.
    X, y, _bmname, _cname = datasets.load_synthetic(data_dir)

    # One independently fitted mixture model per column of X.
    mixture_models = []
    for col in range(X.shape[1]):
        gmm = mixture_model.MixtureModel(cn_comp=distributions.Gaussian(),
                                         ad_comp=distributions.Gaussian())
        gmm.fit(X[:, col], y)
        mixture_models.append(gmm)

    # Closes the current matplotlib figure; presumably something above leaves
    # one open -- TODO confirm against the ebm package.
    plt.close()

    samples = mcmc.mcmc(X, mixture_models, n_iter=n_iter,
                        greedy_n_iter=greedy_n_iter,
                        greedy_n_init=greedy_n_init)
    # Descending sort relies on the sample objects' own comparison operators;
    # presumably that ranks by likelihood (hence "ml_order") -- confirm in ebm.
    samples.sort(reverse=True)
    ml_order = samples[0].ordering

    # Compare against the identity sequence 0..len-1; the p-value is not used.
    tau, _ = kendalltau(ml_order, range(len(ml_order)))
    dic[comb_str] = tau
    return tau

In [75]:
# Evaluate every (n, r) combination, n-major, collecting the results under
# dic['synthetic'].
for n, r in [(size, ratio) for size in ns for ratio in rs]:
    obtain_ml_order_bootstrap_sequence(n=n, r=r, dic=dic['synthetic'])

In [76]:
# Evaluate the two chen_data files, collecting the results under
# dic['chen_data'].
for n in (144, 500):
    obtain_ml_order_bootstrap_sequence_chen_data(n=n, dic=dic['chen_data'])

In [77]:
# Write all collected tau values to disk as 4-space-indented JSON.
tau_json = 'tau_bootstrap_sequence.json'
with open(tau_json, "w") as fp:
    fp.write(json.dumps(dic, indent=4))