This notebook generates the sample data for each experiment listed in `config.yaml`.

In [3]:
from pysaebm import generate, get_params_path
import os
import numpy as np 
import json 
import yaml

def load_config():
    """Load the project configuration from ``config.yaml``.

    The file is looked up one directory *above* the current working
    directory (``<cwd>/../config.yaml``), so the result depends on where
    the notebook kernel was started.

    Returns:
        The parsed YAML document as returned by ``yaml.safe_load``
        (``None`` if the file is empty).

    Raises:
        FileNotFoundError: If ``config.yaml`` does not exist at that location.
    """
    config_path = os.path.abspath(os.path.join(os.getcwd(), "..", "config.yaml"))

    # Decode as UTF-8 explicitly; without it open() falls back to the
    # platform's locale encoding, which can mis-decode non-ASCII YAML.
    with open(config_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

def convert_np_types(obj):
    """Recursively replace numpy values with built-in Python equivalents.

    The result is intended to be safe to pass to ``json.dump``.

    Args:
        obj: Any object. Dicts, lists, tuples and numpy arrays are walked
            recursively; numpy integer, floating and boolean scalars are
            converted; everything else is returned unchanged.

    Returns:
        A structure of the same shape in which
        * ``np.integer``  -> ``int``
        * ``np.floating`` -> ``float``
        * ``np.bool_``    -> ``bool``
        * ``np.ndarray``  -> (nested) ``list``
        Dict keys are converted as well as dict values, and tuples stay
        tuples.
    """
    if isinstance(obj, dict):
        # Keys are converted too: json rejects numpy integer keys.
        return {convert_np_types(k): convert_np_types(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_np_types(item) for item in obj]
    elif isinstance(obj, tuple):
        # Keep the tuple type (so it stays hashable when used as a dict key)
        # but still convert any numpy values nested inside it.
        return tuple(convert_np_types(item) for item in obj)
    elif isinstance(obj, np.bool_):
        # np.bool_ is not a subclass of bool and is not JSON serializable.
        return bool(obj)
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        # tolist() already yields Python scalars for numeric dtypes; recurse
        # anyway so object arrays holding numpy values are handled too.
        return convert_np_types(obj.tolist())
    else:
        return obj

In [4]:
# Get path to default parameters
params_file = get_params_path()

config = load_config()
print("Loaded config:")
print(json.dumps(config, indent=4))

OUTPUT_DIR = '../toy_data'
# The JSON summary below is written into this directory, so make sure it
# exists even if generate() does not create it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

all_dicts = []

# One generate() call per experiment name from the config.
# NOTE(review): js/rs are fixed to a single value each here instead of using
# config['JS'] / config['RS'] — presumably intentional for a small toy set; confirm.
for exp_name in config['EXPERIMENT_NAMES']:
    true_order_and_stages_dicts = generate(
        experiment_name=exp_name,
        params_file=params_file,
        js=[200],
        rs=[0.25],
        num_of_datasets_per_combination=1,
        output_dir=OUTPUT_DIR,
        seed=config['GEN_SEED'],
        keep_all_cols=True,
    )
    all_dicts.append(true_order_and_stages_dicts)

# Flatten the per-experiment dicts into one mapping. If two experiments
# return the same key, the later one silently wins.
combined = {k: v for d in all_dicts for k, v in d.items()}
# numpy scalars/arrays are not JSON serializable; convert them first.
combined = convert_np_types(combined)

# Dump the JSON
with open(os.path.join(OUTPUT_DIR, "true_order_and_stages.json"), "w", encoding="utf-8") as f:
    json.dump(combined, f, indent=2)

Loaded config:
{
    "N_VARIANTS": 50,
    "NStartpoints": 10,
    "Niterations": 1000,
    "N_MCMC": 10000,
    "N_SHUFFLE": 2,
    "BURN_IN": 500,
    "THINNING": 1,
    "GEN_SEED": 42,
    "JS": [
        50,
        200,
        500,
        1000
    ],
    "MCMC_SEED": 53,
    "RS": [
        0.1,
        0.25,
        0.5,
        0.75,
        0.9
    ],
    "N_BOOTSTRAP": 50,
    "SA_EBM_ALGO_NAMES": [
        "conjugate_priors",
        "mle",
        "kde",
        "em",
        "hard_kmeans"
    ],
    "OTHER_ALGO_NAMES": [
        "debm",
        "debm_gmm",
        "ucl_gmm",
        "ucl_kde"
    ],
    "OUTPUT_DIR": "algo_results",
    "EXPERIMENT_NAMES": [
        "sn_kjOrdinalDM_xnjNormal",
        "sn_kjOrdinalDM_xnjNonNormal",
        "sn_kjOrdinalUniform_xnjNormal",
        "sn_kjOrdinalUniform_xnjNonNormal",
        "sn_kjContinuousUniform",
        "sn_kjContinuousBeta",
        "xiNearNormal_kjContinuousUniform",
        "xiNearNormal_kjContinuousBeta",
        "