In [5]:
import csv
import os
import random

import numpy as np
from joblib import Parallel, delayed
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score

In [6]:
def simulate_for_observed(obs_id, n_simulations, n_samples=100, percentile=10):
    """Simulate datasets against one observed dataset and keep the hardest to tell apart.

    An observed sample is drawn from N(0, 1) (model M0). Each simulation then
    picks a model uniformly at random (0: N(0, 1); 1: N(mu, 1) with
    mu ~ N(0, 10)), draws a sample from it, fits a QDA classifier to separate
    observed from simulated points and records the in-sample accuracy.
    Simulations whose accuracy is at or below the requested percentile of all
    accuracies are returned.

    All randomness comes from NumPy Generators seeded from ``obs_id`` and the
    simulation index, so results are reproducible regardless of the global
    ``np.random`` state and of how joblib distributes the work.

    Parameters
    ----------
    obs_id : int
        Non-negative identifier of the observed dataset; also its root seed.
    n_simulations : int
        Number of simulated datasets to generate.
    n_samples : int, optional
        Size of the observed sample and of every simulated sample.
    percentile : float, optional
        Accuracy percentile (0-100) used as the selection threshold.

    Returns
    -------
    list of tuple
        ``(obs_id, accuracy, param, dist_type)`` for every selected
        simulation; an empty list when ``n_simulations`` is not positive.
    """
    if n_simulations <= 0:
        # np.percentile is undefined on an empty array; nothing to select.
        return []

    # Observed dataset from M0, drawn from a generator tied to obs_id
    # (the original created this generator but sampled from the global state).
    obs_rng = np.random.default_rng(np.random.SeedSequence(obs_id))
    obs_data = obs_rng.normal(0, 1, size=n_samples)
    obs_labels = np.zeros(n_samples)

    # One simulation iteration
    def simulate_once(seed):
        # spawn_key makes this stream a child of the obs_id seed: independent
        # of the observed-data stream and of every other (obs_id, seed) pair.
        rng = np.random.default_rng(
            np.random.SeedSequence(obs_id, spawn_key=(seed,))
        )
        dist_type = rng.integers(0, 2)

        # Model 0 has a fixed mean of 0; model 1 draws its mean from the prior.
        param = 0.0 if dist_type == 0 else rng.normal(0, 10)
        sim_data = rng.normal(param, 1, size=n_samples)
        sim_labels = np.ones(n_samples)

        X = np.concatenate([obs_data, sim_data]).reshape(-1, 1)
        y = np.concatenate([obs_labels, sim_labels])

        # Accuracy is measured on the training data itself: it is used as a
        # discrepancy measure between samples, not as a generalisation score.
        qda = QuadraticDiscriminantAnalysis()
        qda.fit(X, y)
        acc = accuracy_score(y, qda.predict(X))

        return acc, param, dist_type

    # Run all simulations in parallel
    results = Parallel(n_jobs=-1)(
        delayed(simulate_once)(i) for i in range(n_simulations)
    )

    accuracies = np.array([r[0] for r in results])
    params = np.array([r[1] for r in results])
    dist_types = np.array([r[2] for r in results])

    # Keep the simulations the classifier found hardest to separate.
    threshold = np.percentile(accuracies, percentile)
    idx_selected = np.where(accuracies <= threshold)[0]

    print(f"Obs {obs_id:03d} — percentile threshold: {threshold:.4f} — {len(idx_selected)} rows selected")

    return [(obs_id, accuracies[i], params[i], dist_types[i]) for i in idx_selected]

In [7]:
# Configuration for the simulation run.
n_simulations = 10**6
n_observed = 100  # number of observed datasets to process
output_dir = "Gutmann_et_al/normal/M0"
os.makedirs(output_dir, exist_ok=True)

# Seed the global NumPy and stdlib generators.
# `random` must be imported in the imports cell, otherwise the call below
# raises NameError on a fresh kernel.
seed = 12345
np.random.seed(seed)
random.seed(seed)

# Run the simulations for each observed dataset and write one CSV per dataset.
for obs_id in range(n_observed):
    print(f"Running simulations for observed dataset {obs_id}")
    results = simulate_for_observed(obs_id, n_simulations=n_simulations)

    # Save to CSV: one row per selected simulation, with a header line.
    filename = os.path.join(output_dir, f"qda_simulations_obs_{obs_id:03d}.csv")
    with open(filename, mode="w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["obs_id", "accuracy", "param", "dist_type"])
        writer.writerows(results)

    print(f"Saved {filename}")

Running simulations for observed dataset 0
Obs 000 — 10th percentile threshold: 0.5050 — 124 rows selected
Saved Gutmann_et_al/normal/M0/qda_simulations_obs_000.csv
Running simulations for observed dataset 1
Obs 001 — 10th percentile threshold: 0.5350 — 133 rows selected
Saved Gutmann_et_al/normal/M0/qda_simulations_obs_001.csv
Running simulations for observed dataset 2
Obs 002 — 10th percentile threshold: 0.4950 — 111 rows selected
Saved Gutmann_et_al/normal/M0/qda_simulations_obs_002.csv
Running simulations for observed dataset 3
Obs 003 — 10th percentile threshold: 0.5400 — 117 rows selected
Saved Gutmann_et_al/normal/M0/qda_simulations_obs_003.csv
Running simulations for observed dataset 4
Obs 004 — 10th percentile threshold: 0.5100 — 114 rows selected
Saved Gutmann_et_al/normal/M0/qda_simulations_obs_004.csv
Running simulations for observed dataset 5
Obs 005 — 10th percentile threshold: 0.5150 — 105 rows selected
Saved Gutmann_et_al/normal/M0/qda_simulations_obs_005.csv
Running si

In [6]:
import csv
import os

folder_path = "Gutmann_et_al/normal/M0"

# One results file per observed dataset.
file_list = [f"qda_simulations_obs_{i:03d}.csv" for i in range(100)]


def summarize_selected_rows(filepath):
    """Summarise one per-dataset results CSV.

    The file is expected to have a header line followed by rows whose third
    column is the simulated parameter and whose fourth column is the model
    index (0 or 1).

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.

    Returns
    -------
    tuple
        ``(freq_model0, mean_param_model1, n_skipped)`` where
        ``freq_model0`` is the fraction of valid rows with model 0 (0.0 when
        there are no valid rows), ``mean_param_model1`` is the mean parameter
        over rows with model 1 (0.0 when there are none) and ``n_skipped`` is
        the number of non-empty rows that could not be parsed.
    """
    n_valid = 0
    n_model0 = 0
    model1_params = []
    n_skipped = 0

    # newline="" is what the csv module recommends for files it reads.
    with open(filepath, "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; tolerate a completely empty file

        for row in reader:
            if not row:
                continue  # blank line, nothing to parse
            try:
                param = float(row[2])
                model = int(row[3])
            except (IndexError, ValueError):
                # Malformed row: keep going, but count it so it can be reported.
                n_skipped += 1
                continue

            n_valid += 1
            if model == 0:
                n_model0 += 1
            elif model == 1:
                model1_params.append(param)

    # Frequency of model == 0
    freq_model0 = n_model0 / n_valid if n_valid else 0.0

    # Mean parameter when model == 1
    # NOTE(review): 0.0 is also a legitimate mean, so a file without model-1
    # rows is indistinguishable from one averaging exactly zero.
    mean_param_model1 = (
        sum(model1_params) / len(model1_params) if model1_params else 0.0
    )

    return freq_model0, mean_param_model1, n_skipped


qda_frequencies = []
qda_means = []

for filename in file_list:
    filepath = os.path.join(folder_path, filename)
    freq_model0, mean_param_model1, n_skipped = summarize_selected_rows(filepath)

    if n_skipped:
        print(f"Warning: skipped {n_skipped} malformed row(s) in {filepath}")

    qda_frequencies.append(freq_model0)
    qda_means.append(mean_param_model1)

# Save results
with open("normal_model0_QDA_probabilities.csv", "w", newline='') as f_out:
    writer = csv.writer(f_out)
    writer.writerow(["QDA"])
    for val in qda_frequencies:
        writer.writerow([val])

with open("normal_model0_QDA_mean_mu.csv", "w", newline='') as f_out:
    writer = csv.writer(f_out)
    writer.writerow(["QDA"])
    for val in qda_means:
        writer.writerow([val])