In [2]:
import numpy as np
import torch
import os
from sklearn.neighbors import KernelDensity
from tqdm import tqdm

# -----------------------
# Config
# -----------------------
# Directory that receives every artifact written by this notebook.
# The original location is kept as the default; set the GAUSSIAN_TOY_OUTPUT_DIR
# environment variable to run on another machine without editing this cell.
output_dir = os.environ.get(
    "GAUSSIAN_TOY_OUTPUT_DIR",
    "/Users/jafu/Documents/VSCode_Projects/MastersThesis/MonoJet_NPLM_analysis/Generate_Gaussian_Toy/saved_generated_gaussian_toys",
)
# Create the directory (and any missing parents); a no-op when it already exists.
os.makedirs(output_dir, exist_ok=True)

# Known 2D Gaussian used as the target distribution.
# The covariance is diagonal, i.e. the two features are independent.
mean = np.array([-0.5, 0.6])
cov = np.diag([0.25**2, 0.4**2])
det_cov = np.linalg.det(cov)
inv_cov = np.linalg.inv(cov)

# Seed NumPy's global RNG so every draw below is reproducible
np.random.seed(1234)

# -----------------------
# Generate target data
# -----------------------
n_points = 100000
x_eval = np.random.multivariate_normal(
    mean, cov, size=n_points
).astype(np.float32)  # samples are stored in single precision

# Analytical PDF evaluated at every sample:
#   p(x) = exp(-0.5 * (x - mean)^T cov^{-1} (x - mean)) / (2 * pi * sqrt(det(cov)))
delta = x_eval - mean
mahalanobis_sq = ((delta @ inv_cov) * delta).sum(axis=1)  # row-wise quadratic form
exponent = -0.5 * mahalanobis_sq
norm_const = 1. / (2 * np.pi * np.sqrt(det_cov))
target_probs = np.exp(exponent) * norm_const  # shape: (100000,)



In [3]:
# -----------------------
# Generate toy models with structured bumps
# -----------------------
# Each model is the analytical target density evaluated at x_eval plus one
# localized isotropic Gaussian bump, rescaled so that the per-point values sum
# to len(x_eval). The bump parameters are recorded so they can be saved as
# metadata alongside the models.

n_models = 10
model_probs = []
centers = []
amplitudes = []
widths = []

for i in tqdm(range(n_models), desc="Generating models with structured bumps"):
    # 1. Random center: one of the sampled target points
    center = x_eval[np.random.choice(len(x_eval))]

    # 2. Random amplitude
    amplitude = np.random.uniform(0.05, 0.2)

    # 3. Random bump width (isotropic Gaussian)
    width = np.random.uniform(0.03, 0.1)
    # Bump-specific names: do not overwrite the target distribution's
    # `cov` / `inv_cov` defined in the config cell.
    bump_cov = np.diag([width**2, width**2])
    bump_inv_cov = np.linalg.inv(bump_cov)

    # Compute the (unnormalized) bump at every sample point
    delta = x_eval - center
    exponent = -0.5 * np.sum(delta @ bump_inv_cov * delta, axis=1)
    bump = np.exp(exponent)
    # Normalize so the bump values sum to 1 over the sample points.
    # NOTE(review): this is a sum over samples, not a continuous integral, while
    # target_probs holds density values -- confirm the relative scale is intended.
    bump /= bump.sum()

    # Add to base target
    perturbed = target_probs + amplitude * bump

    # Normalize to make total = len(x_eval) (100k)
    perturbed /= np.sum(perturbed)
    perturbed *= len(x_eval)

    model_probs.append(torch.tensor(perturbed, dtype=torch.float32))

    # Record the bump parameters; previously these lists were never filled,
    # so the saved metadata arrays were empty.
    centers.append(center)
    amplitudes.append(amplitude)
    widths.append(width)


Generating models with structured bumps: 100%|██████████| 10/10 [00:00<00:00, 118.93it/s]


In [6]:
# -----------------------
# Plot marginals
# -----------------------
import matplotlib.pyplot as plt
model_probs_np = torch.stack(model_probs).numpy()  # shape: (10, 100000)
feature_names = ["Feature 1", "Feature 2"]
bins = 100

# One figure per model with two stacked panels (one per feature). Each panel
# shows the histogram of that feature over x_eval, weighted by the model's
# per-point values and divided by the bin width.
for model_idx, probs in enumerate(model_probs_np):
    fig, axes = plt.subplots(2, 1, figsize=(8, 6), sharex=False)

    for ax, name, feature in zip(axes, feature_names, x_eval.T):
        # Equal-width bins spanning the observed range of this feature
        bin_edges = np.linspace(feature.min(), feature.max(), bins + 1)
        bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5

        weighted_counts = np.histogram(feature, bins=bin_edges, weights=probs)[0]
        density = weighted_counts / np.diff(bin_edges)

        ax.plot(bin_centers, density, color="blue")
        ax.set_ylabel("Density")
        ax.set_title(f"Model {model_idx} — Marginal: {name}")

    # Only the bottom panel carries the x-axis label
    axes[-1].set_xlabel("Feature value")
    plt.tight_layout()
    figure_path = os.path.join(output_dir, f"model_{model_idx}_marginals.png")
    fig.savefig(figure_path)
    plt.close(fig)  # release the figure so the loop does not accumulate open figures
print("Marginals plotted and saved.")

Marginals plotted and saved.


In [None]:
# -----------------------
# Save outputs
# -----------------------
# List of per-model weight tensors
weights_path = os.path.join(output_dir, "f_i.pth")
torch.save(model_probs, weights_path)

# Sample points at which the models were evaluated
samples_path = os.path.join(output_dir, "100k_target_training_set.npy")
np.save(samples_path, x_eval)

# Bump parameters, stored under the keys centers / amplitudes / widths
bump_metadata = {
    "centers": np.array(centers),
    "amplitudes": np.array(amplitudes),
    "widths": np.array(widths),
}
np.savez(os.path.join(output_dir, "bump_metadata.npz"), **bump_metadata)

print(f"Saved {n_models} models with structured bumps to: {output_dir}")