In [7]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, skewnorm

# --- Load target data ---
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable data directory so the notebook can run elsewhere.
training_file = "/work/gbadarac/MonoJet_NPLM/MonoJet_NPLM_analysis/Train_Ensembles/Generate_Data/saved_generated_target_data/2_dim/100k_2d_gaussian_heavy_tail_target_set.npy"
data = np.load(training_file)

# Fail early with a readable message: the code below indexes data[:, 0] and
# data[:, 1] and averages over axis 0, so it needs a non-empty 2-D array with
# at least two columns.
if data.ndim != 2 or data.shape[0] == 0 or data.shape[1] < 2:
    raise ValueError(
        "Expected a non-empty 2-D array with at least two feature columns, "
        f"got shape {data.shape} from {training_file!r}"
    )

# --- Generation params (no scaling) ---
# Feature 1: Bi-modal Gaussian mixture
wG = 0.50       # weight of component "a"; component "b" is weighted by (1 - wG)
mu_a = -0.70    # component "a": mean
sig_a = 0.12    # component "a": standard deviation
mu_b = -0.30    # component "b": mean
sig_b = 0.12    # component "b": standard deviation

# Feature 2: Skew-Normal
loc2 = 1.0      # location
scale2 = 0.75   # scale
alpha2 = 8.0    # shape (skewness) parameter

# --- Analytic means (sanity check) ---
# Feature 1: mixture mean = component means weighted by wG and (1 - wG).
E_x1 = wG * mu_a + (1.0 - wG) * mu_b

# Feature 2: skew-normal mean = loc + scale * delta * sqrt(2/pi),
# where delta = alpha / sqrt(1 + alpha^2).
delta = alpha2 / np.sqrt(1.0 + alpha2**2)
sqrt_two_over_pi = np.sqrt(2.0 / np.pi)
E_x2 = loc2 + scale2 * delta * sqrt_two_over_pi

# Closed-form values first, then the per-column means of the loaded sample,
# so the two rows can be compared by eye.
analytic_means = np.array([E_x1, E_x2])
print("Analytic means:", analytic_means)
sample_means = data.mean(axis=0)
print("Sample means:", sample_means)

# --- Plot marginals (same layout/style) ---
# Font sizes shared by both panels and the figure title.
label_fs, tick_fs, legend_fs, title_fs = 16, 14, 12, 18
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4), sharey=True)

# ===== Feature 1 (Bi-modal Gaussian mixture) =====
ax1 = axes[0]

# Evaluation grid: from 4 (largest) sigma below the lower component mean
# to 4 sigma above the upper one.
x1_pad = 4 * max(sig_a, sig_b)
x1_grid = np.linspace(min(mu_a, mu_b) - x1_pad, max(mu_a, mu_b) + x1_pad, 500)

# Mixture density = weighted sum of the two Gaussian component densities.
pdf_a = norm.pdf(x1_grid, loc=mu_a, scale=sig_a)
pdf_b = norm.pdf(x1_grid, loc=mu_b, scale=sig_b)
pdf1 = wG * pdf_a + (1.0 - wG) * pdf_b

# Density-normalised histogram of the first column, then the analytic curve.
ax1.hist(
    data[:, 0],
    bins=100,
    density=True,
    alpha=0.6,
    color="skyblue",
    edgecolor="black",
    label="Generated sample",
)
ax1.plot(x1_grid, pdf1, "r-", linewidth=2, label="Ground truth")

# Axis cosmetics; the y label is set on this (left) panel only.
ax1.set_xlabel("Feature 1", fontsize=label_fs)
ax1.set_ylabel("Density", fontsize=label_fs)
ax1.tick_params(axis="both", which="major", labelsize=tick_fs)
ax1.legend(fontsize=legend_fs)

# ===== Feature 2 (Skew-Normal) =====
ax2 = axes[1]
x2_sample = data[:, 1]

# Focus on central mass for readability: the curve and the visible x-range
# both span the 0.1% .. 99.9% sample quantiles.
lo, hi = np.quantile(x2_sample, [0.001, 0.999])
x2_grid = np.linspace(lo, hi, num=800)
pdf2 = skewnorm.pdf(x2_grid, alpha2, loc=loc2, scale=scale2)

# Density-normalised histogram of the second column, then the analytic curve.
ax2.hist(
    x2_sample,
    bins=100,
    density=True,
    alpha=0.6,
    color="lightgreen",
    edgecolor="black",
    label="Generated sample",
)
ax2.plot(x2_grid, pdf2, "r-", linewidth=2, label="Ground truth")
ax2.set_xlim(left=lo, right=hi)

# Axis cosmetics (no y label is set on this panel).
ax2.set_xlabel("Feature 2", fontsize=label_fs)
ax2.tick_params(axis="both", which="major", labelsize=tick_fs)
ax2.legend(fontsize=legend_fs)

# Figure-level title, placed at y=0.93 in figure coordinates.
fig.suptitle(
    "Marginal plots (Bi-modal Gaussian + Skew-Normal)",
    fontsize=title_fs,
    y=0.93,
)
# Fit the subplots into the lower 90% of the figure.
fig.tight_layout(rect=(0, 0, 1, 0.9))

# Write the figure to a PDF in the current working directory, then close it.
# NOTE(review): closing here presumably keeps the figure from rendering inline; confirm that is intended.
fig.savefig(
    "marginal_plots_bimodal_gaussian_skewnormal_heavy_tail.pdf",
    bbox_inches="tight",
)
plt.close(fig)



Analytic means: [-0.5        1.5937924]
Sample means: [-0.49937305  1.5939858 ]
