In [None]:
# -*- coding: utf-8 -*-
import os
import re
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

# Optional: comment out if you don't need seaborn
import seaborn as sns  # noqa: F401

from scipy.signal import find_peaks  # noqa: F401
from scipy.signal import savgol_filter  # noqa: F401
from scipy.ndimage import gaussian_filter1d  # noqa: F401
from scipy.linalg import svd  # noqa: F401

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold

# ---------------------------------------------------------------------
# Repro defaults (each repeat gets its own seed below; this just fixes
# library-internal nondeterminism as much as feasible)
# ---------------------------------------------------------------------
# Seed NumPy and TensorFlow once up front; each training repeat re-seeds
# itself later, so this only pins whatever runs before the first repeat.
np.random.seed(42)
tf.random.set_seed(42)

# (Optional) make TF less memory hungry on GPU.
# This stays best-effort (a failure must not abort the script), but the
# reason is printed instead of being silently discarded.
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as err:
    print(f"Warning: could not enable GPU memory growth: {err}")

# ---------------------------------------------------------------------
# Helpers you had (kept here; safe imports inside fn so file runs w/o fisher_py)
# ---------------------------------------------------------------------
def helper_regex(text):
    """Return the word that follows ``Full`` in ``str(text)``, or None.

    The match is the first run of word characters after the literal
    ``Full`` and at least one whitespace character.
    """
    found = re.search(r"Full\s+(\w+)", str(text))
    if found is None:
        return None
    return found.group(1)

def MS1Casting(folder_path, file_path):
    """Sum the intensities of all 'Full ms' scans into unit-width mass bins.

    Args:
        folder_path: Directory to switch into before opening the file.
            NOTE: this changes the process-wide working directory.
        file_path: Raw file name/path handed to ``fisher_py``'s ``RawFile``.

    Returns:
        A list of 1369 accumulated intensities. Entry ``i`` collects peaks
        whose mass rounds to ``600 + i``; only rounded masses 601..1968 are
        accepted, so entry 0 always stays 0.

    Raises:
        ImportError: if ``fisher_py`` cannot be imported. The underlying
            failure is attached as ``__cause__``.
    """
    try:
        from fisher_py.raw_file import RawFile
        from fisher_py.scan import Scan
    except Exception as err:
        # Chain explicitly so the real import failure stays in the traceback.
        raise ImportError("fisher_py is required for MS1Casting") from err
    os.chdir(folder_path)
    raw = RawFile(file_path)
    data_intensities = [0]*1369
    # Scan numbers 1 .. number_of_scans - 1 are visited (range end is exclusive).
    for scan_number in tqdm(range(1, raw.number_of_scans)):
        raw_scan = Scan.from_file(raw._raw_file_access, scan_number=scan_number)
        if helper_regex(raw_scan.scan_type) != 'ms':
            continue
        for mass, intensity in zip(raw_scan.preferred_masses, raw_scan.preferred_intensities):
            index = round(mass)
            if 600 < index < 1969:
                data_intensities[index - 600] += intensity
    return data_intensities

def MS1Casting_highres(folder_path, file_path):
    """Sum the intensities of all 'Full ms' scans into 0.1-wide mass bins.

    Args:
        folder_path: Directory to switch into before opening the file.
            NOTE: this changes the process-wide working directory.
        file_path: Raw file name/path handed to ``fisher_py``'s ``RawFile``.

    Returns:
        A list of 13690 accumulated intensities. Each mass is rounded to one
        decimal and multiplied by 10 to get an integer index; indices
        6001..19689 are accepted and stored at ``index - 6000``, so entry 0
        always stays 0.

    Raises:
        ImportError: if ``fisher_py`` cannot be imported. The underlying
            failure is attached as ``__cause__``.
    """
    try:
        from fisher_py.raw_file import RawFile
        from fisher_py.scan import Scan
    except Exception as err:
        # Chain explicitly so the real import failure stays in the traceback.
        raise ImportError("fisher_py is required for MS1Casting_highres") from err
    os.chdir(folder_path)
    raw = RawFile(file_path)
    data_intensities = [0]*13690
    # Scan numbers 1 .. number_of_scans - 1 are visited (range end is exclusive).
    for scan_number in tqdm(range(1, raw.number_of_scans)):
        raw_scan = Scan.from_file(raw._raw_file_access, scan_number=scan_number)
        if helper_regex(raw_scan.scan_type) != 'ms':
            continue
        for mass, intensity in zip(raw_scan.preferred_masses, raw_scan.preferred_intensities):
            index = int(round(mass, 1) * 10)
            if 6000 < index < 19690:
                data_intensities[index - 6000] += intensity
    return data_intensities

# ---------------------------------------------------------------------
# Gradient averaging helper (models-then-samples mean of log-odds gradient)
# ---------------------------------------------------------------------
@tf.function(reduce_retracing=True)
def _log_odds_grad_for_model(x1, model, class_a, class_b, eps):
    """Gradient of log p[class_a] - log p[class_b] with respect to one input row.

    ``x1`` must have a leading dimension of 1 because the result is squeezed
    on axis 0. ``eps`` is added to each probability before the logarithm.
    """
    with tf.GradientTape() as tape:
        tape.watch(x1)
        probs = model(x1, training=False)  # (1, C)
        log_p_a = tf.math.log(probs[:, class_a] + eps)
        log_p_b = tf.math.log(probs[:, class_b] + eps)
        log_odds = log_p_a - log_p_b
    grad = tape.gradient(log_odds, x1)  # (1, D)
    return tf.squeeze(grad, axis=0)     # (D,)

def compute_avg_logodds_gradient_for_pair(X: tf.Tensor, models: list, class_a: int, class_b: int, eps: float = 1e-8):
    """
    Average input gradient of log p(class_a|x) - log p(class_b|x) across samples and models.

    Each model is evaluated once on the whole batch rather than once per
    sample. ``tape.gradient`` of the per-row log-odds vector with respect to
    ``X`` gives one gradient row per sample, provided the model scores rows
    independently (as the Dense-only networks built in this file do).
    Averaging over samples first and models second is the same mean as the
    reverse order, up to float32 rounding.

    Args:
        X: (N, D) inputs; converted to a float32 tensor.
        models: Non-empty list of callables accepting ``(X, training=False)``
            and returning (N, C) class probabilities.
        class_a, class_b: Column indices of the two classes being compared.
        eps: Added to each probability before the logarithm.

    Returns (D,) tensor.

    Raises:
        ValueError: if ``X`` has no rows or ``models`` is empty.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)
    if X.shape[0] == 0:
        raise ValueError("X must contain at least one sample")
    if not models:
        raise ValueError("models must contain at least one model")

    per_model_grads = []
    for m in models:
        with tf.GradientTape() as tape:
            tape.watch(X)
            p = m(X, training=False)  # (N, C)
            log_odds = tf.math.log(p[:, class_a] + eps) - tf.math.log(p[:, class_b] + eps)  # (N,)
        g = tape.gradient(log_odds, X)  # (N, D): one gradient row per sample
        per_model_grads.append(tf.reduce_mean(g, axis=0))  # (D,)

    avg_grad = tf.reduce_mean(tf.stack(per_model_grads, axis=0), axis=0)  # (D,)
    return avg_grad

# ---------------------------------------------------------------------
# Simple deconvolution + plotting (unchanged)
# ---------------------------------------------------------------------
def charge_state_deconvolution(x_values, y_values, max_charge=50, intensity_threshold=0.0005,
                               mass_min=10000, mass_max=20000):
    """Project an m/z spectrum onto a neutral-mass grid by trying every charge.

    For each charge 1..max_charge, every point whose ``x * charge`` falls
    inside ``[mass_min, mass_max]`` and whose intensity exceeds
    ``intensity_threshold`` adds its intensity to the grid bin located with
    ``np.searchsorted``.

    Args:
        x_values: 1-D sequence of m/z values (list or array).
        y_values: Intensities, same length as ``x_values``.
        max_charge: Highest charge tried (inclusive).
        intensity_threshold: Points at or below this intensity are ignored.
        mass_min, mass_max: Bounds of the neutral-mass grid (inclusive window).

    Returns:
        ``(mass_range, deconvoluted_spectrum, charge_mapping, top_peak)``:
        the grid (as many points as the input), the accumulated spectrum,
        a dict mapping each contributing m/z to the last charge that placed
        it inside the window, and the ``(neutral_mass, intensity)`` entry with
        the highest accumulated intensity, or ``(None, None)`` if nothing
        qualified.

    Raises:
        ValueError: if ``x_values`` and ``y_values`` differ in length.
    """
    # Coerce to arrays: with a plain list, ``x_values * charge`` would repeat
    # the list instead of scaling the masses.
    mz = np.asarray(x_values, dtype=float).ravel()
    intensity = np.asarray(y_values, dtype=float).ravel()
    if mz.shape != intensity.shape:
        raise ValueError("x_values and y_values must have the same length")

    mass_range = np.linspace(mass_min, mass_max, mz.size)
    deconvoluted_spectrum = np.zeros_like(mass_range)
    charge_mapping = {}
    peak_list = []

    # The intensity filter does not depend on the charge, so compute it once.
    strong = intensity > intensity_threshold

    for charge in range(1, max_charge + 1):
        neutral_masses = mz * charge
        hits = np.flatnonzero(
            strong & (neutral_masses >= mass_min) & (neutral_masses <= mass_max)
        )
        if hits.size == 0:
            continue
        bins = np.searchsorted(mass_range, neutral_masses[hits])
        for i, idx in zip(hits, bins):
            if idx < deconvoluted_spectrum.size:
                deconvoluted_spectrum[idx] += intensity[i]
                charge_mapping[mz[i]] = charge
                # Records the running total of the bin at this moment.
                peak_list.append((neutral_masses[i], deconvoluted_spectrum[idx]))

    top_peak = max(peak_list, key=lambda peak: peak[1]) if peak_list else (None, None)
    return mass_range, deconvoluted_spectrum, charge_mapping, top_peak

def plot_raw_spectrum(x_values, observed_spectrum, charge_mapping):
    """Show ``observed_spectrum`` against ``x_values`` as a dashed line plot.

    ``charge_mapping`` is accepted for call compatibility but is not used.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(x_values, observed_spectrum, label='Observed Spectrum', linestyle='--')
    ax.legend()
    ax.set_xlabel('Mass/Charge (m/z)')
    ax.set_ylabel('Intensity')
    ax.set_title('')
    plt.show()

# ---------------------------------------------------------------------
# Data load & prep (bin == 45; normalize each column by (max+1))
# ---------------------------------------------------------------------
CSV_PATH = r'F:/casts/dataset_rt.csv'   # <- adjust if needed
BIN_VALUE = 45

df = pd.read_csv(CSV_PATH)
filtered_df = df[df['bin'] == BIN_VALUE].copy()
if filtered_df.empty:
    # Fail here with a clear message rather than later inside np.max / Keras.
    raise ValueError(f"No rows with bin == {BIN_VALUE} in {CSV_PATH}")

# Scale every feature column by (column max + 1); 'bin' and 'target' are left alone.
cols_to_normalize = filtered_df.columns.difference(['bin', 'target'])
filtered_df[cols_to_normalize] = filtered_df[cols_to_normalize].apply(lambda x: x / (x.max() + 1.0))
filtered_df = filtered_df.drop(columns=['bin'])

X = filtered_df.copy()
Y = X.pop("target")
X_train = np.nan_to_num(np.array(X), copy=False)
y_train = np.nan_to_num(np.array(Y), copy=False)

input_dim = X_train.shape[1]          # expected 13690
num_classes = int(np.max(y_train)) + 1
# Explicit check instead of ``assert`` so it still runs under ``python -O``.
if input_dim < 10000:
    raise ValueError(f"Expected >=10000 features, got {input_dim}")

# ---------------------------------------------------------------------
# Model builder
# ---------------------------------------------------------------------
def build_model(input_dim: int, num_classes: int):
    """Build and compile the 128-32-softmax classifier.

    The input shape is declared with an explicit ``tf.keras.Input`` layer
    rather than ``input_dim=`` on the first Dense layer, which newer Keras
    versions warn about.

    Args:
        input_dim: Number of input features.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam, sparse categorical
        cross-entropy, accuracy metric). The first Dense layer carries an
        L1 kernel regularizer of 0.01.
    """
    model = Sequential([
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l1(0.01)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

# ---------------------------------------------------------------------
# Nested training: 5-fold CV × 10 random inits within each fold = 50 models
# ---------------------------------------------------------------------
# Training configuration: k folds, N_REPEATS differently seeded models per fold.
k = 5
N_REPEATS = 10
EPOCHS = 50
BATCH_SIZE = 32
rand_int = 100  # seed base; per-model seed is rand_int * fold + repeat index

# Fixed random_state: the fold assignment is the same on every run.
kf = KFold(n_splits=k, shuffle=True, random_state=42)

all_models = []   # will hold 50 models
fold_histories = []  # one Keras history dict per trained model, same order as all_models

for fold, (train_idx, val_idx) in enumerate(kf.split(X_train, y_train), 1):
    print(f"\n=== Fold {fold}/{k} ===")
    X_tr, y_tr = X_train[train_idx], y_train[train_idx]
    X_va, y_va = X_train[val_idx], y_train[val_idx]

    for r in range(N_REPEATS):
        # Re-seed before building so each repeat gets its own seeded start.
        seed = rand_int * fold + r
        tf.keras.utils.set_random_seed(seed)
        np.random.seed(seed)

        m = build_model(input_dim, num_classes)
        # The validation fold is only monitored; it does not stop training early.
        hist = m.fit(
            X_tr, y_tr,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            validation_data=(X_va, y_va),
            verbose=0
        )
        all_models.append(m)
        fold_histories.append(hist.history)
        print(f"  Trained model {r+1}/{N_REPEATS} for fold {fold} (seed={seed})")

print(f"\nTotal models trained: {len(all_models)} (expected 50)")

# ---------------------------------------------------------------------
# Average gradients across ALL 50 models
# ---------------------------------------------------------------------
# Gradients are evaluated on the full (filtered) data set, not per fold.
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)

# Example: compute gradients for two pairs (adjust indices as needed)
# NOTE(review): class index 2 requires num_classes >= 3 — confirm for this data set.
avg_grad_all_1_vs_0 = compute_avg_logodds_gradient_for_pair(
    X_train_tensor, all_models, class_a=1, class_b=0
)
avg_grad_all_2_vs_0 = compute_avg_logodds_gradient_for_pair(
    X_train_tensor, all_models, class_a=2, class_b=0
)

# ---------------------------------------------------------------------
# Persist averaged gradients
# ---------------------------------------------------------------------
# One .npy file per class pair, written under out_dir (created if missing).
out_dir = "./avg_grads_5x10"
os.makedirs(out_dir, exist_ok=True)
np.save(os.path.join(out_dir, "avg_grad_all_1_vs_0.npy"), avg_grad_all_1_vs_0.numpy())
np.save(os.path.join(out_dir, "avg_grad_all_2_vs_0.npy"), avg_grad_all_2_vs_0.numpy())
print(f"Saved averaged gradients to: {out_dir}")

# ---------------------------------------------------------------------
# Map first 10,000 features to x-grid (600..1600 by 0.1) and visualize
# ---------------------------------------------------------------------
# Feature i of the first 10,000 features is plotted at x[i] = 600 + 0.1 * i.
x = np.arange(600, 1600, 0.1)  # length 10000

y_1_vs_0 = avg_grad_all_1_vs_0.numpy().flatten()[:10000]
y_2_vs_0 = avg_grad_all_2_vs_0.numpy().flatten()[:10000]

# Split pos/neg for 2 vs 0 example
# (entries that are exactly zero end up in neither half)
x_pos_2v0 = x[y_2_vs_0 > 0]
y_pos_2v0 = y_2_vs_0[y_2_vs_0 > 0]
x_neg_2v0 = x[y_2_vs_0 < 0]
y_neg_2v0 = y_2_vs_0[y_2_vs_0 < 0]

# Deconvolution + plots (negative grads as positive by multiplying -1)
y_values = -1.0 * y_neg_2v0
x_values = x_neg_2v0
mass_values, deconvoluted_spectrum, charge_mapping, top_peak = charge_state_deconvolution(x_values, y_values)
print("Top Neutral Mass (neg grads, ALL 50 models):", top_peak)
plot_raw_spectrum(x_values, y_values, charge_mapping)

# Same analysis for the positive gradients; the result variables above are overwritten.
y_values = y_pos_2v0
x_values = x_pos_2v0
mass_values, deconvoluted_spectrum, charge_mapping, top_peak = charge_state_deconvolution(x_values, y_values)
print("Top Neutral Mass (pos grads, ALL 50 models):", top_peak)
plot_raw_spectrum(x_values, y_values, charge_mapping)


In [1]:
# -*- coding: utf-8 -*-
import os
import re
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

# Optional
import seaborn as sns  # noqa: F401

from scipy.signal import find_peaks  # noqa: F401
from scipy.signal import savgol_filter  # noqa: F401
from scipy.ndimage import gaussian_filter1d  # noqa: F401
from scipy.linalg import svd  # noqa: F401

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold

# ---------------------------------------------------------------------
# Repro defaults
# ---------------------------------------------------------------------
# Seed NumPy and TensorFlow once up front; each training repeat re-seeds
# itself later, so this only pins whatever runs before the first repeat.
np.random.seed(42)
tf.random.set_seed(42)
# Enabling GPU memory growth stays best-effort (a failure must not abort the
# script), but the reason is printed instead of being silently discarded.
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as err:
    print(f"Warning: could not enable GPU memory growth: {err}")

# ---------------------------------------------------------------------
# Your helpers (kept for completeness; unused in this script’s flow)
# ---------------------------------------------------------------------
def helper_regex(text):
    """Return the word that follows ``Full`` in ``str(text)``, or None.

    The match is the first run of word characters after the literal
    ``Full`` and at least one whitespace character.
    """
    found = re.search(r"Full\s+(\w+)", str(text))
    if found is None:
        return None
    return found.group(1)

def MS1Casting(folder_path, file_path):
    """Sum the intensities of all 'Full ms' scans into unit-width mass bins.

    Args:
        folder_path: Directory to switch into before opening the file.
            NOTE: this changes the process-wide working directory.
        file_path: Raw file name/path handed to ``fisher_py``'s ``RawFile``.

    Returns:
        A list of 1369 accumulated intensities. Entry ``i`` collects peaks
        whose mass rounds to ``600 + i``; only rounded masses 601..1968 are
        accepted, so entry 0 always stays 0.

    Raises:
        ImportError: if ``fisher_py`` cannot be imported. The underlying
            failure is attached as ``__cause__``.
    """
    try:
        from fisher_py.raw_file import RawFile
        from fisher_py.scan import Scan
    except Exception as err:
        # Chain explicitly so the real import failure stays in the traceback.
        raise ImportError("fisher_py is required for MS1Casting") from err
    os.chdir(folder_path)
    raw = RawFile(file_path)
    data_intensities = [0]*1369
    # Scan numbers 1 .. number_of_scans - 1 are visited (range end is exclusive).
    for scan_number in tqdm(range(1, raw.number_of_scans)):
        raw_scan = Scan.from_file(raw._raw_file_access, scan_number=scan_number)
        if helper_regex(raw_scan.scan_type) != 'ms':
            continue
        for mass, intensity in zip(raw_scan.preferred_masses, raw_scan.preferred_intensities):
            index = round(mass)
            if 600 < index < 1969:
                data_intensities[index - 600] += intensity
    return data_intensities

def MS1Casting_highres(folder_path, file_path):
    """Sum the intensities of all 'Full ms' scans into 0.1-wide mass bins.

    Args:
        folder_path: Directory to switch into before opening the file.
            NOTE: this changes the process-wide working directory.
        file_path: Raw file name/path handed to ``fisher_py``'s ``RawFile``.

    Returns:
        A list of 13690 accumulated intensities. Each mass is rounded to one
        decimal and multiplied by 10 to get an integer index; indices
        6001..19689 are accepted and stored at ``index - 6000``, so entry 0
        always stays 0.

    Raises:
        ImportError: if ``fisher_py`` cannot be imported. The underlying
            failure is attached as ``__cause__``.
    """
    try:
        from fisher_py.raw_file import RawFile
        from fisher_py.scan import Scan
    except Exception as err:
        # Chain explicitly so the real import failure stays in the traceback.
        raise ImportError("fisher_py is required for MS1Casting_highres") from err
    os.chdir(folder_path)
    raw = RawFile(file_path)
    data_intensities = [0]*13690
    # Scan numbers 1 .. number_of_scans - 1 are visited (range end is exclusive).
    for scan_number in tqdm(range(1, raw.number_of_scans)):
        raw_scan = Scan.from_file(raw._raw_file_access, scan_number=scan_number)
        if helper_regex(raw_scan.scan_type) != 'ms':
            continue
        for mass, intensity in zip(raw_scan.preferred_masses, raw_scan.preferred_intensities):
            index = int(round(mass, 1) * 10)
            if 6000 < index < 19690:
                data_intensities[index - 6000] += intensity
    return data_intensities

# ---------------------------------------------------------------------
# Gradient helper
# ---------------------------------------------------------------------
@tf.function(reduce_retracing=True)
def _log_odds_grad_for_model(x1, model, class_a, class_b, eps):
    """Gradient of log p[class_a] - log p[class_b] with respect to one input row.

    ``x1`` must have a leading dimension of 1 because the result is squeezed
    on axis 0. ``eps`` is added to each probability before the logarithm.
    """
    with tf.GradientTape() as tape:
        tape.watch(x1)
        probs = model(x1, training=False)  # (1, C)
        log_p_a = tf.math.log(probs[:, class_a] + eps)
        log_p_b = tf.math.log(probs[:, class_b] + eps)
        log_odds = log_p_a - log_p_b
    grad = tape.gradient(log_odds, x1)  # (1, D)
    return tf.squeeze(grad, axis=0)     # (D,)

def compute_avg_logodds_gradient_for_pair(X: tf.Tensor, models: list, class_a: int, class_b: int, eps: float = 1e-8):
    """
    Average input gradient of log p(class_a|x) - log p(class_b|x) across samples and models.

    Each model is evaluated once on the whole batch rather than once per
    sample. ``tape.gradient`` of the per-row log-odds vector with respect to
    ``X`` gives one gradient row per sample, provided the model scores rows
    independently (as the Dense-only networks built in this file do).
    Averaging over samples first and models second is the same mean as the
    reverse order, up to float32 rounding.

    Args:
        X: (N, D) inputs; converted to a float32 tensor.
        models: Non-empty list of callables accepting ``(X, training=False)``
            and returning (N, C) class probabilities.
        class_a, class_b: Column indices of the two classes being compared.
        eps: Added to each probability before the logarithm.

    Returns (D,) tensor.

    Raises:
        ValueError: if ``X`` has no rows or ``models`` is empty.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)
    if X.shape[0] == 0:
        raise ValueError("X must contain at least one sample")
    if not models:
        raise ValueError("models must contain at least one model")

    per_model_grads = []
    for m in models:
        with tf.GradientTape() as tape:
            tape.watch(X)
            p = m(X, training=False)  # (N, C)
            log_odds = tf.math.log(p[:, class_a] + eps) - tf.math.log(p[:, class_b] + eps)  # (N,)
        g = tape.gradient(log_odds, X)  # (N, D): one gradient row per sample
        per_model_grads.append(tf.reduce_mean(g, axis=0))  # (D,)

    avg_grad = tf.reduce_mean(tf.stack(per_model_grads, axis=0), axis=0)  # (D,)
    return avg_grad

# ---------------------------------------------------------------------
# Simple deconvolution (kept from your code; not used in comparisons)
# ---------------------------------------------------------------------
def charge_state_deconvolution(x_values, y_values, max_charge=50, intensity_threshold=0.0005,
                               mass_min=10000, mass_max=20000):
    """Project an m/z spectrum onto a neutral-mass grid by trying every charge.

    For each charge 1..max_charge, every point whose ``x * charge`` falls
    inside ``[mass_min, mass_max]`` and whose intensity exceeds
    ``intensity_threshold`` adds its intensity to the grid bin located with
    ``np.searchsorted``.

    Args:
        x_values: 1-D sequence of m/z values (list or array).
        y_values: Intensities, same length as ``x_values``.
        max_charge: Highest charge tried (inclusive).
        intensity_threshold: Points at or below this intensity are ignored.
        mass_min, mass_max: Bounds of the neutral-mass grid (inclusive window).

    Returns:
        ``(mass_range, deconvoluted_spectrum, charge_mapping, top_peak)``:
        the grid (as many points as the input), the accumulated spectrum,
        a dict mapping each contributing m/z to the last charge that placed
        it inside the window, and the ``(neutral_mass, intensity)`` entry with
        the highest accumulated intensity, or ``(None, None)`` if nothing
        qualified.

    Raises:
        ValueError: if ``x_values`` and ``y_values`` differ in length.
    """
    # Coerce to arrays: with a plain list, ``x_values * charge`` would repeat
    # the list instead of scaling the masses.
    mz = np.asarray(x_values, dtype=float).ravel()
    intensity = np.asarray(y_values, dtype=float).ravel()
    if mz.shape != intensity.shape:
        raise ValueError("x_values and y_values must have the same length")

    mass_range = np.linspace(mass_min, mass_max, mz.size)
    deconvoluted_spectrum = np.zeros_like(mass_range)
    charge_mapping = {}
    peak_list = []

    # The intensity filter does not depend on the charge, so compute it once.
    strong = intensity > intensity_threshold

    for charge in range(1, max_charge + 1):
        neutral_masses = mz * charge
        hits = np.flatnonzero(
            strong & (neutral_masses >= mass_min) & (neutral_masses <= mass_max)
        )
        if hits.size == 0:
            continue
        bins = np.searchsorted(mass_range, neutral_masses[hits])
        for i, idx in zip(hits, bins):
            if idx < deconvoluted_spectrum.size:
                deconvoluted_spectrum[idx] += intensity[i]
                charge_mapping[mz[i]] = charge
                # Records the running total of the bin at this moment.
                peak_list.append((neutral_masses[i], deconvoluted_spectrum[idx]))

    top_peak = max(peak_list, key=lambda peak: peak[1]) if peak_list else (None, None)
    return mass_range, deconvoluted_spectrum, charge_mapping, top_peak

# ---------------------------------------------------------------------
# Utils: model builder, cosine, mirror plot
# ---------------------------------------------------------------------
def build_model(input_dim: int, num_classes: int):
    """Build and compile the 128-32-softmax classifier.

    The input shape is declared with an explicit ``tf.keras.Input`` layer
    rather than ``input_dim=`` on the first Dense layer, which newer Keras
    versions warn about.

    Args:
        input_dim: Number of input features.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam, sparse categorical
        cross-entropy, accuracy metric). The first Dense layer carries an
        L1 kernel regularizer of 0.01.
    """
    model = Sequential([
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l1(0.01)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

def cosine_sim(a: np.ndarray, b: np.ndarray, eps: float = 1e-12) -> float:
    """Cosine similarity of two vectors, flattened and cut to a common length.

    ``eps`` is added to the product of the norms, so an all-zero input
    yields 0.0 instead of a division by zero.
    """
    u = np.asarray(a, dtype=float).ravel()
    v = np.asarray(b, dtype=float).ravel()
    common = min(u.size, v.size)
    u, v = u[:common], v[:common]
    scale = np.linalg.norm(u) * np.linalg.norm(v) + eps
    return float(np.dot(u, v) / scale)

def mirror_plot(x, top_y, bottom_y, title, outfile):
    """
    Save a mirror plot: ``top_y`` above the zero line, ``bottom_y`` negated below it.

    The figure is written to ``outfile`` at 200 dpi and then closed.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(x, top_y, linewidth=1.0, label="Run A")
    ax.plot(x, -bottom_y, linewidth=1.0, label="Run B (mirrored)")
    ax.axhline(0.0, color="k", linewidth=0.8)
    ax.set_xlabel("m/z (600..1600 at 0.1 step)")
    ax.set_ylabel("Gradient magnitude")
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    fig.savefig(outfile, dpi=200)
    plt.close(fig)

# ---------------------------------------------------------------------
# Config / data
# ---------------------------------------------------------------------
# Input data and training hyper-parameters.
CSV_PATH = r'F:/casts/dataset_rt.csv'   # <- set appropriately
BIN_VALUE = 45      # only rows whose 'bin' column equals this value are used
EPOCHS = 50
BATCH_SIZE = 32
k = 5               # number of cross-validation folds
N_REPEATS = 10      # differently seeded models trained per fold

# Two independent training baselines:
# each entry is a seed base; per-model seeds are derived as base * fold + repeat.
RAND_INTS = [100, 777]

# Output locations; the plot directory (and its parent) is created on import.
OUT_DIR = "./two_run_compare_5x10"
PLOT_DIR = os.path.join(OUT_DIR, "plots")
os.makedirs(PLOT_DIR, exist_ok=True)

# ---------------------------------------------------------------------
# Load + normalize (per your pattern): drop bin, normalize each feature col by (max+1)
# ---------------------------------------------------------------------
df = pd.read_csv(CSV_PATH)
filtered_df = df[df['bin'] == BIN_VALUE].copy()
if filtered_df.empty:
    # Fail here with a clear message rather than later inside np.max / Keras.
    raise ValueError(f"No rows with bin == {BIN_VALUE} in {CSV_PATH}")

# Scale every feature column by (column max + 1); 'bin' and 'target' are left alone.
cols_to_normalize = filtered_df.columns.difference(['bin', 'target'])
filtered_df[cols_to_normalize] = filtered_df[cols_to_normalize].apply(lambda x: x / (x.max() + 1.0))
filtered_df = filtered_df.drop(columns=['bin'])

X_df = filtered_df.copy()
Y = X_df.pop("target").to_numpy()
X = np.nan_to_num(X_df.to_numpy(), copy=False)
input_dim = X.shape[1]
num_classes = int(np.max(Y)) + 1
# Explicit check instead of ``assert`` so it still runs under ``python -O``.
if input_dim < 10000:
    raise ValueError(f"Expected >=10000 features, got {input_dim}")

# Map first 10,000 features to x-grid (600..1600 by 0.1)
x_grid = np.arange(600, 1600, 0.1)  # length 10000

# ---------------------------------------------------------------------
# Function: train full 5x10 and return averaged grad for class 2 vs 0
# ---------------------------------------------------------------------
def train_and_avg_grad(rand_int_base: int) -> np.ndarray:
    """Train k x N_REPEATS models and return their averaged 2-vs-0 log-odds gradient.

    Folds come from a KFold with a fixed random_state, so only the model seeds
    (``rand_int_base * fold + repeat``) differ between calls. The gradient is
    evaluated on the full module-level ``X`` and returned as a NumPy array.
    """
    splitter = KFold(n_splits=k, shuffle=True, random_state=42)
    trained = []

    for fold, (train_idx, val_idx) in enumerate(splitter.split(X, Y), 1):
        x_fit, y_fit = X[train_idx], Y[train_idx]
        held_out = (X[val_idx], Y[val_idx])

        for r in range(N_REPEATS):
            # Seed first, then build: the seed must be in place before the
            # model is created and trained.
            seed = rand_int_base * fold + r
            tf.keras.utils.set_random_seed(seed)
            np.random.seed(seed)

            net = build_model(input_dim, num_classes)
            net.fit(x_fit, y_fit, epochs=EPOCHS, batch_size=BATCH_SIZE,
                    validation_data=held_out, verbose=0)
            trained.append(net)
        print(f"[Seed base {rand_int_base}] Fold {fold}/{k} trained {N_REPEATS} models (total so far: {len(trained)})")

    X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
    averaged = compute_avg_logodds_gradient_for_pair(X_tensor, trained, class_a=2, class_b=0)
    return averaged.numpy()

# ---------------------------------------------------------------------
# Run A and Run B
# ---------------------------------------------------------------------
# Two full training runs that share data and folds and differ only in seed base.
avg_A = train_and_avg_grad(RAND_INTS[0])
avg_B = train_and_avg_grad(RAND_INTS[1])

# Keep only the first 10k matching the x_grid
yA_full = avg_A.flatten()[:10000]
yB_full = avg_B.flatten()[:10000]

# Build “channels”
#   Pos-only  : keep positives, zeros elsewhere
#   Neg-only  : keep absolute value of negatives, zeros elsewhere
yA_pos = np.where(yA_full > 0, yA_full, 0.0)
yB_pos = np.where(yB_full > 0, yB_full, 0.0)
yA_neg = np.where(yA_full < 0, -yA_full, 0.0)  # abs of neg
yB_neg = np.where(yB_full < 0, -yB_full, 0.0)

# Cosine similarities
# pos + neg is the element-wise absolute value, so "full" compares |A| with |B|
# and ignores the sign of the gradient.
cos_full = cosine_sim(yA_pos + yA_neg, yB_pos + yB_neg)
cos_pos  = cosine_sim(yA_pos, yB_pos)
cos_neg  = cosine_sim(yA_neg, yB_neg)

print("\n=== Cosine similarities (Run A vs Run B, class 2 vs 0) ===")
print(f"Full (pos + |neg|): {cos_full:.6f}")
print(f"Pos-only          : {cos_pos:.6f}")
print(f"Neg-only (|neg|)  : {cos_neg:.6f}")

# Save CSV of vectors + similarities
# (one row per x_grid point; the similarities go to a separate JSON file below)
csv_out = os.path.join(OUT_DIR, "run_comparison_vectors.csv")
pd.DataFrame({
    "m/z": x_grid,
    "yA_pos": yA_pos,
    "yA_neg_abs": yA_neg,
    "yB_pos": yB_pos,
    "yB_neg_abs": yB_neg,
}).to_csv(csv_out, index=False)

with open(os.path.join(OUT_DIR, "cosine_summary.json"), "w") as f:
    json.dump({"cos_full": cos_full, "cos_pos": cos_pos, "cos_neg": cos_neg}, f, indent=2)

# ---------------------------------------------------------------------
# Mirror plots (Run A on top; Run B mirrored on bottom)
# ---------------------------------------------------------------------
# One PNG per channel, written into PLOT_DIR.
mirror_plot(x_grid, yA_pos + yA_neg, yB_pos + yB_neg,
            title="Mirror Plot — Full (pos + |neg|), class 2 vs 0",
            outfile=os.path.join(PLOT_DIR, "mirror_full_2v0.png"))

mirror_plot(x_grid, yA_pos, yB_pos,
            title="Mirror Plot — Positive only, class 2 vs 0",
            outfile=os.path.join(PLOT_DIR, "mirror_pos_2v0.png"))

mirror_plot(x_grid, yA_neg, yB_neg,
            title="Mirror Plot — Negative only (|neg|), class 2 vs 0",
            outfile=os.path.join(PLOT_DIR, "mirror_negabs_2v0.png"))

print(f"\nSaved plots -> {PLOT_DIR}")
print(f"Saved vectors -> {csv_out}")
print(f"Saved cosine summary -> {os.path.join(OUT_DIR, 'cosine_summary.json')}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[Seed base 100] Fold 1/5 trained 10 models (total so far: 10)
[Seed base 100] Fold 2/5 trained 10 models (total so far: 20)
[Seed base 100] Fold 3/5 trained 10 models (total so far: 30)
[Seed base 100] Fold 4/5 trained 10 models (total so far: 40)
[Seed base 100] Fold 5/5 trained 10 models (total so far: 50)
[Seed base 777] Fold 1/5 trained 10 models (total so far: 10)
[Seed base 777] Fold 2/5 trained 10 models (total so far: 20)
[Seed base 777] Fold 3/5 trained 10 models (total so far: 30)
[Seed base 777] Fold 4/5 trained 10 models (total so far: 40)
[Seed base 777] Fold 5/5 trained 10 models (total so far: 50)

=== Cosine similarities (Run A vs Run B, class 2 vs 0) ===
Full (pos + |neg|): 0.972631
Pos-only          : 0.974154
Neg-only (|neg|)  : 0.838420

Saved plots -> ./two_run_compare_5x10\plots
Saved vectors -> ./two_run_compare_5x10\run_comparison_vectors.csv
Saved cosine summary -> ./two_run_compare_5x10\cosine_summary.json


In [2]:
# -*- coding: utf-8 -*-
import os
import re
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

# Optional
import seaborn as sns  # noqa: F401

from scipy.signal import find_peaks  # noqa: F401
from scipy.signal import savgol_filter  # noqa: F401
from scipy.ndimage import gaussian_filter1d  # noqa: F401
from scipy.linalg import svd  # noqa: F401

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold

# ---------------------------------------------------------------------
# Repro defaults
# ---------------------------------------------------------------------
# Seed NumPy and TensorFlow once up front; each training repeat re-seeds
# itself later, so this only pins whatever runs before the first repeat.
np.random.seed(42)
tf.random.set_seed(42)
# Enabling GPU memory growth stays best-effort (a failure must not abort the
# script), but the reason is printed instead of being silently discarded.
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as err:
    print(f"Warning: could not enable GPU memory growth: {err}")

# ---------------------------------------------------------------------
# (Kept for completeness; unused in main flow)
# ---------------------------------------------------------------------
def helper_regex(text):
    """Return the word that follows ``Full`` in ``str(text)``, or None.

    The match is the first run of word characters after the literal
    ``Full`` and at least one whitespace character.
    """
    found = re.search(r"Full\s+(\w+)", str(text))
    if found is None:
        return None
    return found.group(1)

# ---------------------------------------------------------------------
# Gradient helper
# ---------------------------------------------------------------------
@tf.function(reduce_retracing=True)
def _log_odds_grad_for_model(x1, model, class_a, class_b, eps):
    """Gradient of log p[class_a] - log p[class_b] with respect to one input row.

    ``x1`` must have a leading dimension of 1 because the result is squeezed
    on axis 0. ``eps`` is added to each probability before the logarithm.
    """
    with tf.GradientTape() as tape:
        tape.watch(x1)
        probs = model(x1, training=False)  # (1, C)
        log_p_a = tf.math.log(probs[:, class_a] + eps)
        log_p_b = tf.math.log(probs[:, class_b] + eps)
        log_odds = log_p_a - log_p_b
    grad = tape.gradient(log_odds, x1)  # (1, D)
    return tf.squeeze(grad, axis=0)     # (D,)

def compute_avg_logodds_gradient_for_pair(X: tf.Tensor, models: list, class_a: int, class_b: int, eps: float = 1e-8):
    """
    Average input gradient of log p(class_a|x) - log p(class_b|x) across samples and models.

    Each model is evaluated once on the whole batch rather than once per
    sample. ``tape.gradient`` of the per-row log-odds vector with respect to
    ``X`` gives one gradient row per sample, provided the model scores rows
    independently (as the Dense-only networks built in this file do).
    Averaging over samples first and models second is the same mean as the
    reverse order, up to float32 rounding.

    Args:
        X: (N, D) inputs; converted to a float32 tensor.
        models: Non-empty list of callables accepting ``(X, training=False)``
            and returning (N, C) class probabilities.
        class_a, class_b: Column indices of the two classes being compared.
        eps: Added to each probability before the logarithm.

    Returns (D,) tensor.

    Raises:
        ValueError: if ``X`` has no rows or ``models`` is empty.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)
    if X.shape[0] == 0:
        raise ValueError("X must contain at least one sample")
    if not models:
        raise ValueError("models must contain at least one model")

    per_model_grads = []
    for m in models:
        with tf.GradientTape() as tape:
            tape.watch(X)
            p = m(X, training=False)  # (N, C)
            log_odds = tf.math.log(p[:, class_a] + eps) - tf.math.log(p[:, class_b] + eps)  # (N,)
        g = tape.gradient(log_odds, X)  # (N, D): one gradient row per sample
        per_model_grads.append(tf.reduce_mean(g, axis=0))  # (D,)

    avg_grad = tf.reduce_mean(tf.stack(per_model_grads, axis=0), axis=0)  # (D,)
    return avg_grad

# ---------------------------------------------------------------------
# Utils: model builder, cosine, mirror plot
# ---------------------------------------------------------------------
def build_model(input_dim: int, num_classes: int):
    """Build and compile the 128-32-softmax classifier.

    The input shape is declared with an explicit ``tf.keras.Input`` layer
    rather than ``input_dim=`` on the first Dense layer, which newer Keras
    versions warn about.

    Args:
        input_dim: Number of input features.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam, sparse categorical
        cross-entropy, accuracy metric). The first Dense layer carries an
        L1 kernel regularizer of 0.01.
    """
    model = Sequential([
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l1(0.01)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

def cosine_sim(a: np.ndarray, b: np.ndarray, eps: float = 1e-12) -> float:
    """Cosine similarity of two vectors, flattened and cut to a common length.

    ``eps`` is added to the product of the norms, so an all-zero input
    yields 0.0 instead of a division by zero.
    """
    u = np.asarray(a, dtype=float).ravel()
    v = np.asarray(b, dtype=float).ravel()
    common = min(u.size, v.size)
    u, v = u[:common], v[:common]
    scale = np.linalg.norm(u) * np.linalg.norm(v) + eps
    return float(np.dot(u, v) / scale)

def mirror_plot(x, top_y, bottom_y, title, outfile):
    """
    Save a mirror plot: ``top_y`` above the zero line, ``bottom_y`` negated below it.

    The figure is written to ``outfile`` at 200 dpi and then closed.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(x, top_y, linewidth=1.0, label="Run A")
    ax.plot(x, -bottom_y, linewidth=1.0, label="Run B (mirrored)")
    ax.axhline(0.0, color="k", linewidth=0.8)
    ax.set_xlabel("m/z (600..1600 at 0.1 step)")
    ax.set_ylabel("Gradient magnitude")
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    fig.savefig(outfile, dpi=200)
    plt.close(fig)

# ---------------------------------------------------------------------
# Config / data
# ---------------------------------------------------------------------
# Input data and training hyper-parameters.
CSV_PATH = r'F:/casts/dataset_rt.csv'   # <- set appropriately
BIN_VALUE = 45      # only rows whose 'bin' column equals this value are used
EPOCHS = 50
BATCH_SIZE = 32
k = 5               # number of cross-validation folds
N_REPEATS = 10      # differently seeded models trained per fold

# Two independent training baselines:
# each entry is a seed base; per-model seeds are derived as base * fold + repeat.
RAND_INTS = [100, 777]   # two distinct rand_int bases

# Output locations; the plot directory (and its parent) is created on import.
OUT_DIR = "./two_run_compare_5x10_multi"
PLOT_DIR = os.path.join(OUT_DIR, "plots")
os.makedirs(PLOT_DIR, exist_ok=True)

# Class pairs to analyze
# Each tuple is (class_a, class_b) for the log-odds log p(a|x) - log p(b|x).
CLASS_PAIRS = [(1, 0), (2, 0), (3, 0)]

# ---------------------------------------------------------------------
# Load + normalize (drop bin; per-column x/(max+1))
# ---------------------------------------------------------------------
df = pd.read_csv(CSV_PATH)
filtered_df = df[df['bin'] == BIN_VALUE].copy()
if filtered_df.empty:
    # Fail here with a clear message rather than later inside np.max / Keras.
    raise ValueError(f"No rows with bin == {BIN_VALUE} in {CSV_PATH}")

# Scale every feature column by (column max + 1); 'bin' and 'target' are left alone.
cols_to_normalize = filtered_df.columns.difference(['bin', 'target'])
filtered_df[cols_to_normalize] = filtered_df[cols_to_normalize].apply(lambda x: x / (x.max() + 1.0))
filtered_df = filtered_df.drop(columns=['bin'])

X_df = filtered_df.copy()
Y = X_df.pop("target").to_numpy()
X = np.nan_to_num(X_df.to_numpy(), copy=False)
input_dim = X.shape[1]
num_classes = int(np.max(Y)) + 1
# Explicit check instead of ``assert`` so it still runs under ``python -O``.
if input_dim < 10000:
    raise ValueError(f"Expected >=10000 features, got {input_dim}")

# x-grid for first 10k features
x_grid = np.arange(600, 1600, 0.1)  # len 10000

# ---------------------------------------------------------------------
# Train a full 5x10 run and get averaged grads for requested pairs
# ---------------------------------------------------------------------
def train_and_avg_grads(rand_int_base: int, pairs: list):
    """
    Train k x N_REPEATS models on the module-level X/Y and average their gradients.

    A shuffled KFold (random_state fixed at 42) supplies the folds.  Within
    each fold, N_REPEATS fresh models are built and fitted; repeat ``r`` of
    1-based fold ``f`` is seeded with ``rand_int_base * f + r``.  Once all
    models are trained, compute_avg_logodds_gradient_for_pair is evaluated on
    every row of X for each requested (a, b) pair.

    Returns a dict {(a, b): NumPy gradient vector}.  Pairs with a class index
    >= num_classes are reported on stdout and omitted.
    """
    ensemble = []
    splitter = KFold(n_splits=k, shuffle=True, random_state=42)

    for fold_no, (tr_rows, va_rows) in enumerate(splitter.split(X, Y), start=1):
        fit_x, fit_y = X[tr_rows], Y[tr_rows]
        held_out = (X[va_rows], Y[va_rows])
        for repeat in range(N_REPEATS):
            run_seed = rand_int_base * fold_no + repeat
            tf.keras.utils.set_random_seed(run_seed)
            np.random.seed(run_seed)
            net = build_model(input_dim, num_classes)
            net.fit(
                fit_x,
                fit_y,
                epochs=EPOCHS,
                batch_size=BATCH_SIZE,
                validation_data=held_out,
                verbose=0,
            )
            ensemble.append(net)
        print(f"[Seed base {rand_int_base}] Fold {fold_no}/{k} trained {N_REPEATS} models (total so far: {len(ensemble)})")

    inputs = tf.convert_to_tensor(X, dtype=tf.float32)
    per_pair = {}
    for (cls_a, cls_b) in pairs:
        if max(cls_a, cls_b) < num_classes:
            averaged = compute_avg_logodds_gradient_for_pair(inputs, ensemble, class_a=cls_a, class_b=cls_b)
            per_pair[(cls_a, cls_b)] = averaged.numpy()
        else:
            print(f"Skipping pair ({cls_a} vs {cls_b}) — class index out of range (num_classes={num_classes}).")
    return per_pair

# ---------------------------------------------------------------------
# Run A and Run B (each returns dict {(a,b): grad_vec})
# The two runs differ only in the seed base handed to train_and_avg_grads.
# ---------------------------------------------------------------------
avg_A = train_and_avg_grads(RAND_INTS[0], CLASS_PAIRS)
avg_B = train_and_avg_grads(RAND_INTS[1], CLASS_PAIRS)

# ---------------------------------------------------------------------
# For each pair: build channels, compute cosine, save vectors, make plots
# ---------------------------------------------------------------------
overall_summary = {}  # key "<a>v<b>" -> {"cos_full", "cos_pos", "cos_neg"}
for (a, b) in CLASS_PAIRS:
    key = f"{a}v{b}"
    # A pair is absent when train_and_avg_grads skipped it (class index out of range).
    if (a, b) not in avg_A or (a, b) not in avg_B:
        continue

    # Keep only the first 10000 entries so the vectors line up with x_grid.
    yA_full = avg_A[(a, b)].flatten()[:10000]
    yB_full = avg_B[(a, b)].flatten()[:10000]

    # Pos-only (keep positives), Neg-only (abs of negatives)
    yA_pos = np.where(yA_full > 0, yA_full, 0.0)
    yB_pos = np.where(yB_full > 0, yB_full, 0.0)
    yA_neg = np.where(yA_full < 0, -yA_full, 0.0)
    yB_neg = np.where(yB_full < 0, -yB_full, 0.0)

    # Cosines
    # pos and neg channels have disjoint support, so pos + neg equals |gradient|.
    cos_full = cosine_sim(yA_pos + yA_neg, yB_pos + yB_neg)
    cos_pos  = cosine_sim(yA_pos, yB_pos)
    cos_neg  = cosine_sim(yA_neg, yB_neg)

    print(f"\n=== {key}: Cosine similarities (Run A vs Run B) ===")
    print(f"Full (pos + |neg|): {cos_full:.6f}")
    print(f"Pos-only          : {cos_pos:.6f}")
    print(f"Neg-only (|neg|)  : {cos_neg:.6f}")

    # Save vectors
    # One output directory and one plot directory per class pair.
    pair_dir = os.path.join(OUT_DIR, key)
    pair_plot_dir = os.path.join(PLOT_DIR, key)
    os.makedirs(pair_dir, exist_ok=True)
    os.makedirs(pair_plot_dir, exist_ok=True)

    pd.DataFrame({
        "m/z": x_grid,
        "yA_pos": yA_pos,
        "yA_neg_abs": yA_neg,
        "yB_pos": yB_pos,
        "yB_neg_abs": yB_neg,
    }).to_csv(os.path.join(pair_dir, f"run_vectors_{key}.csv"), index=False)

    # Save cosine summary for the pair
    pair_summary = {"cos_full": cos_full, "cos_pos": cos_pos, "cos_neg": cos_neg}
    with open(os.path.join(pair_dir, f"cosine_summary_{key}.json"), "w") as f:
        json.dump(pair_summary, f, indent=2)
    overall_summary[key] = pair_summary

    # Mirror plots
    # Run A is drawn above the baseline and Run B below it, once per channel.
    mirror_plot(x_grid, yA_pos + yA_neg, yB_pos + yB_neg,
                title=f"Mirror — Full (pos + |neg|), class {a} vs {b}",
                outfile=os.path.join(pair_plot_dir, f"mirror_full_{key}.png"))
    mirror_plot(x_grid, yA_pos, yB_pos,
                title=f"Mirror — Positive only, class {a} vs {b}",
                outfile=os.path.join(pair_plot_dir, f"mirror_pos_{key}.png"))
    mirror_plot(x_grid, yA_neg, yB_neg,
                title=f"Mirror — Negative only (|neg|), class {a} vs {b}",
                outfile=os.path.join(pair_plot_dir, f"mirror_negabs_{key}.png"))

# Save an overall JSON summary across all pairs
with open(os.path.join(OUT_DIR, "cosine_summary_all_pairs.json"), "w") as f:
    json.dump(overall_summary, f, indent=2)

print(f"\nOutputs saved under: {OUT_DIR}")
print(f"Plots saved under:   {PLOT_DIR}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[Seed base 100] Fold 1/5 trained 10 models (total so far: 10)
[Seed base 100] Fold 2/5 trained 10 models (total so far: 20)
[Seed base 100] Fold 3/5 trained 10 models (total so far: 30)
[Seed base 100] Fold 4/5 trained 10 models (total so far: 40)
[Seed base 100] Fold 5/5 trained 10 models (total so far: 50)
[Seed base 777] Fold 1/5 trained 10 models (total so far: 10)
[Seed base 777] Fold 2/5 trained 10 models (total so far: 20)
[Seed base 777] Fold 3/5 trained 10 models (total so far: 30)
[Seed base 777] Fold 4/5 trained 10 models (total so far: 40)
[Seed base 777] Fold 5/5 trained 10 models (total so far: 50)

=== 1v0: Cosine similarities (Run A vs Run B) ===
Full (pos + |neg|): 0.971782
Pos-only          : 0.973655
Neg-only (|neg|)  : 0.848608

=== 2v0: Cosine similarities (Run A vs Run B) ===
Full (pos + |neg|): 0.972631
Pos-only          : 0.974154
Neg-only (|neg|)  : 0.838420

=== 3v0: Cosine similarities (Run A vs Run B) ===
Full (pos + |neg|): 0.937225
Pos-only          : 0.69

In [None]:
# -*- coding: utf-8 -*-
import os
import re
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

# Optional (not used in plotting)
import seaborn as sns  # noqa: F401

from scipy.signal import find_peaks  # noqa: F401
from scipy.signal import savgol_filter  # noqa: F401
from scipy.ndimage import gaussian_filter1d  # noqa: F401
from scipy.linalg import svd  # noqa: F401

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold

# ---------------------------------------------------------------------
# Repro defaults (library nondeterminism tamed as much as feasible)
# ---------------------------------------------------------------------
np.random.seed(42)
tf.random.set_seed(42)
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as exc:
    # Memory growth is only a convenience: keep going without it, but say so
    # instead of hiding the failure.
    print(f"Warning: could not enable GPU memory growth: {exc}")

# ---------------------------------------------------------------------
# Minimal helper kept for completeness (unused in main flow)
# ---------------------------------------------------------------------
def helper_regex(text):
    """
    Return the word that follows the literal ``Full`` in ``str(text)``.

    The match requires ``Full``, at least one whitespace character, and then a
    run of word characters; that run is returned.  Returns None when the
    pattern does not occur.  ``text`` may be any object; it is converted with
    ``str()`` first.
    """
    match = re.search(r"Full\s+(\w+)", str(text))
    return match.group(1) if match else None

# ---------------------------------------------------------------------
# Gradient helpers
# ---------------------------------------------------------------------
@tf.function(reduce_retracing=True)
def _log_odds_grad_for_model(x1, model, class_a, class_b, eps):
    """
    Input gradient of log(p[class_a] + eps) - log(p[class_b] + eps) for one model.

    ``p`` is the output of ``model(x1, training=False)``.  The gradient is
    taken with respect to ``x1`` and its leading axis is squeezed away, so
    ``x1`` must have a leading axis of size 1 (the visible caller passes
    ``X[i:i+1]``).
    """
    with tf.GradientTape() as recorder:
        # x1 is watched explicitly so the tape records operations on it.
        recorder.watch(x1)
        probs = model(x1, training=False)
        log_p_a = tf.math.log(probs[:, class_a] + eps)
        log_p_b = tf.math.log(probs[:, class_b] + eps)
        margin = log_p_a - log_p_b
    input_grad = recorder.gradient(margin, x1)
    return tf.squeeze(input_grad, axis=0)

def compute_avg_logodds_gradient_for_pair(X: tf.Tensor, models: list, class_a: int, class_b: int, eps: float = 1e-8):
    """
    Average input gradient of log p(class_a|x) - log p(class_b|x) across samples and models.

    For every row of X the per-model gradients are averaged first; those
    per-sample averages are then averaged over all rows.

    Args:
        X: 2-D array/tensor of samples (rows) by features; converted to float32.
        models: non-empty list of models accepted by _log_odds_grad_for_model.
        class_a, class_b: output indices forming the log-odds.
        eps: constant added to each probability before taking the log.

    Returns:
        A (D,) tensor (float32 NumPy when .numpy()).

    Raises:
        ValueError: if X is not 2-D, has no rows, or models is empty.  Without
            this check an empty input would not yield a meaningful gradient.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)
    if len(X.shape) != 2:
        raise ValueError(f"X must be 2-D (samples, features), got shape {tuple(X.shape)}")
    n_samples = X.shape[0]
    if not n_samples:
        raise ValueError("X must contain at least one sample")
    if len(models) == 0:
        raise ValueError("models must contain at least one model")

    sample_grads = []
    for i in range(n_samples):
        x_i = X[i:i+1]  # keep the leading axis: the helper expects (1, D)
        grads_over_models = [
            _log_odds_grad_for_model(x_i, m, class_a, class_b, eps) for m in models
        ]
        # Mean over models for this sample -> (D,)
        sample_grads.append(tf.reduce_mean(tf.stack(grads_over_models, axis=0), axis=0))

    # Mean over samples -> (D,)
    return tf.reduce_mean(tf.stack(sample_grads, axis=0), axis=0)

# ---------------------------------------------------------------------
# Utils: model, cosine, mirror plotting
# ---------------------------------------------------------------------
def build_model(input_dim: int, num_classes: int):
    """
    Build and compile the dense softmax classifier used for every fold/repeat.

    Architecture: Dense(128, relu, L1 kernel regularizer 0.01) -> Dense(32, relu)
    -> Dense(num_classes, softmax).  Compiled with Adam, sparse categorical
    cross-entropy and an accuracy metric.

    Args:
        input_dim: number of input features.
        num_classes: width of the softmax output layer.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object; passing
        # input_dim to the first Dense layer makes recent Keras emit a
        # UserWarning for every model that is built.
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l1(0.01)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

def cosine_sim(a: np.ndarray, b: np.ndarray, eps: float = 1e-12) -> float:
    """
    Cosine similarity of two array-likes after flattening them to 1-D floats.

    If the flattened inputs differ in length, both are cut to the shorter
    length.  ``eps`` is added to the product of the norms, so an all-zero
    input gives 0.0 instead of a division by zero.
    """
    u = np.asarray(a, dtype=float).ravel()
    v = np.asarray(b, dtype=float).ravel()
    shared = min(u.size, v.size)
    u, v = u[:shared], v[:shared]
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / (norm_product + eps))

def mirror_plot(x, top_y, bottom_y, title, outfile):
    """
    Mirror plot: top_y drawn above baseline; bottom_y mirrored below (negative).

    Args:
        x: x-axis values shared by both traces.
        top_y: values drawn as given (labelled "Run A").
        bottom_y: values drawn negated (labelled "Run B (mirrored)"); any
            array-like is accepted.
        title: figure title.
        outfile: path the figure is saved to at 200 dpi.

    The figure is always closed, even if plotting or saving raises, so
    repeated calls do not accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(x, top_y, linewidth=1.0, label="Run A")
        # np.asarray lets plain lists/tuples be negated as well.
        ax.plot(x, -np.asarray(bottom_y), linewidth=1.0, label="Run B (mirrored)")
        ax.axhline(0.0, color="k", linewidth=0.8)
        ax.set_xlabel("m/z (600..1600 at 0.1 step)")
        ax.set_ylabel("Gradient magnitude")
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()
        fig.savefig(outfile, dpi=200)
    finally:
        plt.close(fig)

# ---------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------
# Input data and training hyper-parameters.
CSV_PATH   = r"F:/casts/dataset_half.csv"  # <- adjust
EPOCHS     = 50
BATCH_SIZE = 32
K_SPLITS   = 5    # KFold splits per run
N_REPEATS  = 10   # models trained per fold

# Seed bases of the two runs compared for every bin.
RAND_INTS  = [100, 777]

# (class_a, class_b) pairs whose log-odds gradients are analysed.
CLASS_PAIRS = [(1, 0), (2, 0), (3, 0)]

# Cap on the number of bins processed, counted from the smallest bin value
# (None = all); e.g. 3 gives a quick smoke test.
BINS_LIMIT = None

OUT_DIR  = "./all_bins_two_run_compare_5x10"
PLOT_DIR = os.path.join(OUT_DIR, "plots")
os.makedirs(PLOT_DIR, exist_ok=True)

# ---------------------------------------------------------------------
# Read the CSV once and list its distinct, non-null bin values in order
# ---------------------------------------------------------------------
df = pd.read_csv(CSV_PATH)
all_bins = sorted(df["bin"].dropna().unique().tolist())
all_bins = all_bins if BINS_LIMIT is None else all_bins[:int(BINS_LIMIT)]

print(f"Discovered {len(all_bins)} bin(s): {all_bins}")

# ---------------------------------------------------------------------
# Per-run trainer that returns averaged grads for requested pairs
# ---------------------------------------------------------------------
def train_and_avg_grads(X: np.ndarray, Y: np.ndarray, rand_int_base: int, pairs: list, num_classes: int):
    """
    Train K_SPLITS x N_REPEATS models on X/Y and average their log-odds input gradients.

    A shuffled KFold (random_state fixed at 42) supplies the folds.  Within
    each fold, N_REPEATS fresh models are built and fitted; repeat ``r`` of
    1-based fold ``f`` is seeded with ``rand_int_base * f + r``.  Once all
    models are trained, compute_avg_logodds_gradient_for_pair is evaluated on
    every row of X for each requested (a, b) pair.

    Returns a dict {(a, b): NumPy gradient vector}.  Pairs with a class index
    >= num_classes are reported on stdout and omitted.
    """
    n_features = X.shape[1]
    ensemble = []
    splitter = KFold(n_splits=K_SPLITS, shuffle=True, random_state=42)

    for fold_no, (tr_rows, va_rows) in enumerate(splitter.split(X, Y), start=1):
        fit_x, fit_y = X[tr_rows], Y[tr_rows]
        held_out = (X[va_rows], Y[va_rows])
        for repeat in range(N_REPEATS):
            run_seed = rand_int_base * fold_no + repeat
            tf.keras.utils.set_random_seed(run_seed)
            np.random.seed(run_seed)
            net = build_model(n_features, num_classes)
            net.fit(
                fit_x,
                fit_y,
                epochs=EPOCHS,
                batch_size=BATCH_SIZE,
                validation_data=held_out,
                verbose=0,
            )
            ensemble.append(net)
        print(f"[Seed base {rand_int_base}] Fold {fold_no}/{K_SPLITS} trained {N_REPEATS} models (total so far: {len(ensemble)})")

    inputs = tf.convert_to_tensor(X, dtype=tf.float32)
    per_pair = {}
    for (cls_a, cls_b) in pairs:
        if max(cls_a, cls_b) < num_classes:
            averaged = compute_avg_logodds_gradient_for_pair(inputs, ensemble, class_a=cls_a, class_b=cls_b)
            per_pair[(cls_a, cls_b)] = averaged.numpy()
        else:
            print(f"  Skipping pair ({cls_a} vs {cls_b}) — class index out of range (num_classes={num_classes}).")
    return per_pair

# ---------------------------------------------------------------------
# Main loop over bins
# ---------------------------------------------------------------------
overall_summary = {}  # str(bin_value) -> {"<a>v<b>": cosine summary}

for bin_value in all_bins:
    print(f"\n================= BIN {bin_value} =================")
    # Filter this bin and normalize columns except ['bin','target'] by (max+1)
    fdf = df[df["bin"] == bin_value].copy()
    if fdf.empty:
        print(f"  Bin {bin_value}: no rows — skipping.")
        continue

    cols_to_norm = fdf.columns.difference(['bin', 'target'])
    fdf[cols_to_norm] = fdf[cols_to_norm].apply(lambda x: x / (x.max() + 1.0))
    fdf = fdf.drop(columns=['bin'])

    X_df = fdf.copy()
    Y = X_df.pop("target").to_numpy()
    X = np.nan_to_num(X_df.to_numpy(), copy=False)

    input_dim   = X.shape[1]
    num_classes = int(np.max(Y)) + 1

    # KFold cannot split fewer samples than it has folds, so a bin needs at
    # least K_SPLITS rows; smaller bins are skipped instead of crashing the run.
    if input_dim < 1 or X.shape[0] < K_SPLITS:
        print(f"  Bin {bin_value}: insufficient data (samples={X.shape[0]}, dim={input_dim}) — skipping.")
        continue

    # Build the x-grid up to min(10000, input_dim).
    # Integer indices guarantee exactly n_grid points; a float-step np.arange
    # can come out one element long or short, which would break the DataFrame
    # and the plots below.
    n_grid = min(10000, input_dim)
    x_grid = 600.0 + 0.1 * np.arange(n_grid)  # len n_grid

    # Two independent runs (each returns {(a,b): grad_vec})
    avg_A = train_and_avg_grads(X, Y, RAND_INTS[0], CLASS_PAIRS, num_classes)
    avg_B = train_and_avg_grads(X, Y, RAND_INTS[1], CLASS_PAIRS, num_classes)

    bin_summary = {}
    bin_dir      = os.path.join(OUT_DIR, f"bin_{bin_value}")
    bin_plot_dir = os.path.join(PLOT_DIR, f"bin_{bin_value}")
    os.makedirs(bin_dir, exist_ok=True)
    os.makedirs(bin_plot_dir, exist_ok=True)

    for (a, b) in CLASS_PAIRS:
        key = f"{a}v{b}"
        if (a, b) not in avg_A or (a, b) not in avg_B:
            print(f"  Bin {bin_value}: pair {key} missing — skipped.")
            continue

        # Trim to the grid length so vectors and x_grid line up.
        yA_full = avg_A[(a, b)].flatten()[:n_grid]
        yB_full = avg_B[(a, b)].flatten()[:n_grid]

        # Channels: pos keep; neg -> absolute value
        yA_pos = np.where(yA_full > 0, yA_full, 0.0)
        yB_pos = np.where(yB_full > 0, yB_full, 0.0)
        yA_neg = np.where(yA_full < 0, -yA_full, 0.0)
        yB_neg = np.where(yB_full < 0, -yB_full, 0.0)

        # Cosines (pos + neg equals |gradient| because the channels are disjoint)
        cos_full = cosine_sim(yA_pos + yA_neg, yB_pos + yB_neg)
        cos_pos  = cosine_sim(yA_pos, yB_pos)
        cos_neg  = cosine_sim(yA_neg, yB_neg)

        print(f"  Bin {bin_value} — {key}: cos_full={cos_full:.6f}  cos_pos={cos_pos:.6f}  cos_neg={cos_neg:.6f}")

        # Save vectors for this bin/pair
        pair_dir = os.path.join(bin_dir, key)
        pair_plot_dir = os.path.join(bin_plot_dir, key)
        os.makedirs(pair_dir, exist_ok=True)
        os.makedirs(pair_plot_dir, exist_ok=True)

        pd.DataFrame({
            "m/z": x_grid,
            "yA_pos": yA_pos,
            "yA_neg_abs": yA_neg,
            "yB_pos": yB_pos,
            "yB_neg_abs": yB_neg,
        }).to_csv(os.path.join(pair_dir, f"run_vectors_{key}.csv"), index=False)

        # Pair-level JSON
        pair_summary = {"cos_full": cos_full, "cos_pos": cos_pos, "cos_neg": cos_neg}
        with open(os.path.join(pair_dir, f"cosine_summary_{key}.json"), "w") as f:
            json.dump(pair_summary, f, indent=2)

        # Plots
        mirror_plot(x_grid, yA_pos + yA_neg, yB_pos + yB_neg,
                    title=f"BIN {bin_value} — Mirror Full (pos+|neg|) {key}",
                    outfile=os.path.join(pair_plot_dir, f"mirror_full_{key}.png"))
        mirror_plot(x_grid, yA_pos, yB_pos,
                    title=f"BIN {bin_value} — Mirror Positive {key}",
                    outfile=os.path.join(pair_plot_dir, f"mirror_pos_{key}.png"))
        mirror_plot(x_grid, yA_neg, yB_neg,
                    title=f"BIN {bin_value} — Mirror Negative (|neg|) {key}",
                    outfile=os.path.join(pair_plot_dir, f"mirror_negabs_{key}.png"))

        bin_summary[key] = pair_summary

    # Save per-bin summary
    with open(os.path.join(bin_dir, "cosine_summary_all_pairs.json"), "w") as f:
        json.dump(bin_summary, f, indent=2)

    overall_summary[str(bin_value)] = bin_summary

# ---------------------------------------------------------------------
# Save the full multi-bin summary
# ---------------------------------------------------------------------
with open(os.path.join(OUT_DIR, "ALL_BINS_cosine_summary.json"), "w") as f:
    json.dump(overall_summary, f, indent=2)

print("\nDone.")
print(f"Outputs rooted at: {OUT_DIR}")
print(f"Plots rooted at:   {PLOT_DIR}")


In [None]:
# -*- coding: utf-8 -*-
import os
import re
import json
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

# Optional (not used in plotting)
import seaborn as sns  # noqa: F401

from scipy.signal import find_peaks  # noqa: F401
from scipy.signal import savgol_filter  # noqa: F401
from scipy.ndimage import gaussian_filter1d  # noqa: F401
from scipy.linalg import svd  # noqa: F401

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold

# ---------------------------------------------------------------------
# Repro defaults (library nondeterminism tamed as much as feasible)
# ---------------------------------------------------------------------
np.random.seed(42)
tf.random.set_seed(42)
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as exc:
    # Memory growth is only a convenience: keep going without it, but say so
    # instead of hiding the failure.
    print(f"Warning: could not enable GPU memory growth: {exc}")

# ---------------------------------------------------------------------
# Minimal helper kept for completeness (unused in main flow)
# ---------------------------------------------------------------------
def helper_regex(text):
    """
    Return the word that follows the literal ``Full`` in ``str(text)``.

    The match requires ``Full``, at least one whitespace character, and then a
    run of word characters; that run is returned.  Returns None when the
    pattern does not occur.  ``text`` may be any object; it is converted with
    ``str()`` first.
    """
    match = re.search(r"Full\s+(\w+)", str(text))
    return match.group(1) if match else None

# ---------------------------------------------------------------------
# Gradient helpers
# ---------------------------------------------------------------------
@tf.function(reduce_retracing=True)
def _log_odds_grad_for_model(x1, model, class_a, class_b, eps):
    """
    Input gradient of log(p[class_a] + eps) - log(p[class_b] + eps) for one model.

    ``p`` is the output of ``model(x1, training=False)``.  The gradient is
    taken with respect to ``x1`` and its leading axis is squeezed away, so
    ``x1`` must have a leading axis of size 1 (the visible caller passes
    ``X[i:i+1]``).
    """
    with tf.GradientTape() as recorder:
        # x1 is watched explicitly so the tape records operations on it.
        recorder.watch(x1)
        probs = model(x1, training=False)
        log_p_a = tf.math.log(probs[:, class_a] + eps)
        log_p_b = tf.math.log(probs[:, class_b] + eps)
        margin = log_p_a - log_p_b
    input_grad = recorder.gradient(margin, x1)
    return tf.squeeze(input_grad, axis=0)

def compute_avg_logodds_gradient_for_pair(X: tf.Tensor, models: list, class_a: int, class_b: int, eps: float = 1e-8):
    """
    Average input gradient of log p(class_a|x) - log p(class_b|x) across samples and models.

    For every row of X the per-model gradients are averaged first; those
    per-sample averages are then averaged over all rows.

    Args:
        X: 2-D array/tensor of samples (rows) by features; converted to float32.
        models: non-empty list of models accepted by _log_odds_grad_for_model.
        class_a, class_b: output indices forming the log-odds.
        eps: constant added to each probability before taking the log.

    Returns:
        A (D,) tensor (float32 NumPy when .numpy()).

    Raises:
        ValueError: if X is not 2-D, has no rows, or models is empty.  Without
            this check an empty input would not yield a meaningful gradient.
    """
    X = tf.convert_to_tensor(X, dtype=tf.float32)
    if len(X.shape) != 2:
        raise ValueError(f"X must be 2-D (samples, features), got shape {tuple(X.shape)}")
    n_samples = X.shape[0]
    if not n_samples:
        raise ValueError("X must contain at least one sample")
    if len(models) == 0:
        raise ValueError("models must contain at least one model")

    sample_grads = []
    for i in range(n_samples):
        x_i = X[i:i+1]  # keep the leading axis: the helper expects (1, D)
        grads_over_models = [
            _log_odds_grad_for_model(x_i, m, class_a, class_b, eps) for m in models
        ]
        # Mean over models for this sample -> (D,)
        sample_grads.append(tf.reduce_mean(tf.stack(grads_over_models, axis=0), axis=0))

    # Mean over samples -> (D,)
    return tf.reduce_mean(tf.stack(sample_grads, axis=0), axis=0)

# ---------------------------------------------------------------------
# Utils: model, cosine, mirror plotting
# ---------------------------------------------------------------------
def build_model(input_dim: int, num_classes: int):
    """
    Build and compile the dense softmax classifier used for every fold/repeat.

    Architecture: Dense(128, relu, L1 kernel regularizer 0.01) -> Dense(32, relu)
    -> Dense(num_classes, softmax).  Compiled with Adam, sparse categorical
    cross-entropy and an accuracy metric.

    Args:
        input_dim: number of input features.
        num_classes: width of the softmax output layer.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object; passing
        # input_dim to the first Dense layer makes recent Keras emit a
        # UserWarning for every model that is built.
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l1(0.01)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

def cosine_sim(a: np.ndarray, b: np.ndarray, eps: float = 1e-12) -> float:
    """
    Cosine similarity of two array-likes after flattening them to 1-D floats.

    If the flattened inputs differ in length, both are cut to the shorter
    length.  ``eps`` is added to the product of the norms, so an all-zero
    input gives 0.0 instead of a division by zero.
    """
    u = np.asarray(a, dtype=float).ravel()
    v = np.asarray(b, dtype=float).ravel()
    shared = min(u.size, v.size)
    u, v = u[:shared], v[:shared]
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / (norm_product + eps))

def mirror_plot(x, top_y, bottom_y, title, outfile):
    """
    Mirror plot: top_y drawn above baseline; bottom_y mirrored below (negative).

    Args:
        x: x-axis values shared by both traces.
        top_y: values drawn as given (labelled "Run A").
        bottom_y: values drawn negated (labelled "Run B (mirrored)"); any
            array-like is accepted.
        title: figure title.
        outfile: path the figure is saved to at 200 dpi.

    The figure is always closed, even if plotting or saving raises, so
    repeated calls do not accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(x, top_y, linewidth=1.0, label="Run A")
        # np.asarray lets plain lists/tuples be negated as well.
        ax.plot(x, -np.asarray(bottom_y), linewidth=1.0, label="Run B (mirrored)")
        ax.axhline(0.0, color="k", linewidth=0.8)
        ax.set_xlabel("m/z (600..1600 at 0.1 step)")
        ax.set_ylabel("Gradient magnitude")
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()
        fig.savefig(outfile, dpi=200)
    finally:
        plt.close(fig)

# ---------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------
# Input data and training hyper-parameters.
CSV_PATH   = r"F:/casts/dataset_half.csv"  # <- adjust
EPOCHS     = 50
BATCH_SIZE = 32
K_SPLITS   = 5    # KFold splits per run
N_REPEATS  = 10   # models trained per fold

# Seed bases of the two runs compared for every bin.
RAND_INTS  = [100, 777]

# (class_a, class_b) pairs whose log-odds gradients are analysed.
CLASS_PAIRS = [(1, 0), (2, 0), (3, 0)]

# Optional cap on the number of bins (None = all), e.g. 3 for a smoke test.
# NOTE: the bin selection below does not consult this value.
BINS_LIMIT = None

OUT_DIR  = "./all_bins_two_run_compare_5x10_batches"
PLOT_DIR = os.path.join(OUT_DIR, "plots")
os.makedirs(PLOT_DIR, exist_ok=True)

# ---------------------------------------------------------------------
# Read the CSV once; keep only the six largest distinct bin values
# ---------------------------------------------------------------------
df = pd.read_csv(CSV_PATH)
_sorted_bins = sorted(df["bin"].dropna().unique().tolist())
all_bins = _sorted_bins[-6:]  # last 6 only
print(f"Discovered {df['bin'].nunique()} unique bin(s) total; running last {len(all_bins)}: {all_bins}")


# ---------------------------------------------------------------------
# Per-run trainer that returns averaged grads for requested pairs
#  - Cleans up models/graph/tensors BEFORE returning to free memory
# ---------------------------------------------------------------------
def train_and_avg_grads(X: np.ndarray, Y: np.ndarray, rand_int_base: int, pairs: list, num_classes: int):
    """
    Train K_SPLITS x N_REPEATS models on X/Y and average their log-odds input gradients.

    A shuffled KFold (random_state fixed at 42) supplies the folds.  Within
    each fold, N_REPEATS fresh models are built and fitted; repeat ``r`` of
    1-based fold ``fold`` is seeded with ``rand_int_base * fold + r``.  Once
    all models are trained, compute_avg_logodds_gradient_for_pair is evaluated
    on every row of X for each requested (a, b) pair.

    Returns:
        dict {(a, b): NumPy gradient vector}.  Pairs with a class index
        >= num_classes are reported on stdout and omitted.

    Model references, the input tensor and the Keras session are released
    before returning, whether the function succeeds or raises.
    """
    kf = KFold(n_splits=K_SPLITS, shuffle=True, random_state=42)
    all_models = []
    m = None
    X_tensor = None

    try:
        for fold, (train_idx, val_idx) in enumerate(kf.split(X, Y), 1):
            X_tr, y_tr = X[train_idx], Y[train_idx]
            X_va, y_va = X[val_idx], Y[val_idx]
            for r in range(N_REPEATS):
                seed = rand_int_base * fold + r
                tf.keras.utils.set_random_seed(seed)
                np.random.seed(seed)
                m = build_model(X.shape[1], num_classes)
                m.fit(X_tr, y_tr,
                      epochs=EPOCHS,
                      batch_size=BATCH_SIZE,
                      validation_data=(X_va, y_va),
                      verbose=0)
                all_models.append(m)
            print(f"[Seed base {rand_int_base}] Fold {fold}/{K_SPLITS} trained {N_REPEATS} models (total so far: {len(all_models)})")

        X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
        grads = {}
        for (a, b) in pairs:
            if a >= num_classes or b >= num_classes:
                print(f"  Skipping pair ({a} vs {b}) — class index out of range (num_classes={num_classes}).")
                continue
            grads[(a, b)] = compute_avg_logodds_gradient_for_pair(X_tensor, all_models, class_a=a, class_b=b).numpy()
        return grads
    finally:
        # ---------- Cleanup of models/graph ----------
        # Drop every reference this frame holds to the models and the input
        # tensor (the list contents, the last loop model, the tensor) before
        # asking Keras and the garbage collector to reclaim memory.
        all_models.clear()
        m = None
        X_tensor = None
        tf.keras.backend.clear_session()
        gc.collect()
        # ---------------------------------------------

# ---------------------------------------------------------------------
# Per-bin processing
# ---------------------------------------------------------------------
def process_single_bin(bin_value, overall_summary):
    """
    Run the two-run gradient comparison for one bin and record its cosine summary.

    Rows of the module-level ``df`` with ``bin == bin_value`` are normalized
    (every column except 'bin'/'target' divided by its max + 1), two runs of
    train_and_avg_grads are executed with the seed bases in RAND_INTS, and for
    every pair in CLASS_PAIRS the positive / |negative| gradient channels are
    compared, saved as CSV + JSON, and plotted.

    Args:
        bin_value: value of the 'bin' column to process.
        overall_summary: dict updated in place with
            ``overall_summary[str(bin_value)] = {pair_key: cosine summary}``.

    Returns:
        None.  Bins with no rows, no features, or fewer than K_SPLITS samples
        are reported and skipped without touching ``overall_summary``.
    """
    print(f"\n================= BIN {bin_value} =================")
    # Filter this bin and normalize columns except ['bin','target'] by (max+1)
    fdf = df[df["bin"] == bin_value].copy()
    if fdf.empty:
        print(f"  Bin {bin_value}: no rows — skipping.")
        return

    cols_to_norm = fdf.columns.difference(['bin', 'target'])
    fdf[cols_to_norm] = fdf[cols_to_norm].apply(lambda x: x / (x.max() + 1.0))
    fdf = fdf.drop(columns=['bin'])

    X_df = fdf.copy()
    Y = X_df.pop("target").to_numpy()
    X = np.nan_to_num(X_df.to_numpy(), copy=False)

    input_dim   = X.shape[1]
    num_classes = int(np.max(Y)) + 1

    # KFold cannot split fewer samples than it has folds, so a bin needs at
    # least K_SPLITS rows; smaller bins are skipped instead of crashing the run.
    if input_dim < 1 or X.shape[0] < K_SPLITS:
        print(f"  Bin {bin_value}: insufficient data (samples={X.shape[0]}, dim={input_dim}) — skipping.")
        return

    # Build the x-grid up to min(10000, input_dim).
    # Integer indices guarantee exactly n_grid points; a float-step np.arange
    # can come out one element long or short, which would break the DataFrame
    # and the plots below.
    n_grid = min(10000, input_dim)
    x_grid = 600.0 + 0.1 * np.arange(n_grid)  # len n_grid

    # Two independent runs (each returns {(a,b): grad_vec})
    avg_A = train_and_avg_grads(X, Y, RAND_INTS[0], CLASS_PAIRS, num_classes)
    avg_B = train_and_avg_grads(X, Y, RAND_INTS[1], CLASS_PAIRS, num_classes)

    bin_summary = {}
    bin_dir      = os.path.join(OUT_DIR, f"bin_{bin_value}")
    bin_plot_dir = os.path.join(PLOT_DIR, f"bin_{bin_value}")
    os.makedirs(bin_dir, exist_ok=True)
    os.makedirs(bin_plot_dir, exist_ok=True)

    for (a, b) in CLASS_PAIRS:
        key = f"{a}v{b}"
        if (a, b) not in avg_A or (a, b) not in avg_B:
            print(f"  Bin {bin_value}: pair {key} missing — skipped.")
            continue

        # Trim to the grid length so vectors and x_grid line up.
        yA_full = avg_A[(a, b)].flatten()[:n_grid]
        yB_full = avg_B[(a, b)].flatten()[:n_grid]

        # Channels: pos keep; neg -> absolute value
        yA_pos = np.where(yA_full > 0, yA_full, 0.0)
        yB_pos = np.where(yB_full > 0, yB_full, 0.0)
        yA_neg = np.where(yA_full < 0, -yA_full, 0.0)
        yB_neg = np.where(yB_full < 0, -yB_full, 0.0)

        # Cosines (pos + neg equals |gradient| because the channels are disjoint)
        cos_full = cosine_sim(yA_pos + yA_neg, yB_pos + yB_neg)
        cos_pos  = cosine_sim(yA_pos, yB_pos)
        cos_neg  = cosine_sim(yA_neg, yB_neg)

        print(f"  Bin {bin_value} — {key}: cos_full={cos_full:.6f}  cos_pos={cos_pos:.6f}  cos_neg={cos_neg:.6f}")

        # Save vectors for this bin/pair
        pair_dir = os.path.join(bin_dir, key)
        pair_plot_dir = os.path.join(bin_plot_dir, key)
        os.makedirs(pair_dir, exist_ok=True)
        os.makedirs(pair_plot_dir, exist_ok=True)

        pd.DataFrame({
            "m/z": x_grid,
            "yA_pos": yA_pos,
            "yA_neg_abs": yA_neg,
            "yB_pos": yB_pos,
            "yB_neg_abs": yB_neg,
        }).to_csv(os.path.join(pair_dir, f"run_vectors_{key}.csv"), index=False)

        # Pair-level JSON
        pair_summary = {"cos_full": cos_full, "cos_pos": cos_pos, "cos_neg": cos_neg}
        with open(os.path.join(pair_dir, f"cosine_summary_{key}.json"), "w") as f:
            json.dump(pair_summary, f, indent=2)

        # Plots
        mirror_plot(x_grid, yA_pos + yA_neg, yB_pos + yB_neg,
                    title=f"BIN {bin_value} — Mirror Full (pos+|neg|) {key}",
                    outfile=os.path.join(pair_plot_dir, f"mirror_full_{key}.png"))
        mirror_plot(x_grid, yA_pos, yB_pos,
                    title=f"BIN {bin_value} — Mirror Positive {key}",
                    outfile=os.path.join(pair_plot_dir, f"mirror_pos_{key}.png"))
        mirror_plot(x_grid, yA_neg, yB_neg,
                    title=f"BIN {bin_value} — Mirror Negative (|neg|) {key}",
                    outfile=os.path.join(pair_plot_dir, f"mirror_negabs_{key}.png"))

        bin_summary[key] = pair_summary

    # Save per-bin summary
    with open(os.path.join(bin_dir, "cosine_summary_all_pairs.json"), "w") as f:
        json.dump(bin_summary, f, indent=2)

    overall_summary[str(bin_value)] = bin_summary

    # -------- Per-bin cleanup (free CPU/GPU/TF graph memory) --------
    # Only names that are always bound at this point are deleted; the per-pair
    # arrays may not exist when every pair was skipped, and they are released
    # when the function returns anyway.
    del fdf, X_df, X, Y, avg_A, avg_B
    tf.keras.backend.clear_session()
    gc.collect()
    # ----------------------------------------------------------------

# ---------------------------------------------------------------------
# Batch runner: process bins in groups of 4 with cleanup between batches
# ---------------------------------------------------------------------
def process_bins_in_batches(bins, batch_size=4):
    """
    Process ``bins`` in consecutive groups of ``batch_size`` and save the combined summary.

    Every bin is handed to process_single_bin together with one shared summary
    dict.  After each group the Keras session is cleared and the garbage
    collector is run.  When all groups are done, the summary is written to
    ALL_BINS_cosine_summary.json under OUT_DIR and the output locations are
    printed.
    """
    summary = {}
    total = len(bins)
    starts = range(0, total, batch_size)
    n_batches = (total + batch_size - 1) // batch_size
    rule = "=" * 68

    for batch_no, start in enumerate(starts, start=1):
        group = bins[start:start + batch_size]
        print("\n" + rule)
        print(f"Processing bin batch {batch_no} of {n_batches}: {group}")
        print(rule)

        for bin_value in group:
            process_single_bin(bin_value, summary)

        # Release TF/Keras state accumulated by this group before the next one.
        tf.keras.backend.clear_session()
        gc.collect()

    # Persist the summary covering every processed bin.
    summary_path = os.path.join(OUT_DIR, "ALL_BINS_cosine_summary.json")
    with open(summary_path, "w") as handle:
        json.dump(summary, handle, indent=2)

    print("\nDone.")
    print(f"Outputs rooted at: {OUT_DIR}")
    print(f"Plots rooted at:   {PLOT_DIR}")

# ---------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Process the bins selected in all_bins, four per batch.
    process_bins_in_batches(all_bins, batch_size=4)


In [4]:
# -*- coding: utf-8 -*-
import os, re, json, gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Optional (not used in plotting)
import seaborn as sns  # noqa: F401
from scipy.signal import find_peaks  # noqa: F401
from scipy.signal import savgol_filter  # noqa: F401
from scipy.ndimage import gaussian_filter1d  # noqa: F401
from scipy.linalg import svd  # noqa: F401

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold

# ----------------------------
# Repro & GPU memory growth
# ----------------------------
np.random.seed(42)
tf.random.set_seed(42)
try:
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as exc:
    # Memory growth is only a convenience: keep going without it, but say so
    # instead of hiding the failure.
    print(f"Warning: could not enable GPU memory growth: {exc}")

def helper_regex(text):
    """
    Return the word that follows the literal ``Full`` in ``str(text)``.

    The match requires ``Full``, at least one whitespace character, and then a
    run of word characters; that run is returned.  Returns None when the
    pattern does not occur.  ``text`` may be any object; it is converted with
    ``str()`` first.
    """
    match = re.search(r"Full\s+(\w+)", str(text))
    return match.group(1) if match else None

# ----------------------------
# Grouped log-odds gradient (keep model as-is, multi-class)
# ----------------------------
@tf.function(reduce_retracing=True)
def _group_logodds_grad_for_model(x1, model, pos_ids, neg_ids, eps):
    """
    Input gradient of the grouped log-odds for one model.

    The differentiated quantity is
    log(sum of p over pos_ids + eps) - log(sum of p over neg_ids + eps),
    where ``p = model(x1, training=False)``.  The gradient is taken with
    respect to ``x1`` and its leading axis is squeezed away, so ``x1`` must
    have a leading axis of size 1.
    """
    pos_index = tf.constant(pos_ids, dtype=tf.int32)
    neg_index = tf.constant(neg_ids, dtype=tf.int32)
    with tf.GradientTape() as recorder:
        # x1 is watched explicitly so the tape records operations on it.
        recorder.watch(x1)
        probs = model(x1, training=False)
        # Pick the columns of each group and add them up per row.
        pos_mass = tf.reduce_sum(tf.gather(probs, pos_index, axis=1), axis=1)
        neg_mass = tf.reduce_sum(tf.gather(probs, neg_index, axis=1), axis=1)
        log_pos = tf.math.log(pos_mass + eps)
        log_neg = tf.math.log(neg_mass + eps)
        margin = log_pos - log_neg
    input_grad = recorder.gradient(margin, x1)
    return tf.squeeze(input_grad, axis=0)

def compute_avg_group_logodds_gradient(
    X: np.ndarray, models: list, pos_ids=(2,3), neg_ids=(0,1), eps: float = 1e-8
) -> np.ndarray:
    """
    Average the grouped log-odds input gradient across samples and models.

    For every row of X the gradient is computed with each model, averaged over
    the models, and those per-sample averages are then averaged over samples.

    X:       (N, D) array-like of inputs; converted to a float32 tensor.
    models:  non-empty sequence of models accepted by
             `_group_logodds_grad_for_model`.
    pos_ids: class indices of the numerator group.
    neg_ids: class indices of the denominator group.
    eps:     constant added inside each log.

    Returns a (D,) np.ndarray.
    Raises ValueError if `models` is empty or X is not a non-empty 2-D array.
    """
    models = list(models)
    if not models:
        raise ValueError("`models` must contain at least one model.")

    X = tf.convert_to_tensor(X, dtype=tf.float32)
    if len(X.shape) != 2:
        raise ValueError(f"X must be 2-D (N, D); got shape {tuple(X.shape)}.")
    n_samples = X.shape[0]
    if not n_samples:
        raise ValueError("X must contain at least one sample.")

    # Plain int tuples / float keep the arguments hashable and identical from
    # call to call, whatever sequence type (list, ndarray, ...) the caller used.
    pos_ids = tuple(int(i) for i in pos_ids)
    neg_ids = tuple(int(j) for j in neg_ids)
    eps = float(eps)

    sample_grads = []
    for i in range(n_samples):
        x_i = X[i:i+1]  # keep the leading axis: (1, D)
        per_model = [
            _group_logodds_grad_for_model(x_i, m, pos_ids, neg_ids, eps)
            for m in models
        ]
        sample_grads.append(tf.reduce_mean(tf.stack(per_model, axis=0), axis=0))  # (D,)

    avg_grad = tf.reduce_mean(tf.stack(sample_grads, axis=0), axis=0)  # (D,)
    return avg_grad.numpy()

# ----------------------------
# Model (unchanged)
# ----------------------------
def build_model(input_dim: int, num_classes: int):
    """
    Build and compile the dense softmax classifier.

    Architecture: Dense(128, relu, L1(0.01) kernel regulariser) -> Dense(32, relu)
    -> Dense(num_classes, softmax), compiled with Adam, sparse categorical
    cross-entropy and an accuracy metric.

    input_dim:   number of input features.
    num_classes: number of output classes (integer labels 0..num_classes-1).

    Returns the compiled Sequential model.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object rather than
        # `input_dim=` on the first Dense layer, which newer Keras versions
        # warn about. Layers and weights are unchanged.
        tf.keras.Input(shape=(input_dim,)),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l1(0.01)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

# ----------------------------
# Cosine & plotting helpers
# ----------------------------
def cosine_sim(a: np.ndarray, b: np.ndarray, eps: float = 1e-12) -> float:
    """
    Cosine similarity between two array-likes.

    Both inputs are flattened to 1-D float vectors; if their lengths differ,
    the longer one is cut to the length of the shorter. `eps` is added to the
    product of the norms, so all-zero (or empty) inputs give 0.0 instead of a
    division by zero.
    """
    vec_a = np.asarray(a, dtype=float).ravel()
    vec_b = np.asarray(b, dtype=float).ravel()

    shared = min(vec_a.size, vec_b.size)
    vec_a, vec_b = vec_a[:shared], vec_b[:shared]

    inner = np.dot(vec_a, vec_b)
    scale = (np.linalg.norm(vec_a) * np.linalg.norm(vec_b)) + eps
    return float(inner / scale)

def mirror_plot(x, top_y, bottom_y, title, outfile):
    """
    Save a mirror plot: top_y above the baseline, bottom_y negated below it.

    x:        shared x values for both traces.
    top_y:    array-like drawn as-is (labelled "Run A").
    bottom_y: array-like drawn negated (labelled "Run B (mirrored)").
    title:    figure title.
    outfile:  path the figure is written to at 200 dpi.

    Inputs may be any array-like (lists included); they are converted to float
    arrays so the negation of bottom_y always works. The figure is closed even
    when plotting or saving raises, so failed calls do not leave figures open.
    """
    top = np.asarray(top_y, dtype=float)
    bottom = np.asarray(bottom_y, dtype=float)

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(x, top, linewidth=1.0, label="Run A")
        ax.plot(x, -bottom, linewidth=1.0, label="Run B (mirrored)")
        ax.axhline(0.0, linewidth=0.8)  # baseline separating the two halves
        ax.set_xlabel("m/z (approx grid)")
        ax.set_ylabel("Gradient magnitude")
        ax.set_title(title)
        ax.legend()
        fig.tight_layout()
        fig.savefig(outfile, dpi=200)
    finally:
        plt.close(fig)

# ----------------------------
# Config (adjust path as needed)
# ----------------------------
# Input table read by main(); it is filtered on its 'bin' and 'target' columns.
CSV_PATH   = r"F:/casts/dataset_rt.csv"  # <- adjust to your local dataset location
BIN_VALUE  = 45   # only rows with bin == BIN_VALUE are used
EPOCHS     = 50   # epochs passed to model.fit for every model
BATCH_SIZE = 32   # batch size passed to model.fit
K_SPLITS   = 5    # number of KFold splits
N_REPEATS  = 10   # models trained per fold, each with its own seed

# Two independent runs: one seed base per run (first -> run A, second -> run B)
SEED_BASES = [111, 777]

# Output locations. The plots directory (and therefore OUT_DIR, its parent)
# is created as soon as this cell/module is executed.
OUT_DIR  = "./bin45_group_compare_two_runs"
PLOT_DIR = os.path.join(OUT_DIR, "plots")
os.makedirs(PLOT_DIR, exist_ok=True)

# ----------------------------
# Training (multi-class) and gradient extraction for one run
# ----------------------------
def train_kfold_repeats(X: np.ndarray, Y: np.ndarray, seed_base: int):
    """
    Train N_REPEATS models on each of K_SPLITS shuffled folds.

    The fold assignment uses a fixed random_state, so it is the same for every
    seed_base; only the per-model seed
    (seed_base * 1000 + fold * 100 + repeat) varies between runs.
    Each model is fitted on the fold's training part with the held-out part
    passed as validation data.

    Returns the list of all trained models (K_SPLITS * N_REPEATS entries).
    """
    n_features = X.shape[1]
    num_classes = 1 + int(np.max(Y))  # labels are assumed to be 0..max
    splitter = KFold(n_splits=K_SPLITS, shuffle=True, random_state=42)

    trained = []
    for fold, (train_idx, val_idx) in enumerate(splitter.split(X, Y), start=1):
        train_data = (X[train_idx], Y[train_idx])
        val_data = (X[val_idx], Y[val_idx])

        for repeat in range(N_REPEATS):
            # Re-seed before building so weight init and shuffling depend
            # only on (seed_base, fold, repeat).
            seed = seed_base * 1000 + fold * 100 + repeat
            tf.keras.utils.set_random_seed(seed)
            np.random.seed(seed)

            net = build_model(n_features, num_classes)
            net.fit(
                *train_data,
                epochs=EPOCHS,
                batch_size=BATCH_SIZE,
                validation_data=val_data,
                verbose=0,
            )
            trained.append(net)

        print(f"[Seed base {seed_base}] Fold {fold}/{K_SPLITS} trained {N_REPEATS} models (total: {len(trained)})")

    return trained

def run_once_get_gradient(X: np.ndarray, Y: np.ndarray, seed_base: int) -> np.ndarray:
    """
    Run one full training pass and return its averaged input gradient.

    Trains the k-fold/repeat ensemble for `seed_base`, then averages the
    gradient of log(p2+p3) - log(p0+p1) w.r.t. the inputs over all samples in
    X and all trained models.

    Returns a (D,) np.ndarray.
    """
    models = train_kfold_repeats(X, Y, seed_base)
    try:
        return compute_avg_group_logodds_gradient(
            X, models, pos_ids=(2,3), neg_ids=(0,1), eps=1e-8
        )
    finally:
        # Empty the list itself: `del` on a loop variable only unbinds that
        # name and leaves every model referenced by the list. Running this in
        # `finally` also cleans up when the gradient computation fails.
        models.clear()
        del models
        tf.keras.backend.clear_session()
        gc.collect()

# ----------------------------
# Main: filter bin=45, train twice, plot mirrors
# ----------------------------
def main():
    """
    Compare the averaged log-odds input gradient of two independent runs.

    Steps: load CSV_PATH and keep rows of BIN_VALUE, scale each feature column
    by (column max + 1), keep targets 0..3, train one ensemble per seed base,
    then save both gradients (.npy and a combined CSV), two mirror plots
    (positive part / absolute negative part), and a JSON summary containing
    the cosine similarities.

    Raises ValueError when the bin has no rows or too little data remains.
    """
    meta_cols = ['bin', 'target']

    # ---- Load and filter to the configured bin ----
    table = pd.read_csv(CSV_PATH)
    fdf = table[table["bin"] == BIN_VALUE].copy()
    if fdf.empty:
        raise ValueError(f"No rows found for bin == {BIN_VALUE} in {CSV_PATH}")

    # ---- Normalise every feature column by (max + 1) ----
    def _scale_by_max_plus_one(column):
        return column / (column.max() + 1.0)

    feature_cols = fdf.columns.difference(meta_cols)
    fdf[feature_cols] = fdf[feature_cols].apply(_scale_by_max_plus_one)

    # ---- Keep only classes 0..3 and build the arrays ----
    wanted = fdf["target"].astype(int).isin([0, 1, 2, 3])
    fdf = fdf[wanted].copy()
    Y = fdf["target"].astype(int).to_numpy()
    features = fdf.drop(columns=meta_cols).to_numpy()
    X = np.nan_to_num(features, copy=False)

    n_samples, n_features = X.shape
    if n_samples < 2 or n_features < 1:
        raise ValueError(f"Insufficient data (samples={n_samples}, dim={n_features}).")

    labels, counts = np.unique(Y, return_counts=True)
    print(f"Bin {BIN_VALUE}: samples={n_samples}, dim={n_features}  class_counts="
          f"{dict(zip(labels, counts))}")

    # ---- Two independent runs ----
    grad_A = run_once_get_gradient(X, Y, seed_base=SEED_BASES[0])
    grad_B = run_once_get_gradient(X, Y, seed_base=SEED_BASES[1])

    # ---- Output locations, built once and reused for saving and the summary ----
    paths = {
        "grads_csv": os.path.join(OUT_DIR, f"bin{BIN_VALUE}_grads_AB.csv"),
        "grad_runA_npy": os.path.join(OUT_DIR, f"bin{BIN_VALUE}_grad_runA.npy"),
        "grad_runB_npy": os.path.join(OUT_DIR, f"bin{BIN_VALUE}_grad_runB.npy"),
        "mirror_pos_png": os.path.join(PLOT_DIR, f"bin{BIN_VALUE}_mirror_pos.png"),
        "mirror_negabs_png": os.path.join(PLOT_DIR, f"bin{BIN_VALUE}_mirror_negabs.png"),
    }
    summary_path = os.path.join(OUT_DIR, f"bin{BIN_VALUE}_summary.json")

    # ---- Raw gradients ----
    os.makedirs(OUT_DIR, exist_ok=True)
    np.save(paths["grad_runA_npy"], grad_A)
    np.save(paths["grad_runB_npy"], grad_B)

    # ---- Combined CSV on a grid capped at 10000 points (0.1 steps from 600) ----
    n_grid = min(10000, grad_A.size, grad_B.size)
    x_grid = np.arange(600, 600 + 0.1 * n_grid, 0.1)[:n_grid]
    yA = grad_A[:n_grid]
    yB = grad_B[:n_grid]
    grads_table = pd.DataFrame({"m/z": x_grid, "grad_runA": yA, "grad_runB": yB})
    grads_table.to_csv(paths["grads_csv"], index=False)

    # ---- Split each gradient into its positive part and |negative| part ----
    yA_pos = np.where(yA > 0, yA, 0.0)
    yA_neg = np.where(yA < 0, -yA, 0.0)
    yB_pos = np.where(yB > 0, yB, 0.0)
    yB_neg = np.where(yB < 0, -yB, 0.0)

    # ---- Agreement between the two runs ----
    cos_pos = cosine_sim(yA_pos, yB_pos)
    cos_neg = cosine_sim(yA_neg, yB_neg)

    # ---- Mirror plots ----
    mirror_plot(
        x_grid, yA_pos, yB_pos,
        title=f"Bin {BIN_VALUE} — Mirror Positive Gradients  (cos={cos_pos:.4f})",
        outfile=paths["mirror_pos_png"],
    )
    mirror_plot(
        x_grid, yA_neg, yB_neg,
        title=f"Bin {BIN_VALUE} — Mirror Negative Gradients |abs|  (cos={cos_neg:.4f})",
        outfile=paths["mirror_negabs_png"],
    )

    # ---- JSON summary ----
    summary = {
        "bin": BIN_VALUE,
        "comparison": "log(p2+p3) - log(p0+p1)",
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "k_splits": K_SPLITS,
        "n_repeats": N_REPEATS,
        "seed_bases": SEED_BASES,
        "cosine_pos": cos_pos,
        "cosine_neg_abs": cos_neg,
        "paths": paths,
    }
    with open(summary_path, "w") as f:
        json.dump(summary, f, indent=2)

    print("\nSaved:")
    for label, location in summary["paths"].items():
        print(f"  {label}: {location}")
    print(f"\nCosine similarities — positive: {cos_pos:.6f}   negative(|abs|): {cos_neg:.6f}")

    # ---- Final cleanup ----
    tf.keras.backend.clear_session()
    gc.collect()

# Run the two-run comparison only when this file/cell is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()


Bin 45: samples=118, dim=13690  class_counts={0: 33, 1: 30, 2: 26, 3: 29}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[Seed base 111] Fold 1/5 trained 10 models (total: 10)
[Seed base 111] Fold 2/5 trained 10 models (total: 20)
[Seed base 111] Fold 3/5 trained 10 models (total: 30)
[Seed base 111] Fold 4/5 trained 10 models (total: 40)
[Seed base 111] Fold 5/5 trained 10 models (total: 50)
[Seed base 777] Fold 1/5 trained 10 models (total: 10)
[Seed base 777] Fold 2/5 trained 10 models (total: 20)
[Seed base 777] Fold 3/5 trained 10 models (total: 30)
[Seed base 777] Fold 4/5 trained 10 models (total: 40)
[Seed base 777] Fold 5/5 trained 10 models (total: 50)

Saved:
  grads_csv: ./bin45_group_compare_two_runs\bin45_grads_AB.csv
  grad_runA_npy: ./bin45_group_compare_two_runs\bin45_grad_runA.npy
  grad_runB_npy: ./bin45_group_compare_two_runs\bin45_grad_runB.npy
  mirror_pos_png: ./bin45_group_compare_two_runs\plots\bin45_mirror_pos.png
  mirror_negabs_png: ./bin45_group_compare_two_runs\plots\bin45_mirror_negabs.png

Cosine similarities — positive: 0.760823   negative(|abs|): 0.910192
