In [None]:
import os, sys
sys.path.append(os.path.abspath("../.."))
import json
import yaml
from types import SimpleNamespace
from helpers.utils import clean_and_append



In [None]:
# Notebook settings: the two workflow YAML files being compared and the
# number of bootstrap runs to read for each of them.
args = SimpleNamespace(
    config_workflow_pretrain="config-EMA-old/workflow_data.yaml",
    config_workflow_scratch="config-EMA-scratch-old/workflow_data.yaml",
    boostrap=8,
)

# Absolute directory containing each workflow file; the per-bootstrap control
# YAMLs are looked up inside these directories.
base_dir_pretrain, base_dir_scratch = (
    os.path.dirname(os.path.abspath(workflow_yaml))
    for workflow_yaml in (args.config_workflow_pretrain, args.config_workflow_scratch)
)


In [None]:
def _load_gen_performance(base_dir, iboostrap):
    """
    Read the two ``gen_performance.json`` files of one bootstrap run.

    The run's control file ``<base_dir>/control-boostrap-<iboostrap>.yaml``
    supplies ``output.storedir``; the JSON files are read from the ``SR``
    sub-directory of the "_hybrid" directory derived from it and of the
    "_no_signal" directory derived from that one (both via ``clean_and_append``).

    Parameters:
    - base_dir (str): Directory holding the per-bootstrap control YAML files.
    - iboostrap (int): Index of the bootstrap run.

    Returns:
    - (gen_performance, gen_performance_nosignal): the parsed JSON contents.
    """
    control_path = os.path.join(base_dir, f"control-boostrap-{iboostrap}.yaml")
    with open(control_path, 'r') as f_boost:
        control = yaml.safe_load(f_boost)

    storedir = control["output"]["storedir"]
    gendir = clean_and_append(storedir, "_hybrid")
    gendir_nosignal = clean_and_append(gendir, "_no_signal")

    loaded = []
    for directory in (gendir, gendir_nosignal):
        with open(os.path.join(directory, "SR", "gen_performance.json"), 'r') as f_gen:
            loaded.append(json.load(f_gen))
    return tuple(loaded)


# boostrap_summary[iboostrap] maps '<method>-OS' / '<method>-SS' to the parsed
# gen_performance.json of the regular / "_no_signal" directory respectively,
# for method in {'pretrain', 'scratch'}.
boostrap_summary = dict()
for iboostrap in range(args.boostrap):
    boostrap_summary[iboostrap] = dict()
    for method, base_dir in (("pretrain", base_dir_pretrain), ("scratch", base_dir_scratch)):
        gen_performance, gen_performance_nosignal = _load_gen_performance(base_dir, iboostrap)
        boostrap_summary[iboostrap].update({
            f"{method}-OS": gen_performance,
            f"{method}-SS": gen_performance_nosignal,
        })



In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.lines import Line2D
import os

def plot_aftercut_statistics_separate(boostrap_summary, figsize=(6, 6), output_dir="plots"):
    """
    Draw one boxplot per 'after cut' metric (cov, mmd, efficiency) with one box
    per label found in ``boostrap_summary``. Labels ending in 'OS' come first,
    then labels ending in 'SS'; other labels are not plotted. Each figure is
    saved to ``output_dir`` as ``aftercut_<metric>.pdf`` and
    ``aftercut_<metric>.png``.

    NOTE: a later cell of this notebook defines another function with this same
    name; when the cells are run in order, that later definition replaces this
    one.

    Parameters:
    - boostrap_summary (dict): ``{bootstrap_index: {label: performance}}``; only
      labels whose ``performance`` has an 'after cut' entry are used.
    - figsize (tuple): Size of each individual figure.
    - output_dir (str): Directory to save the PDF and PNG files (created if missing).

    Returns:
    - summary_stats (dict): ``summary_stats[metric][label]`` holds 'mean',
      'variance' and 'std' (sample statistics, ddof=1). Variance and std are
      NaN when fewer than two values are available. If there is nothing to
      plot, every metric maps to an empty dict and no figure is produced.
    """
    metric_names = ['cov', 'mmd', 'efficiency']
    os.makedirs(output_dir, exist_ok=True)

    # Prepare data: one row per (bootstrap, label) that has an 'after cut' entry
    records = []
    for ibootstrap, summary in boostrap_summary.items():
        for label, performance in summary.items():
            if 'after cut' not in performance:
                continue
            metrics = performance['after cut']
            records.append({
                'bootstrap': ibootstrap,
                'label': label,
                'cov': metrics.get('cov', np.nan),
                'mmd': metrics.get('mmd', np.nan),
                'efficiency': metrics.get('efficiency', np.nan),
            })

    # Without rows the frame has no columns and melt() would raise KeyError.
    if not records:
        return {metric: {} for metric in metric_names}

    df = pd.DataFrame(records)
    df_melted = df.melt(id_vars=['bootstrap', 'label'], value_vars=metric_names,
                        var_name='metric', value_name='value')

    # Determine all labels and order (OS group first, then SS group)
    all_labels = sorted(df['label'].unique())
    os_labels = [l for l in all_labels if l.endswith('OS')]
    ss_labels = [l for l in all_labels if l.endswith('SS')]
    ordered_labels = os_labels + ss_labels
    group_split_index = len(os_labels)
    if not ordered_labels:
        return {metric: {} for metric in metric_names}

    # Compute summary stats; guard the sample statistics against too few values
    summary_stats = {}
    for metric in metric_names:
        summary_stats[metric] = {}
        for label in ordered_labels:
            values = df.loc[df['label'] == label, metric].dropna()
            n_values = len(values)
            summary_stats[metric][label] = {
                'mean': float(np.mean(values)) if n_values > 0 else np.nan,
                'variance': float(np.var(values, ddof=1)) if n_values > 1 else np.nan,
                'std': float(np.std(values, ddof=1)) if n_values > 1 else np.nan,
            }

    # Colour each box by the method prefix of its label ('<method>-OS' / '<method>-SS');
    # unknown prefixes fall back to gray.
    method_palette = {'pretrain': '#1f77b4', 'scratch': '#ff7f0e'}
    palette = {label: method_palette.get(label.split('-', 1)[0], '#7f7f7f')
               for label in ordered_labels}

    for metric in metric_names:
        fig, ax = plt.subplots(figsize=figsize)

        data = df_melted[df_melted['metric'] == metric]
        data = data[data['label'].isin(ordered_labels)]

        sns.boxplot(data=data, x='label', y='value', palette=palette,
                    width=0.6, fliersize=0, order=ordered_labels, ax=ax)

        handles = []
        for i, label in enumerate(ordered_labels):
            mean = summary_stats[metric][label]['mean']
            std = summary_stats[metric][label]['std']
            if np.isnan(mean):
                # Nothing to mark for a label without any valid value.
                continue

            # Mean line and errorbar (no bar when the std is undefined)
            ax.hlines(mean, i - 0.3, i + 0.3, colors='black', linestyles='dashed', linewidth=1.2)
            ax.errorbar(i, mean, yerr=None if np.isnan(std) else std, fmt='o', color='black',
                        capsize=6, elinewidth=2.5, markeredgewidth=2.5, markersize=8)

            handles.append(Line2D([0], [0], marker='o', color='black',
                    label=f"{label} ({mean:.3f} ± {std:.3f})",
                    markersize=7, linestyle='dashed'))

        # Vertical separator between OS and SS
        ax.axvline(x=group_split_index - 0.5, color='gray', linestyle='--', linewidth=1)

        # Non-breaking dash for nicer tick labels
        safe_labels = [lbl.replace("-", "\u2011") for lbl in ordered_labels]
        ax.set_xticks(range(len(ordered_labels)))
        ax.set_xticklabels(safe_labels, rotation=15)

        ax.set_title(f"{metric} (after cut)", fontsize=14, weight='bold')
        ax.set_xlabel("")
        ax.set_ylabel("Value")
        ax.legend(handles=handles, loc='upper right', frameon=True)

        fig.tight_layout()
        for extension in ("pdf", "png"):
            fig.savefig(os.path.join(output_dir, f"aftercut_{metric}.{extension}"),
                        bbox_inches='tight')
        plt.show()
        plt.close(fig)

    return summary_stats


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.lines import Line2D
import os

def plot_aftercut_statistics_separate(boostrap_summary, figsize=(6, 6), output_dir="plots"):
    """
    Draw one boxplot per 'after cut' metric (cov, mmd, efficiency) with one box
    per label found in ``boostrap_summary``, coloured by method ('pretrain' or
    'scratch'). Labels ending in 'OS' come first, then labels ending in 'SS';
    other labels are not plotted. Each figure is saved to ``output_dir`` as
    ``aftercut_<metric>.pdf`` and ``aftercut_<metric>.png``.

    Parameters:
    - boostrap_summary (dict): ``{bootstrap_index: {label: performance}}``; only
      labels whose ``performance`` has an 'after cut' entry are used. A label
      containing 'pretrain' is assigned to method 'pretrain', any other label
      to 'scratch'.
    - figsize (tuple): Size of each individual figure.
    - output_dir (str): Directory to save output PDFs and PNGs (created if missing).

    Returns:
    - summary_stats (dict): ``summary_stats[metric][(label, method)]`` holds
      'mean', 'variance' and 'std' (sample statistics, ddof=1) for every
      label/method combination; combinations without data are NaN. If there
      is nothing to plot, every metric maps to an empty dict and no figure is
      produced.
    """
    metric_names = ['cov', 'mmd', 'efficiency']
    methods = ['pretrain', 'scratch']
    box_width = 0.6
    os.makedirs(output_dir, exist_ok=True)

    # Prepare data with 'method' extracted from label
    records = []
    for ibootstrap, summary in boostrap_summary.items():
        for label, performance in summary.items():
            if 'after cut' not in performance:
                continue
            metrics = performance['after cut']
            records.append({
                'bootstrap': ibootstrap,
                'label': label,
                # Assumes the label contains 'pretrain' or 'scratch'
                'method': 'pretrain' if 'pretrain' in label else 'scratch',
                'cov': metrics.get('cov', np.nan),
                'mmd': metrics.get('mmd', np.nan),
                'efficiency': metrics.get('efficiency', np.nan),
            })

    # Without rows the frame has no columns and melt() would raise KeyError.
    if not records:
        return {metric: {} for metric in metric_names}

    df = pd.DataFrame(records)

    # Melt keeping 'method'
    df_melted = df.melt(id_vars=['bootstrap', 'label', 'method'],
                        value_vars=metric_names,
                        var_name='metric', value_name='value')

    # Determine all labels and order (OS first then SS)
    all_labels = sorted(df['label'].unique())
    os_labels = [l for l in all_labels if l.endswith('OS')]
    ss_labels = [l for l in all_labels if l.endswith('SS')]
    ordered_labels = os_labels + ss_labels
    group_split_index = len(os_labels)
    if not ordered_labels:
        return {metric: {} for metric in metric_names}

    # Compute summary stats per metric, label, and method
    summary_stats = {}
    for metric in metric_names:
        summary_stats[metric] = {}
        for label in ordered_labels:
            for method in methods:
                values = df.loc[(df['label'] == label) & (df['method'] == method), metric].dropna()
                n_values = len(values)
                summary_stats[metric][(label, method)] = {
                    'mean': float(np.mean(values)) if n_values > 0 else np.nan,
                    'variance': float(np.var(values, ddof=1)) if n_values > 1 else np.nan,
                    'std': float(np.std(values, ddof=1)) if n_values > 1 else np.nan,
                }

    # Define color palette for methods
    method_palette = {'pretrain': '#1f77b4', 'scratch': '#ff7f0e'}

    # With hue dodging, the total box width is split evenly between the hue
    # levels, so each method's box is centred (j - (n - 1) / 2) * slot_width
    # away from the tick.
    slot_width = box_width / len(methods)

    for metric in metric_names:
        fig, ax = plt.subplots(figsize=figsize)

        data = df_melted[df_melted['metric'] == metric]
        data = data[data['label'].isin(ordered_labels)]

        # hue_order is pinned so the marker offsets below always match the
        # box positions, whatever order the methods appear in the data.
        sns.boxplot(data=data, x='label', y='value', hue='method', hue_order=methods,
                    palette=method_palette, width=box_width, fliersize=0,
                    order=ordered_labels, ax=ax)

        # Add mean and std error bars per label and method
        for i, label in enumerate(ordered_labels):
            for j, method in enumerate(methods):
                stats = summary_stats[metric].get((label, method), None)
                if stats is None or np.isnan(stats['mean']):
                    continue
                xpos = i + (j - (len(methods) - 1) / 2) * slot_width
                mean = stats['mean']
                std = stats['std']

                ax.hlines(mean, xpos - 0.1, xpos + 0.1, colors='black', linestyles='dashed', linewidth=1.2)
                # No error bar when the std is undefined (fewer than two values)
                ax.errorbar(xpos, mean, yerr=None if np.isnan(std) else std, fmt='o', color='black',
                            capsize=6, elinewidth=2.5, markeredgewidth=2.5, markersize=8)

        # Vertical separator between OS and SS
        ax.axvline(x=group_split_index - 0.5, color='gray', linestyle='--', linewidth=1)

        # Non-breaking dash for nicer tick labels
        safe_labels = [lbl.replace("-", "\u2011") for lbl in ordered_labels]
        ax.set_xticks(range(len(ordered_labels)))
        ax.set_xticklabels(safe_labels, rotation=15)

        ax.set_title(f"{metric} (after cut)", fontsize=14, weight='bold')
        ax.set_xlabel("")
        ax.set_ylabel("Value")
        ax.legend(title='Method', loc='upper right', frameon=True, fontsize=14, title_fontsize=14)

        fig.tight_layout()

        # Save figures
        for extension in ("pdf", "png"):
            fig.savefig(os.path.join(output_dir, f"aftercut_{metric}.{extension}"),
                        bbox_inches='tight')

        plt.show()
        plt.close(fig)

    return summary_stats


In [None]:
# Produce the per-metric 'after cut' figures and keep the returned statistics.
summary_stats = plot_aftercut_statistics_separate(
    boostrap_summary,
    figsize=(12, 12),
)

In [None]:
def _load_bonus_significance(base_dir, iboostrap, method):
    """
    Collect the 'bonus_significance' values of one bootstrap run.

    The run's control file ``<base_dir>/control-boostrap-<iboostrap>.yaml``
    supplies ``output.plotdir``. Four summary JSON files are read from the
    ``step4_bump_hunting`` sub-directory of that plot directory and of its
    "_no_signal" counterpart (derived via ``clean_and_append``).

    Parameters:
    - base_dir (str): Directory holding the per-bootstrap control YAML files.
    - iboostrap (int): Index of the bootstrap run.
    - method (str): Prefix used for the result labels (e.g. 'pretrain').

    Returns:
    - dict: ``result[nbin][degree]['<method>-<combo>'] = bonus_significance``
      for combo in trainOS_testOS, trainOS_testSS, trainSS_testOS,
      trainSS_testSS. The (nbin, degree) keys are taken from the
      trainOS_testSS summary.
    """
    control_path = os.path.join(base_dir, f"control-boostrap-{iboostrap}.yaml")
    with open(control_path, 'r') as f_boost:
        control = yaml.safe_load(f_boost)
    plotdir = control["output"]["plotdir"]

    trainOS_dir = os.path.join(plotdir, "step4_bump_hunting")
    trainSS_dir = os.path.join(clean_and_append(plotdir, "_no_signal"), "step4_bump_hunting")

    # NOTE(review): in the "_no_signal" directory, summary_data.json is read as
    # the testSS result and summary_data_no_signal.json as the testOS result,
    # i.e. the reverse of the regular directory. This mapping is kept exactly
    # as it was; confirm it is intended.
    summary_files = {
        "trainOS_testOS": os.path.join(trainOS_dir, "summary_data.json"),
        "trainOS_testSS": os.path.join(trainOS_dir, "summary_data_no_signal.json"),
        "trainSS_testOS": os.path.join(trainSS_dir, "summary_data_no_signal.json"),
        "trainSS_testSS": os.path.join(trainSS_dir, "summary_data.json"),
    }
    summaries = {}
    for combo, path in summary_files.items():
        with open(path, 'r') as f_boost_test:
            summaries[combo] = json.load(f_boost_test)

    result = {}
    for nbin, per_nbin in summaries["trainOS_testSS"].items():
        result[nbin] = {}
        for degree in per_nbin:
            result[nbin][degree] = {
                f"{method}-{combo}": summaries[combo][nbin][degree]['bonus_significance']
                for combo in summary_files
            }
    return result


# boostrap_summary_significance[iboostrap][nbin][degree]['<method>-<combo>'] = significance
boostrap_summary_significance = dict()
for iboostrap in range(args.boostrap):
    per_bootstrap = boostrap_summary_significance[iboostrap] = dict()
    for method, base_dir in (("pretrain", base_dir_pretrain), ("scratch", base_dir_scratch)):
        loaded = _load_bonus_significance(base_dir, iboostrap, method)
        for nbin, per_nbin in loaded.items():
            for degree, values in per_nbin.items():
                # setdefault: a (nbin, degree) present for only one method must
                # not raise KeyError when the other method's values are merged.
                per_bootstrap.setdefault(nbin, dict()).setdefault(degree, dict()).update(values)

In [None]:

def plot_significance_comparison_per_nbin_degree(boostrap_summary_significance, figsize=(8, 6), output_dir="plots_significance_grouped"):
    """
    Generate one significance comparison plot per (nbin, degree), grouped by
    train/test combination (e.g. trainOS_testOS) and comparing the 'pretrain'
    and 'scratch' methods side by side. Each box is annotated with its
    mean ± std, and a dashed reference line is drawn at 6.4 (labelled
    "6.4σ (2502.14036)"). Every figure is saved to ``output_dir`` as
    ``significance_bin<nbin>_degree<degree>.pdf`` and ``.png``.

    Parameters:
    - boostrap_summary_significance (dict): Dict like:
        boostrap_summary_significance[iboostrap][nbin][degree][label] = float
      where ``label`` is '<method>-<group>' (split at the first '-').
    - figsize (tuple): Size of each individual plot.
    - output_dir (str): Directory to save output PDFs and PNGs (created if missing).

    Returns:
    - summary_stats (dict): ``summary_stats[(nbin, degree)]['<method>-<group>']``
      holds 'mean' and 'std' (sample std, ddof=1; NaN with a single value).
      An empty dict is returned, and nothing is plotted, when the input
      contains no values.
    """
    import os

    os.makedirs(output_dir, exist_ok=True)

    # Flatten into long-form records
    records = []
    for ibootstrap, per_bootstrap in boostrap_summary_significance.items():
        for nbin, per_nbin in per_bootstrap.items():
            for degree, per_degree in per_nbin.items():
                for label, value in per_degree.items():
                    method, group = label.split("-", 1)
                    records.append({
                        "bootstrap": ibootstrap,
                        "nbin": nbin,
                        "degree": degree,
                        "method": method,
                        "group": group,
                        "significance": value
                    })

    # An empty frame has no 'group' column; bail out before touching it.
    if not records:
        return {}

    # Plotting libraries are only needed once there is something to draw.
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    df = pd.DataFrame(records)

    summary_stats = {}
    group_order = sorted(df['group'].unique(), key=lambda x: ('OS' in x, x))
    methods = ['pretrain', 'scratch']
    method_palette = {'pretrain': '#1f77b4', 'scratch': '#ff7f0e'}
    reference_significance = 6.4
    reference_color = "#DC143C"

    # With hue dodging, the total box width is split evenly between the hue
    # levels, so each method's box is centred (j - (n - 1) / 2) * slot_width
    # away from the tick.
    box_width = 0.6
    slot_width = box_width / len(methods)

    for (nbin, degree), group_df in df.groupby(['nbin', 'degree']):
        fig, ax = plt.subplots(figsize=figsize)
        stats_here = summary_stats[(nbin, degree)] = {}

        # hue_order is pinned so the marker offsets below always match the
        # box positions, whatever order the methods appear in the data.
        sns.boxplot(
            data=group_df,
            x='group', y='significance', hue='method',
            hue_order=methods,
            palette=method_palette,
            order=group_order,
            width=box_width, fliersize=0, ax=ax
        )

        for i, group in enumerate(group_order):
            for j, method in enumerate(methods):
                selected = (group_df['group'] == group) & (group_df['method'] == method)
                values = group_df.loc[selected, 'significance']
                if values.empty:
                    continue
                mean = values.mean()
                std = values.std(ddof=1)
                stats_here[f"{method}-{group}"] = {
                    'mean': float(mean), 'std': float(std)
                }
                if pd.isna(mean):
                    # Nothing sensible to draw for an all-missing group.
                    continue

                # std is NaN with a single value: draw the marker without a
                # bar and keep the annotation at a finite position.
                spread = 0.0 if pd.isna(std) else std
                x_pos = i + (j - (len(methods) - 1) / 2) * slot_width
                ax.errorbar(
                    x_pos, mean, yerr=spread,
                    fmt='o', color='black', capsize=5, elinewidth=2, markersize=6,
                    zorder=5,
                )
                ax.text(
                    x_pos, mean + spread + 0.1,  # padding above error bar
                    f"{mean:.2f} ± {std:.2f}",
                    ha='center', va='bottom',
                    fontsize=9,
                    color='black',
                    rotation=0,  # horizontal text
                    zorder=6,
                    bbox=dict(
                        boxstyle='round,pad=0.3',  # rounded box with some padding
                        facecolor='white',         # background color
                        alpha=0.5,                 # transparency (0=fully transparent, 1=opaque)
                        edgecolor='none'           # no border
                    )
                )

        # Horizontal reference line
        ax.axhline(reference_significance, color=reference_color, linestyle='--', linewidth=1.5, alpha=0.75)

        # Label for the reference line, placed just below it near the right side
        ax.text(
            len(group_order) - 1.4, 6.2, r"6.4$\sigma$ (2502.14036)",
            color=reference_color, fontsize=10,
            va='center', ha='left',
            bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=0.5)
        )

        ax.set_title("Likelihood Reweighted Significance", fontsize=14)
        ax.set_xlabel("")
        ax.set_ylabel("Significance")
        # Fix the tick positions before assigning custom tick labels.
        ax.set_xticks(range(len(group_order)))
        ax.set_xticklabels([g.replace("_", "\n") for g in group_order], fontsize=10)
        ax.axhline(0, linestyle='--', color='gray', linewidth=1)
        ax.legend(title='Method', loc='upper right', fontsize=10, title_fontsize=10)
        fig.tight_layout()
        for extension in ("pdf", "png"):
            fig.savefig(os.path.join(output_dir, f"significance_bin{nbin}_degree{degree}.{extension}"))
        plt.show()
        plt.close(fig)

    return summary_stats



In [None]:
# Draw the significance comparison for every (nbin, degree); as the cell's last
# expression, the returned statistics are shown as the cell output.
plot_significance_comparison_per_nbin_degree(
    boostrap_summary_significance,
)