# Comparing Particle Combinations from ROOT File

This notebook loads a ROOT file and compares two particle combinations to determine:
1. Whether the combinations actually use different particles
2. Whether the calculated invariant masses are different
3. Which specific particles are selected for each combination
4. Why the results might be similar or different

**Example combinations to compare:**
- `IM_2e_1j_2p` (2 electrons, 1 jet, 2 photons)
- `IM_2e_1j` (2 electrons, 1 jet)


In [None]:
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import awkward as ak
from pathlib import Path

from src.parse_atlas import parser
from src.im_calculator.im_calculator import IMCalculator

%matplotlib inline
plt.style.use('default')


In [None]:
# Configuration
# Example: compare IM_2e_1j_2p against IM_2e_1j, both taken from the same file.

# Directories (output_dir is defined here for reference; this cell does not use it).
root_files_dir = "/storage/agrp/netalev/data/root_files/"
output_dir = "/storage/agrp/netalev/data/inv_masses/"

# File (without extension) and final state, as taken from the terminal example.
base_filename = "2025e-13tev-beta_mc_21c3213e02bc3f7b"
final_state = "2e_2m_1j_2p"

# Particle-count requirements for the two combinations being compared.
combination1 = dict(Electrons=2, Jets=1, Photons=2)  # IM_2e_1j_2p
combination2 = dict(Electrons=2, Jets=1)  # IM_2e_1j

# Full path of the ROOT file: <root_files_dir>/<base_filename>.root
root_file_path = os.path.join(root_files_dir, base_filename + ".root")

# Echo the configuration so the notebook output records what was compared.
print(f"ROOT file: {root_file_path}")
print(f"File exists: {os.path.exists(root_file_path)}")
print(f"\nFinal state: {final_state}")
print(f"Combination 1: {combination1}")
print(f"Combination 2: {combination2}")


In [None]:
# Load the ROOT file
#
# `particle_arrays` is the hand-off to the following cells, which skip their
# work when it is None. It is therefore reset to None on *every* failure path
# (missing file, parser returned None, parser returned zero events), so an
# empty parse result cannot leak into the downstream analysis.
particle_arrays = None

if not os.path.exists(root_file_path):
    print(f"ERROR: ROOT file not found: {root_file_path}")
else:
    print(f"Loading ROOT file: {root_file_path}")

    # Extract release year from filename
    release_year = parser.AtlasOpenParser.extract_release_year_from_filename(base_filename)
    print(f"Extracted release year: {release_year}")

    # Parse the file (batch_size=None presumably means "no batching" -
    # confirm against AtlasOpenParser.parse_root_file).
    parsed_arrays = parser.AtlasOpenParser.parse_root_file(
        root_file_path,
        batch_size=None,
        release_year=release_year
    )

    if parsed_arrays is None or len(parsed_arrays) == 0:
        print("ERROR: Could not parse ROOT file or file is empty")
    else:
        particle_arrays = parsed_arrays
        n_events = len(particle_arrays)
        print(f"✓ Successfully loaded {n_events:,} events")
        print(f"Available particle types: {particle_arrays.fields}")

        # Show particle counts: ak.num gives the per-event multiplicity of each
        # particle type, and ak.sum totals it over all events.
        for particle_type in particle_arrays.fields:
            per_event_count = ak.num(particle_arrays[particle_type])
            print(f"  {particle_type}: {ak.sum(per_event_count):,} total particles in {n_events:,} events")


In [None]:
# Initialize calculator and filter by final state
#
# `fs_events` is the hand-off to the following cells, which skip their work
# when it is None. It stays None unless a non-empty group matching
# `final_state` is found, so a match with zero events is treated as "no match".
fs_events = None

if particle_arrays is not None:
    calculator = IMCalculator(particle_arrays)

    # group_by_final_state() is iterated as (final_state_label, events) pairs;
    # take the first group whose label equals the requested final state.
    matching_events = next(
        (events for fs, events in calculator.group_by_final_state() if fs == final_state),
        None,
    )

    if matching_events is None or len(matching_events) == 0:
        print(f"ERROR: No events found for final state: {final_state}")
        # List what *is* available to help pick a valid final state.
        print(f"Available final states:")
        for fs, _ in calculator.group_by_final_state():
            print(f"  - {fs}")
    else:
        fs_events = matching_events
        print(f"✓ Found {len(fs_events):,} events matching final state: {final_state}")

        # Show particle counts in this final state (per-event multiplicity stats)
        print("\nParticle counts in final state events:")
        for particle_type in fs_events.fields:
            per_event_count = ak.num(fs_events[particle_type])
            print(
                f"  {particle_type}: min={ak.min(per_event_count)}, "
                f"max={ak.max(per_event_count)}, mean={ak.mean(per_event_count):.2f}"
            )


In [None]:
# Filter events for each combination
def _select_combination(calc, events, particle_counts, label):
    """Filter and slice `events` for one particle combination, reporting progress.

    Parameters
    ----------
    calc : calculator exposing `filter_by_particle_counts` and `slice_by_field`
        (an IMCalculator in this notebook).
    events : events of the chosen final state.
    particle_counts : dict mapping particle type name -> required count.
    label : heading used in the printed report (e.g. "Combination 1").

    Returns
    -------
    (filtered, sliced) : the filtered events and the sliced events; `sliced`
        is None when no event passes the filter.
    """
    filtered = calc.filter_by_particle_counts(
        events=events,
        particle_counts=particle_counts,
        is_exact_count=True
    )
    print(f"{label} ({particle_counts}):")
    print(f"  Events after filtering: {len(filtered):,}")

    if len(filtered) == 0:
        print("  ⚠️  No events match this combination")
        return filtered, None

    # Slice by pt (presumably keeps the top-N particles per type, with N taken
    # from particle_counts - confirm against IMCalculator.slice_by_field).
    sliced = calc.slice_by_field(
        events=filtered,
        particle_counts=particle_counts,
        field_to_slice_by="pt"
    )
    print(f"  Events after slicing: {len(sliced):,}")

    # Show which particles are actually selected
    print(f"  Selected particles:")
    for particle_type, count in particle_counts.items():
        if count <= 0 or particle_type not in sliced.fields:
            continue
        particles = sliced[particle_type]
        print(f"    {particle_type}: {count} particles per event")
        if len(particles) > 0:
            # pt values of the first event only, as a quick sanity check.
            first_event = particles[0]
            first_pts = ak.to_list(first_event.pt) if len(first_event) > 0 else 'N/A'
            print(f"      First event pt values: {first_pts}")

    return filtered, sliced


# `sliced1` / `sliced2` are the hand-off to the following cells; they stay
# None when there are no final-state events or a combination matches nothing.
sliced1 = None
sliced2 = None

if fs_events is not None:
    print("=== Filtering events for each combination ===\n")

    # Combination 1: 2e_1j_2p
    filtered1, sliced1 = _select_combination(calculator, fs_events, combination1, "Combination 1")

    print()

    # Combination 2: 2e_1j
    filtered2, sliced2 = _select_combination(calculator, fs_events, combination2, "Combination 2")


In [None]:
# Calculate invariant masses for both combinations
def _print_mass_summary(label, particle_counts, masses):
    """Print size, min, max, mean, median and std of one invariant-mass array.

    `masses` is whatever calculator.calculate_invariant_mass returned; it is
    assumed to be a flat numeric awkward array (one value per event) - the
    rest of this notebook converts it with ak.to_numpy as well.
    """
    print(f"{label} ({particle_counts}):")
    print(f"  Invariant mass array size: {len(masses):,}")
    print(f"  Min: {ak.min(masses):.6f} GeV")
    print(f"  Max: {ak.max(masses):.6f} GeV")
    print(f"  Mean: {ak.mean(masses):.6f} GeV")
    # Awkward Array provides no `ak.median`; compute it on the NumPy view.
    print(f"  Median: {np.median(ak.to_numpy(masses)):.6f} GeV")
    print(f"  Std: {ak.std(masses):.6f} GeV")


# `im1` / `im2` are the hand-off to the following cells; they stay None when
# either combination has no selected events.
im1 = None
im2 = None

if sliced1 is None or sliced2 is None:
    print("Cannot calculate invariant masses - one or both combinations have no events")
else:
    print("=== Calculating Invariant Masses ===\n")

    im1 = calculator.calculate_invariant_mass(sliced1)
    im2 = calculator.calculate_invariant_mass(sliced2)

    _print_mass_summary("Combination 1", combination1, im1)
    print()
    _print_mass_summary("Combination 2", combination2, im2)

    # An element-wise comparison is only meaningful when both arrays have the
    # same length.
    if len(im1) != len(im2):
        print(f"\nArrays have different sizes: {len(im1):,} vs {len(im2):,}")
        print("Cannot directly compare - they represent different sets of events")
    else:
        print(f"\n=== Comparison (both have {len(im1):,} events) ===")
        im1_np = ak.to_numpy(im1)
        im2_np = ak.to_numpy(im2)

        are_identical = np.array_equal(im1_np, im2_np)
        print(f"Arrays are identical: {are_identical}")

        if are_identical:
            print("\n⚠️  WARNING: Arrays are IDENTICAL!")
            print("   This is unexpected - different combinations should produce different results")
        else:
            diff = im1_np - im2_np
            abs_diff = np.abs(diff)
            max_abs_diff = np.max(abs_diff)
            # Count the differing entries once and reuse the result below.
            n_different = np.sum(im1_np != im2_np)

            print(f"Max absolute difference: {max_abs_diff:.6f} GeV")
            print(f"Mean absolute difference: {np.mean(abs_diff):.6f} GeV")
            print(f"Std of differences: {np.std(diff):.6f} GeV")
            print(f"Number of different values: {n_different:,} / {len(im1_np):,}")
            print(f"Percentage different: {100 * n_different / len(im1_np):.2f}%")

            # Differences below 1e-6 GeV are treated as numerically negligible.
            if max_abs_diff < 1e-6:
                print("\n⚠️  WARNING: Arrays are nearly identical (difference < 1e-6 GeV)")
                print("   This suggests the additional particles in combination 1 don't affect the result")
            else:
                print("\n✓ Arrays are different - the combinations produce different invariant masses")


In [None]:
# Visualize the comparison
def _label_mass_axis(ax, title):
    """Apply the axis labels, title, grid and legend shared by the histogram panels."""
    ax.set_xlabel('Invariant Mass (GeV)')
    ax.set_ylabel('Frequency')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
    ax.legend()


if im1 is not None and im2 is not None:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    im1_np = ak.to_numpy(im1)
    im2_np = ak.to_numpy(im2)

    # Top row: one histogram per combination, each binned over its own range.
    axes[0, 0].hist(im1_np, bins=50, alpha=0.7, label=f'{combination1}', color='blue', edgecolor='black')
    _label_mass_axis(axes[0, 0], f'Combination 1: {combination1}')

    axes[0, 1].hist(im2_np, bins=50, alpha=0.7, label=f'{combination2}', color='red', edgecolor='black')
    _label_mass_axis(axes[0, 1], f'Combination 2: {combination2}')

    # Overlay: both histograms must share the same bin edges, otherwise each
    # one gets 50 bins over its own range and the bar heights are not
    # comparable between the two combinations.
    shared_edges = np.histogram_bin_edges(np.concatenate([im1_np, im2_np]), bins=50)
    axes[1, 0].hist(im1_np, bins=shared_edges, alpha=0.5, label=f'{combination1}', color='blue', edgecolor='black')
    axes[1, 0].hist(im2_np, bins=shared_edges, alpha=0.5, label=f'{combination2}', color='red', edgecolor='black')
    _label_mass_axis(axes[1, 0], 'Overlay Comparison')

    # Scatter plot only when the arrays can be paired element by element.
    scatter_ax = axes[1, 1]
    if len(im1_np) == len(im2_np):
        scatter_ax.scatter(im1_np, im2_np, alpha=0.3, s=1)
        # Diagonal y=x reference line spanning the combined value range.
        min_val = min(im1_np.min(), im2_np.min())
        max_val = max(im1_np.max(), im2_np.max())
        scatter_ax.plot([min_val, max_val], [min_val, max_val], 'r--', label='y=x')
        scatter_ax.set_xlabel(f'Combination 1: {combination1}')
        scatter_ax.set_ylabel(f'Combination 2: {combination2}')
        scatter_ax.set_title('Scatter: Combination 1 vs Combination 2')
        scatter_ax.grid(True, alpha=0.3)
        scatter_ax.legend()
    else:
        scatter_ax.text(0.5, 0.5, 'Arrays have different sizes\nCannot scatter plot',
                        ha='center', va='center', transform=scatter_ax.transAxes)
        scatter_ax.set_title('Size Mismatch')

    plt.tight_layout()
    plt.show()


In [None]:
# Detailed particle analysis for a few events
def _print_event_particles(sliced, particle_counts, event_idx):
    """Print pt/eta/phi/mass of every selected particle of one event.

    Only particle types that are requested in `particle_counts` (count > 0)
    and present in `sliced.fields` are shown. `mass` falls back to 0 when a
    particle record has no such attribute.
    """
    for particle_type, count in particle_counts.items():
        if count <= 0 or particle_type not in sliced.fields:
            continue
        particles = sliced[particle_type][event_idx]
        if len(particles) == 0:
            continue
        print(f"    {particle_type} ({len(particles)} particles):")
        for i, p in enumerate(particles):
            print(f"      [{i}] pt={p.pt:.2f}, eta={p.eta:.2f}, phi={p.phi:.2f}, mass={getattr(p, 'mass', 0):.4f}")


if sliced1 is not None and sliced2 is not None and len(sliced1) > 0 and len(sliced2) > 0:
    print("=== Detailed Particle Analysis (First 5 Events) ===\n")

    # NOTE: events are NOT matched between the two combinations here; index i
    # of sliced1 and index i of sliced2 refer to the same underlying event
    # only if both selections kept the same events in the same order.
    # For now, just show the first few events of each.
    num_events_to_show = min(5, len(sliced1), len(sliced2))

    # Convert once instead of on every loop iteration.
    im1_values = ak.to_numpy(im1)
    im2_values = ak.to_numpy(im2)
    same_length = len(im1) == len(im2)

    for event_idx in range(num_events_to_show):
        print(f"\nEvent {event_idx}:")

        print(f"  Combination 1 ({combination1}):")
        _print_event_particles(sliced1, combination1, event_idx)

        print(f"  Combination 2 ({combination2}):")
        _print_event_particles(sliced2, combination2, event_idx)

        # Show invariant masses when both arrays have an entry at this index.
        if len(im1) > event_idx and len(im2) > event_idx:
            print(f"  Invariant Mass:")
            print(f"    Combination 1: {im1_values[event_idx]:.6f} GeV")
            print(f"    Combination 2: {im2_values[event_idx]:.6f} GeV")
            if same_length:
                diff = im1_values[event_idx] - im2_values[event_idx]
                print(f"    Difference: {diff:.6f} GeV")


## Conclusions

Based on the analysis above:

1. **Are the combinations using different particles?**
   - Combination 1 includes: 2 Electrons, 1 Jet, 2 Photons
   - Combination 2 includes: 2 Electrons, 1 Jet
   - The difference is: Combination 1 has additional particles (2 Photons)

2. **Are the invariant masses different?**
   - If arrays are identical: ⚠️ Potential bug - additional particles should affect the result
   - If arrays differ: ✓ Expected - different particle combinations produce different invariant masses
   - The magnitude of difference depends on the energy/mass of the additional particles

3. **Why might they be similar?**
   - If the photons carry very little energy, their contribution to the invariant mass is small (photons are massless, so only their energy and momentum matter)
   - If the same events are selected for both combinations, the base particles are the same
   - The additional particles might be at low pt, contributing minimally to the total invariant mass

4. **Verification:**
   - Check that the selected particles are actually different between combinations
   - Verify that the invariant mass calculation includes all particles in the combination
   - Ensure that slicing (top N by pt) selects the correct particles for each combination
