In [1]:
!python ../src/utils.py
%reload_ext autoreload

# Mass Reconstruction for Each dE/dx Estimator

This notebook reconstructs the mass for each track using different dE/dx estimators (harmonic mean, truncated mean, etc.) from AOD data. The mass is calculated using the formula:

$$ I_h = K \frac{m^2}{p^2} + C $$

where:
- $I_h$ is the estimator mean for each track,
- $p$ is the track momentum (for high-energy tracks, $p \approx p_T$),
- $K = 2.49$ and $C = 3.18$ (from 2018 simulation).

For each estimator, we solve for $m$:

$$ m = p \cdot \sqrt{\frac{I_h - C}{K}} $$

Each cell below reconstructs and plots the mass distribution for a different estimator.


In [2]:
# Import all required libraries
import numpy as np
import matplotlib.pyplot as plt
import ROOT as rt
import pandas as pd
from ROOT import VecOps
import utils

# Enable JSROOT 
%jsroot on

from analysis import df_filtered, HMNCSBR, TRUNCSBR


In [3]:
# Constants from 2018 simulation
K = 2.49
C = 3.18

# Load and prepare data
columns_needed = ['IsoTrack_pt'] + HMNCSBR + TRUNCSBR
df_pd = df_filtered.AsNumpy(columns_needed)

print(f"Constants: K = {K}, C = {C}\n")
print(f"Available estimators:")
print(f"Harmonic means: {HMNCSBR}")
print(f"Truncated means: {TRUNCSBR}")
print(f"Number of tracks: {len(df_pd['IsoTrack_pt'])}")
print(columns_needed)


# Quick peek at the data
print("\nKeys:", list(df_pd.keys()))


Constants: K = 2.49, C = 3.18

Available estimators:
Harmonic means: ['DeDx_IhStrip1', 'DeDx_IhStrip', 'DeDx_IhStrip3', 'DeDx_IhStrip4']
Truncated means: ['DeDx_ItStrip0', 'DeDx_ItStrip5', 'DeDx_ItStrip10', 'DeDx_ItStrip15', 'DeDx_ItStrip20', 'DeDx_ItStrip25', 'DeDx_ItStrip30', 'DeDx_ItStrip35', 'DeDx_ItStrip']
Number of tracks: 911
['IsoTrack_pt', 'DeDx_IhStrip1', 'DeDx_IhStrip', 'DeDx_IhStrip3', 'DeDx_IhStrip4', 'DeDx_ItStrip0', 'DeDx_ItStrip5', 'DeDx_ItStrip10', 'DeDx_ItStrip15', 'DeDx_ItStrip20', 'DeDx_ItStrip25', 'DeDx_ItStrip30', 'DeDx_ItStrip35', 'DeDx_ItStrip']

Keys: ['IsoTrack_pt', 'DeDx_IhStrip1', 'DeDx_IhStrip', 'DeDx_IhStrip3', 'DeDx_IhStrip4', 'DeDx_ItStrip0', 'DeDx_ItStrip5', 'DeDx_ItStrip10', 'DeDx_ItStrip15', 'DeDx_ItStrip20', 'DeDx_ItStrip25', 'DeDx_ItStrip30', 'DeDx_ItStrip35', 'DeDx_ItStrip']


In [4]:
# Inspect df_pd structure and content
print("=== df_pd Structure ===")
print(f"Type: {type(df_pd)}")
print(f"Keys (columns): {list(df_pd.keys())}")

print("\n=== Shape and Types ===")
for key in df_pd.keys():
    data = df_pd[key]
    print(f"{key}: shape={np.array(data).shape}, type={type(data)}")
    
    # Show first few values
    if hasattr(data, '__len__') and len(data) > 0:
        try:
            sample = data[:5] if len(data) > 5 else data
            print(f"  First few values: {sample}")
        except:
            print(f"  Sample value: {data[0] if len(data) > 0 else 'No data'}")
    print()

print("\n=== Alternative: Use pandas for better display ===")


# Convert to pandas DataFrame for better visualization
try:
    # Try to convert to pandas - handle potential nested arrays
    df_display = {}
    for key, values in df_pd.items():
        # Take first 10 entries for display
        if hasattr(values, '__len__') and len(values) > 0:
            df_display[key] = values[:10]
        else:
            df_display[key] = values
    
    pdf = pd.DataFrame(df_display)
    print("First 10 rows as pandas DataFrame:")
    print(pdf)
except Exception as e:
    print(f"Could not convert to pandas DataFrame: {e}")
    
    # Fallback: show raw data structure
    print("\nRaw data preview:")
    for key in list(df_pd.keys())[:3]:  # Show first 3 columns
        print(f"\n{key}:")
        data = df_pd[key]
        if hasattr(data, '__len__') and len(data) > 0:
            print(f"  Length: {len(data)}")
            print(f"  First 5 values: {data[:5]}")
        else:
            print(f"  Value: {data}")

=== df_pd Structure ===
Type: <class 'dict'>
Keys (columns): ['IsoTrack_pt', 'DeDx_IhStrip1', 'DeDx_IhStrip', 'DeDx_IhStrip3', 'DeDx_IhStrip4', 'DeDx_ItStrip0', 'DeDx_ItStrip5', 'DeDx_ItStrip10', 'DeDx_ItStrip15', 'DeDx_ItStrip20', 'DeDx_ItStrip25', 'DeDx_ItStrip30', 'DeDx_ItStrip35', 'DeDx_ItStrip']

=== Shape and Types ===
IsoTrack_pt: shape=(911,), type=<class 'ROOT._pythonization._rdf_utils.ndarray'>
  First few values: [<cppyy.gbl.ROOT.VecOps.RVec<double> object at 0x5e534308a260>
 <cppyy.gbl.ROOT.VecOps.RVec<double> object at 0x5e534308a2b0>
 <cppyy.gbl.ROOT.VecOps.RVec<double> object at 0x5e534308a300>
 <cppyy.gbl.ROOT.VecOps.RVec<double> object at 0x5e534308a350>
 <cppyy.gbl.ROOT.VecOps.RVec<double> object at 0x5e534308a3a0>]

DeDx_IhStrip1: shape=(911,), type=<class 'ROOT._pythonization._rdf_utils.ndarray'>
  First few values: [<cppyy.gbl.ROOT.VecOps.RVec<float> object at 0x5e534309e270>
 <cppyy.gbl.ROOT.VecOps.RVec<float> object at 0x5e534309e2b0>
 <cppyy.gbl.ROOT.VecOps.RVec

In [5]:
#sanity‑check on if the arrays have consistent shapes

keys = [
    "IsoTrack_pt",
    "DeDx_IhStrip1", "DeDx_IhStrip", "DeDx_IhStrip3", "DeDx_IhStrip4",
    "DeDx_ItStrip0", "DeDx_ItStrip5", "DeDx_ItStrip10", "DeDx_ItStrip15",
    "DeDx_ItStrip20", "DeDx_ItStrip25", "DeDx_ItStrip30", "DeDx_ItStrip35",
    "DeDx_ItStrip",
]

ok, bad_rows = utils.check_branch_shapes(df_pd, keys)
if ok:
    print("All arrays have consistent shapes.")
else:
    print(f"Arrays have inconsistent shapes. Bad rows: {bad_rows}")
    print("Check the data for potential issues.")

        
        

All rows consistent? True
All arrays have consistent shapes.


In [4]:
# Extract actual values from ROOT data structures
print("=== Extracting Actual Values ===")

# Method 1: Use VecOps to convert ROOT vectors to Python lists
print("\n--- Using VecOps conversion ---")
for key in list(df_pd.keys())[:3]:  # Show first 3 columns
    data = df_pd[key]
    print(f"\n{key}:")
    print(f"  Raw data type: {type(data)}")
    print(f"  Length: {len(data)}")
    
    # Convert first few entries to see actual values
    actual_values = []
    for i in range(min(5, len(data))):
        try:
            # Try to convert using VecOps
            if hasattr(data[i], '__len__'):  # If it's array-like
                val = list(data[i])  # Convert to Python list
            else:
                val = data[i]  # Already a scalar
            actual_values.append(val)
        except Exception as e:
            actual_values.append(f"Error: {e}")
    
    print(f"  First 5 actual values: {actual_values}")

# Method 2: Direct numpy conversion
print("\n--- Using numpy array conversion ---")
try:
    for key in ['IsoTrack_pt', 'DeDx_IhStrip']:
        if key in df_pd:
            data = df_pd[key]
            print(f"\n{key}:")
            
            # Convert to numpy array and show statistics
            np_array = np.array(data)
            print(f"  Numpy shape: {np_array.shape}")
            print(f"  Numpy dtype: {np_array.dtype}")
            
            # If it's an array of scalars
            if np_array.ndim == 1:
                print(f"  Min: {np_array.min():.3f}")
                print(f"  Max: {np_array.max():.3f}")
                print(f"  Mean: {np_array.mean():.3f}")
                print(f"  First 10 values: {np_array[:10]}")
            else:
                print(f"  Complex array structure - showing first element: {np_array[0]}")
                
except Exception as e:
    print(f"Numpy conversion failed: {e}")

# Method 3: For ROOT RDataFrame, use Display() on original df_filtered
print("\n--- Original RDataFrame Display ---")
try:
    print("First 10 rows from original RDataFrame:")
    df_filtered.Display(10)
except Exception as e:
    print(f"Display failed: {e}")
    
    # Alternative: Show some statistics
    try:
        print("RDataFrame statistics:")
        print(f"Number of entries: {df_filtered.Count().GetValue()}")
        
        # Show a few specific values
        pt_values = df_filtered.Take("IsoTrack_pt", 5)
        dedx_values = df_filtered.Take("DeDx_IhStrip", 5)
        
        print(f"First 5 pt values: {list(pt_values)}")
        print(f"First 5 DeDx_IhStrip values: {list(dedx_values)}")
        
    except Exception as e2:
        print(f"Alternative display also failed: {e2}")

=== Extracting Actual Values ===

--- Using VecOps conversion ---

IsoTrack_pt:
  Raw data type: <class 'ROOT._pythonization._rdf_utils.ndarray'>
  Length: 911
  First 5 actual values: [[1414.0664004311288, 33.59526552626143], [1432.929478340372, 1465.1806541163992], [1633.4846152915236], [2016.5338978383463, 1837.4766899466679], [693.5901758231792]]

DeDx_IhStrip1:
  Raw data type: <class 'ROOT._pythonization._rdf_utils.ndarray'>
  Length: 911
  First 5 actual values: [[6.2467265129089355, 3.4969427585601807], [5.511447906494141, 5.713008403778076], [5.352411270141602], [6.777451992034912, 7.3498711585998535], [4.622747421264648]]

DeDx_IhStrip:
  Raw data type: <class 'ROOT._pythonization._rdf_utils.ndarray'>
  Length: 911
  First 5 actual values: [[6.19785213470459, 3.3484208583831787], [5.4514031410217285, 5.572141170501709], [5.29805850982666], [6.663329124450684, 7.245083332061768], [4.446615219116211]]

--- Using numpy array conversion ---

IsoTrack_pt:
  Numpy shape: (911,)
  N

In [5]:
# Data successfully loaded in previous cell
print("Ready to perform mass reconstruction for all estimators")

Ready to perform mass reconstruction for all estimators


## Harmonic Mean Estimator Mass Reconstruction

In [6]:
# Mass reconstruction for harmonic mean estimator (DeDx_IhStrip)

# Flatten and convert to numpy arrays
pt_data = df_pd['IsoTrack_pt']
ih_data = df_pd['DeDx_IhStrip']

# Convert to numpy and flatten if needed
if hasattr(pt_data, '__len__') and len(pt_data) > 0:
    pt = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data])
    Ih_harmonic = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data])
else:
    pt = np.array(pt_data)
    Ih_harmonic = np.array(ih_data)

print(f"Data shape - pt: {pt.shape}, Ih: {Ih_harmonic.shape}")
print(f"pt range: {np.min(pt):.3f} - {np.max(pt):.3f} GeV")
print(f"Ih range: {np.min(Ih_harmonic):.3f} - {np.max(Ih_harmonic):.3f}")

# Only use tracks where Ih > C to avoid sqrt of negative
mask = Ih_harmonic > C
print(f"Tracks with Ih > C: {np.sum(mask)} / {len(Ih_harmonic)}")

mass_harmonic = np.zeros_like(pt)
mass_harmonic[mask] = pt[mask] * np.sqrt((Ih_harmonic[mask] - C) / K)

print(f"Mass range: {np.min(mass_harmonic[mask]):.3f} - {np.max(mass_harmonic[mask]):.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
h_mass_harmonic = rt.TH1D("h_mass_harmonic", 
                           "Mass Distribution (Harmonic Mean Estimator - DeDx_IhStrip);Reconstructed Mass [GeV];Number of Tracks",
                           100, 0, 5000)

# Fill histogram
for mass in mass_harmonic[mask]:
    h_mass_harmonic.Fill(mass)

# Style the histogram
h_mass_harmonic.SetFillColor(rt.kBlue)
h_mass_harmonic.SetFillStyle(1001)
h_mass_harmonic.SetLineColor(rt.kBlue+2)
h_mass_harmonic.SetLineWidth(2)

# Create canvas and draw histogram
canvas_harmonic = rt.TCanvas("canvas_harmonic", "Harmonic Mean Mass Distribution", 800, 600)
h_mass_harmonic.Draw("HIST")
canvas_harmonic.Draw()

print(f"Harmonic Mean (DeDx_IhStrip): {len(mass_harmonic[mask])} tracks with valid mass reconstruction")
print(f"Mean mass: {np.mean(mass_harmonic[mask]):.1f} GeV")
print(f"Median mass: {np.median(mass_harmonic[mask]):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 21.065
Tracks with Ih > C: 1610 / 1802
Mass range: 0.511 - 375470.253 GeV
Harmonic Mean (DeDx_IhStrip): 1610 tracks with valid mass reconstruction
Mean mass: 1304.1 GeV
Median mass: 829.2 GeV


## Truncated Mean Estimator Mass Reconstruction

In [7]:
# Mass reconstruction for Default Truncated Mean Estimator (DeDx_ItStrip)

# Get the default truncated mean estimator data
pt_data_default = df_pd['IsoTrack_pt']
ih_data_default = df_pd['DeDx_ItStrip']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_default = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_default])
    Ih_default = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_default])
except:
    # Fallback to direct conversion if data is already flat
    pt_default = np.array(pt_data_default)
    Ih_default = np.array(ih_data_default)

print(f"Data shape - pt: {pt_default.shape}, Ih: {Ih_default.shape}")
print(f"pt range: {pt_default.min():.3f} - {pt_default.max():.3f} GeV")
print(f"Ih range: {Ih_default.min():.3f} - {Ih_default.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_default = Ih_default > C
print(f"Tracks with Ih > C: {np.sum(mask_default)} / {len(Ih_default)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_default = pt_default[mask_default] * np.sqrt((Ih_default[mask_default] - C) / K)

print(f"Mass range: {mass_default.min():.3f} - {mass_default.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_default = rt.TH1D("hist_default", "Mass Distribution (Default Truncated Mean Estimator - DeDx_ItStrip);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_default:
    hist_default.Fill(mass)

# Style and draw histogram
hist_default.SetFillColor(rt.kOrange)
hist_default.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_default = rt.TCanvas("canvas_default", "Default Truncated Mean Mass Distribution", 800, 600)
hist_default.Draw("HIST")
canvas_default.Draw()

print(f"Default Truncated Mean (DeDx_ItStrip): {np.sum(mask_default)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_default.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_default):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.111
Tracks with Ih > C: 1440 / 1802
Mass range: 2.720 - 342737.520 GeV
Default Truncated Mean (DeDx_ItStrip): 1440 tracks with valid mass reconstruction
Mean mass: 1327.1 GeV
Median mass: 840.5 GeV


## Other Harmonic Mean Estimators

In [8]:
# Mass reconstruction for Harmonic Mean Estimator (DeDx_IhStrip1)

# Get the harmonic mean estimator data
pt_data_h1 = df_pd['IsoTrack_pt']
ih_data_h1 = df_pd['DeDx_IhStrip1']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_h1 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_h1])
    Ih_h1 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_h1])
except:
    # Fallback to direct conversion if data is already flat
    pt_h1 = np.array(pt_data_h1)
    Ih_h1 = np.array(ih_data_h1)

print(f"Data shape - pt: {pt_h1.shape}, Ih: {Ih_h1.shape}")
print(f"pt range: {pt_h1.min():.3f} - {pt_h1.max():.3f} GeV")
print(f"Ih range: {Ih_h1.min():.3f} - {Ih_h1.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_h1 = Ih_h1 > C
print(f"Tracks with Ih > C: {np.sum(mask_h1)} / {len(Ih_h1)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_h1 = pt_h1[mask_h1] * np.sqrt((Ih_h1[mask_h1] - C) / K)

print(f"Mass range: {mass_h1.min():.3f} - {mass_h1.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_h1 = rt.TH1D("hist_harm1", "Mass Distribution (Harmonic Mean Estimator - DeDx_IhStrip1);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_h1:
    hist_h1.Fill(mass)

# Style and draw histogram
hist_h1.SetFillColor(rt.kBlue)
hist_h1.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_h1 = rt.TCanvas("canvas_h1", "Harmonic Mean 1 Mass Distribution", 800, 600)
hist_h1.Draw("HIST")
canvas_h1.Draw()

print(f"Harmonic Mean (DeDx_IhStrip1): {np.sum(mask_h1)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_h1.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_h1):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 21.198
Tracks with Ih > C: 1641 / 1802
Mass range: 2.269 - 380678.809 GeV
Harmonic Mean (DeDx_IhStrip1): 1641 tracks with valid mass reconstruction
Mean mass: 1312.9 GeV
Median mass: 837.6 GeV


In [9]:
# Mass reconstruction for Harmonic Mean Estimator (DeDx_IhStrip3)

# Get the harmonic mean estimator data
pt_data_h3 = df_pd['IsoTrack_pt']
ih_data_h3 = df_pd['DeDx_IhStrip3']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_h3 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_h3])
    Ih_h3 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_h3])
except:
    # Fallback to direct conversion if data is already flat
    pt_h3 = np.array(pt_data_h3)
    Ih_h3 = np.array(ih_data_h3)

print(f"Data shape - pt: {pt_h3.shape}, Ih: {Ih_h3.shape}")
print(f"pt range: {pt_h3.min():.3f} - {pt_h3.max():.3f} GeV")
print(f"Ih range: {Ih_h3.min():.3f} - {Ih_h3.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_h3 = Ih_h3 > C
print(f"Tracks with Ih > C: {np.sum(mask_h3)} / {len(Ih_h3)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_h3 = pt_h3[mask_h3] * np.sqrt((Ih_h3[mask_h3] - C) / K)

print(f"Mass range: {mass_h3.min():.3f} - {mass_h3.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_h3 = rt.TH1D("hist_harm3", "Mass Distribution (Harmonic Mean Estimator - DeDx_IhStrip3);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_h3:
    hist_h3.Fill(mass)

# Style and draw histogram
hist_h3.SetFillColor(rt.kMagenta)
hist_h3.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_h3 = rt.TCanvas("canvas_h3", "Harmonic Mean 3 Mass Distribution", 800, 600)
hist_h3.Draw("HIST")
canvas_h3.Draw()

print(f"Harmonic Mean (DeDx_IhStrip3): {np.sum(mask_h3)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_h3.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_h3):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.930
Tracks with Ih > C: 1561 / 1802
Mass range: 2.386 - 370864.822 GeV
Harmonic Mean (DeDx_IhStrip3): 1561 tracks with valid mass reconstruction
Mean mass: 1313.4 GeV
Median mass: 844.2 GeV


In [10]:
# Mass reconstruction for Harmonic Mean Estimator (DeDx_IhStrip4)

# Get the harmonic mean estimator data
pt_data_h4 = df_pd['IsoTrack_pt']
ih_data_h4 = df_pd['DeDx_IhStrip4']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_h4 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_h4])
    Ih_h4 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_h4])
except:
    # Fallback to direct conversion if data is already flat
    pt_h4 = np.array(pt_data_h4)
    Ih_h4 = np.array(ih_data_h4)

print(f"Data shape - pt: {pt_h4.shape}, Ih: {Ih_h4.shape}")
print(f"pt range: {pt_h4.min():.3f} - {pt_h4.max():.3f} GeV")
print(f"Ih range: {Ih_h4.min():.3f} - {Ih_h4.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_h4 = Ih_h4 > C
print(f"Tracks with Ih > C: {np.sum(mask_h4)} / {len(Ih_h4)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_h4 = pt_h4[mask_h4] * np.sqrt((Ih_h4[mask_h4] - C) / K)

print(f"Mass range: {mass_h4.min():.3f} - {mass_h4.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_h4 = rt.TH1D("hist_harm4", "Mass Distribution (Harmonic Mean Estimator - DeDx_IhStrip4);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_h4:
    hist_h4.Fill(mass)

# Style and draw histogram
hist_h4.SetFillColor(rt.kCyan)
hist_h4.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_h4 = rt.TCanvas("canvas_h4", "Harmonic Mean 4 Mass Distribution", 800, 600)
hist_h4.Draw("HIST")
canvas_h4.Draw()

print(f"Harmonic Mean (DeDx_IhStrip4): {np.sum(mask_h4)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_h4.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_h4):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.795
Tracks with Ih > C: 1502 / 1802
Mass range: 0.333 - 366819.792 GeV
Harmonic Mean (DeDx_IhStrip4): 1502 tracks with valid mass reconstruction
Mean mass: 1335.5 GeV
Median mass: 859.9 GeV


## Other Truncated Mean Estimators

In [11]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip0)

# Get the truncated mean estimator data
pt_data_t0 = df_pd['IsoTrack_pt']
ih_data_t0 = df_pd['DeDx_ItStrip0']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t0 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t0])
    Ih_t0 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t0])
except:
    # Fallback to direct conversion if data is already flat
    pt_t0 = np.array(pt_data_t0)
    Ih_t0 = np.array(ih_data_t0)

print(f"Data shape - pt: {pt_t0.shape}, Ih: {Ih_t0.shape}")
print(f"pt range: {pt_t0.min():.3f} - {pt_t0.max():.3f} GeV")
print(f"Ih range: {Ih_t0.min():.3f} - {Ih_t0.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t0 = Ih_t0 > C
print(f"Tracks with Ih > C: {np.sum(mask_t0)} / {len(Ih_t0)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t0 = pt_t0[mask_t0] * np.sqrt((Ih_t0[mask_t0] - C) / K)

print(f"Mass range: {mass_t0.min():.3f} - {mass_t0.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t0 = rt.TH1D("hist_trunc0", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip0);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t0:
    hist_t0.Fill(mass)

# Style and draw histogram
hist_t0.SetFillColor(rt.kRed)
hist_t0.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t0 = rt.TCanvas("canvas_t0", "Truncated Mean 0 Mass Distribution", 800, 600)
hist_t0.Draw("HIST")
canvas_t0.Draw()

print(f"Truncated Mean (DeDx_ItStrip0): {np.sum(mask_t0)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t0.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t0):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 21.455
Tracks with Ih > C: 1668 / 1802
Mass range: 2.276 - 392887.742 GeV
Truncated Mean (DeDx_ItStrip0): 1668 tracks with valid mass reconstruction
Mean mass: 1370.1 GeV
Median mass: 866.0 GeV


In [12]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip5)

# Get the truncated mean estimator data
pt_data_t5 = df_pd['IsoTrack_pt']
ih_data_t5 = df_pd['DeDx_ItStrip5']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t5 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t5])
    Ih_t5 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t5])
except:
    # Fallback to direct conversion if data is already flat
    pt_t5 = np.array(pt_data_t5)
    Ih_t5 = np.array(ih_data_t5)

print(f"Data shape - pt: {pt_t5.shape}, Ih: {Ih_t5.shape}")
print(f"pt range: {pt_t5.min():.3f} - {pt_t5.max():.3f} GeV")
print(f"Ih range: {Ih_t5.min():.3f} - {Ih_t5.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t5 = Ih_t5 > C
print(f"Tracks with Ih > C: {np.sum(mask_t5)} / {len(Ih_t5)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t5 = pt_t5[mask_t5] * np.sqrt((Ih_t5[mask_t5] - C) / K)

print(f"Mass range: {mass_t5.min():.3f} - {mass_t5.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t5 = rt.TH1D("hist_trunc5", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip5);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t5:
    hist_t5.Fill(mass)

# Style and draw histogram
hist_t5.SetFillColor(rt.kGreen)
hist_t5.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t5 = rt.TCanvas("canvas_t5", "Truncated Mean 5 Mass Distribution", 800, 600)
hist_t5.Draw("HIST")
canvas_t5.Draw()

print(f"Truncated Mean (DeDx_ItStrip5): {np.sum(mask_t5)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t5.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t5):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 21.455
Tracks with Ih > C: 1668 / 1802
Mass range: 2.276 - 392887.742 GeV
Truncated Mean (DeDx_ItStrip5): 1668 tracks with valid mass reconstruction
Mean mass: 1369.4 GeV
Median mass: 866.0 GeV


In [13]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip10)

# Get the truncated mean estimator data
pt_data_t10 = df_pd['IsoTrack_pt']
ih_data_t10 = df_pd['DeDx_ItStrip10']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t10 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t10])
    Ih_t10 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t10])
except:
    # Fallback to direct conversion if data is already flat
    pt_t10 = np.array(pt_data_t10)
    Ih_t10 = np.array(ih_data_t10)

print(f"Data shape - pt: {pt_t10.shape}, Ih: {Ih_t10.shape}")
print(f"pt range: {pt_t10.min():.3f} - {pt_t10.max():.3f} GeV")
print(f"Ih range: {Ih_t10.min():.3f} - {Ih_t10.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t10 = Ih_t10 > C
print(f"Tracks with Ih > C: {np.sum(mask_t10)} / {len(Ih_t10)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t10 = pt_t10[mask_t10] * np.sqrt((Ih_t10[mask_t10] - C) / K)

print(f"Mass range: {mass_t10.min():.3f} - {mass_t10.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t10 = rt.TH1D("hist_trunc10", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip10);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t10:
    hist_t10.Fill(mass)

# Style and draw histogram
hist_t10.SetFillColor(rt.kViolet)
hist_t10.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t10 = rt.TCanvas("canvas_t10", "Truncated Mean 10 Mass Distribution", 800, 600)
hist_t10.Draw("HIST")
canvas_t10.Draw()

print(f"Truncated Mean (DeDx_ItStrip10): {np.sum(mask_t10)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t10.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t10):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 21.455
Tracks with Ih > C: 1652 / 1802
Mass range: 1.788 - 392887.742 GeV
Truncated Mean (DeDx_ItStrip10): 1652 tracks with valid mass reconstruction
Mean mass: 1330.3 GeV
Median mass: 848.1 GeV


In [14]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip15)

# Get the truncated mean estimator data
pt_data_t15 = df_pd['IsoTrack_pt']
ih_data_t15 = df_pd['DeDx_ItStrip15']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t15 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t15])
    Ih_t15 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t15])
except:
    # Fallback to direct conversion if data is already flat
    pt_t15 = np.array(pt_data_t15)
    Ih_t15 = np.array(ih_data_t15)

print(f"Data shape - pt: {pt_t15.shape}, Ih: {Ih_t15.shape}")
print(f"pt range: {pt_t15.min():.3f} - {pt_t15.max():.3f} GeV")
print(f"Ih range: {Ih_t15.min():.3f} - {Ih_t15.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t15 = Ih_t15 > C
print(f"Tracks with Ih > C: {np.sum(mask_t15)} / {len(Ih_t15)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t15 = pt_t15[mask_t15] * np.sqrt((Ih_t15[mask_t15] - C) / K)

print(f"Mass range: {mass_t15.min():.3f} - {mass_t15.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t15 = rt.TH1D("hist_trunc15", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip15);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t15:
    hist_t15.Fill(mass)

# Style and draw histogram
hist_t15.SetFillColor(rt.kGreen)
hist_t15.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t15 = rt.TCanvas("canvas_t15", "Truncated Mean 15 Mass Distribution", 800, 600)
hist_t15.Draw("HIST")
canvas_t15.Draw()

print(f"Truncated Mean (DeDx_ItStrip15): {np.sum(mask_t15)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t15.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t15):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 21.455
Tracks with Ih > C: 1637 / 1802
Mass range: 1.788 - 375528.558 GeV
Truncated Mean (DeDx_ItStrip15): 1637 tracks with valid mass reconstruction
Mean mass: 1310.1 GeV
Median mass: 831.8 GeV


In [15]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip20)

# Get the truncated mean estimator data
pt_data_t20 = df_pd['IsoTrack_pt']
ih_data_t20 = df_pd['DeDx_ItStrip20']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t20 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t20])
    Ih_t20 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t20])
except:
    # Fallback to direct conversion if data is already flat
    pt_t20 = np.array(pt_data_t20)
    Ih_t20 = np.array(ih_data_t20)

print(f"Data shape - pt: {pt_t20.shape}, Ih: {Ih_t20.shape}")
print(f"pt range: {pt_t20.min():.3f} - {pt_t20.max():.3f} GeV")
print(f"Ih range: {Ih_t20.min():.3f} - {Ih_t20.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t20 = Ih_t20 > C
print(f"Tracks with Ih > C: {np.sum(mask_t20)} / {len(Ih_t20)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t20 = pt_t20[mask_t20] * np.sqrt((Ih_t20[mask_t20] - C) / K)

print(f"Mass range: {mass_t20.min():.3f} - {mass_t20.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t20 = rt.TH1D("hist_trunc20", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip20);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t20:
    hist_t20.Fill(mass)

# Style and draw histogram
hist_t20.SetFillColor(rt.kYellow)
hist_t20.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t20 = rt.TCanvas("canvas_t20", "Truncated Mean 20 Mass Distribution", 800, 600)
hist_t20.Draw("HIST")
canvas_t20.Draw()

print(f"Truncated Mean (DeDx_ItStrip20): {np.sum(mask_t20)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t20.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t20):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.879
Tracks with Ih > C: 1608 / 1802
Mass range: 1.872 - 375528.558 GeV
Truncated Mean (DeDx_ItStrip20): 1608 tracks with valid mass reconstruction
Mean mass: 1296.9 GeV
Median mass: 820.8 GeV


In [16]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip25)

# Get the truncated mean estimator data
pt_data_t25 = df_pd['IsoTrack_pt']
ih_data_t25 = df_pd['DeDx_ItStrip25']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t25 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t25])
    Ih_t25 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t25])
except:
    # Fallback to direct conversion if data is already flat
    pt_t25 = np.array(pt_data_t25)
    Ih_t25 = np.array(ih_data_t25)

print(f"Data shape - pt: {pt_t25.shape}, Ih: {Ih_t25.shape}")
print(f"pt range: {pt_t25.min():.3f} - {pt_t25.max():.3f} GeV")
print(f"Ih range: {Ih_t25.min():.3f} - {Ih_t25.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t25 = Ih_t25 > C
print(f"Tracks with Ih > C: {np.sum(mask_t25)} / {len(Ih_t25)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t25 = pt_t25[mask_t25] * np.sqrt((Ih_t25[mask_t25] - C) / K)

print(f"Mass range: {mass_t25.min():.3f} - {mass_t25.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t25 = rt.TH1D("hist_trunc25", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip25);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t25:
    hist_t25.Fill(mass)

# Style and draw histogram
hist_t25.SetFillColor(rt.kOrange+2)
hist_t25.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t25 = rt.TCanvas("canvas_t25", "Truncated Mean 25 Mass Distribution", 800, 600)
hist_t25.Draw("HIST")
canvas_t25.Draw()

print(f"Truncated Mean (DeDx_ItStrip25): {np.sum(mask_t25)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t25.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t25):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.879
Tracks with Ih > C: 1557 / 1802
Mass range: 0.644 - 353210.518 GeV
Truncated Mean (DeDx_ItStrip25): 1557 tracks with valid mass reconstruction
Mean mass: 1300.5 GeV
Median mass: 831.1 GeV
Median mass: 831.1 GeV


In [17]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip30)

# Get the truncated mean estimator data
pt_data_t30 = df_pd['IsoTrack_pt']
ih_data_t30 = df_pd['DeDx_ItStrip30']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t30 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t30])
    Ih_t30 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t30])
except:
    # Fallback to direct conversion if data is already flat
    pt_t30 = np.array(pt_data_t30)
    Ih_t30 = np.array(ih_data_t30)

print(f"Data shape - pt: {pt_t30.shape}, Ih: {Ih_t30.shape}")
print(f"pt range: {pt_t30.min():.3f} - {pt_t30.max():.3f} GeV")
print(f"Ih range: {Ih_t30.min():.3f} - {Ih_t30.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t30 = Ih_t30 > C
print(f"Tracks with Ih > C: {np.sum(mask_t30)} / {len(Ih_t30)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t30 = pt_t30[mask_t30] * np.sqrt((Ih_t30[mask_t30] - C) / K)

print(f"Mass range: {mass_t30.min():.3f} - {mass_t30.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t30 = rt.TH1D("hist_trunc30", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip30);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t30:
    hist_t30.Fill(mass)

# Style and draw histogram
hist_t30.SetFillColor(rt.kRed+2)
hist_t30.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t30 = rt.TCanvas("canvas_t30", "Truncated Mean 30 Mass Distribution", 800, 600)
hist_t30.Draw("HIST")
canvas_t30.Draw()

print(f"Truncated Mean (DeDx_ItStrip30): {np.sum(mask_t30)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t30.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t30):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.879
Tracks with Ih > C: 1519 / 1802
Mass range: 0.644 - 353210.518 GeV
Truncated Mean (DeDx_ItStrip30): 1519 tracks with valid mass reconstruction
Mean mass: 1311.0 GeV
Median mass: 827.3 GeV
Truncated Mean (DeDx_ItStrip30): 1519 tracks with valid mass reconstruction
Mean mass: 1311.0 GeV
Median mass: 827.3 GeV


In [18]:
# Mass reconstruction for Truncated Mean Estimator (DeDx_ItStrip35)

# Get the truncated mean estimator data
pt_data_t35 = df_pd['IsoTrack_pt']
ih_data_t35 = df_pd['DeDx_ItStrip35']

# Convert and flatten the data properly (handle nested arrays)
try:
    pt_t35 = np.concatenate([np.array(track_pt) if hasattr(track_pt, '__iter__') else [track_pt] for track_pt in pt_data_t35])
    Ih_t35 = np.concatenate([np.array(track_ih) if hasattr(track_ih, '__iter__') else [track_ih] for track_ih in ih_data_t35])
except:
    # Fallback to direct conversion if data is already flat
    pt_t35 = np.array(pt_data_t35)
    Ih_t35 = np.array(ih_data_t35)

print(f"Data shape - pt: {pt_t35.shape}, Ih: {Ih_t35.shape}")
print(f"pt range: {pt_t35.min():.3f} - {pt_t35.max():.3f} GeV")
print(f"Ih range: {Ih_t35.min():.3f} - {Ih_t35.max():.3f}")

# Apply mask for valid Ih values (Ih > C)
mask_t35 = Ih_t35 > C
print(f"Tracks with Ih > C: {np.sum(mask_t35)} / {len(Ih_t35)}")

# Mass reconstruction using the formula: m = p * sqrt((Ih - C) / K)
mass_t35 = pt_t35[mask_t35] * np.sqrt((Ih_t35[mask_t35] - C) / K)

print(f"Mass range: {mass_t35.min():.3f} - {mass_t35.max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_t35 = rt.TH1D("hist_trunc35", "Mass Distribution (Truncated Mean Estimator - DeDx_ItStrip35);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_t35:
    hist_t35.Fill(mass)

# Style and draw histogram
hist_t35.SetFillColor(rt.kPink)
hist_t35.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_t35 = rt.TCanvas("canvas_t35", "Truncated Mean 35 Mass Distribution", 800, 600)
hist_t35.Draw("HIST")
canvas_t35.Draw()

print(f"Truncated Mean (DeDx_ItStrip35): {np.sum(mask_t35)} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_t35.mean():.1f} GeV")
print(f"Median mass: {np.median(mass_t35):.1f} GeV")

Data shape - pt: (1802,), Ih: (1802,)
pt range: 9.640 - 359100.965 GeV
Ih range: -1.000 - 20.879
Tracks with Ih > C: 1482 / 1802
Mass range: 0.644 - 342737.520 GeV
Truncated Mean (DeDx_ItStrip35): 1482 tracks with valid mass reconstruction
Mean mass: 1317.3 GeV
Median mass: 831.5 GeV


## Fit Estimator (Landau MPV)

In [24]:
# Debug: Check what functions are available in utils module
print("=== Checking utils module ===")
print("Available functions in utils:")
print([attr for attr in dir(utils) if not attr.startswith('_')])

# Check if 'fit' function exists
if hasattr(utils, 'fit'):
    print("✓ utils.fit function exists")
else:
    print("✗ utils.fit function NOT found")
    print("Available fit-related functions:")
    fit_funcs = [attr for attr in dir(utils) if 'fit' in attr.lower()]
    print(fit_funcs)

# Reload utils module to get latest changes
import importlib
importlib.reload(utils)
print("\n=== After reloading utils ===")
print("Available functions in utils:")
print([attr for attr in dir(utils) if not attr.startswith('_')])

# Now check again for fit function
if hasattr(utils, 'fit'):
    print("✓ utils.fit function exists after reload")
    
    # Get cluster data for fitting
    cluster = df_filtered.AsNumpy(["cluster_DeDxStrip"])["cluster_DeDxStrip"]
    
    # Use the new fit function that directly returns MPV-pt pairs
    print("\nPerforming Landau fits... this may take a moment")
    fit_results = utils.fit(cluster, threshold=2, max_hists=10000)
    
    # Extract MPV-pt pairs - handle inhomogeneous pt structure
    mpv_pt_pairs = fit_results["mpv_pt_pairs"]  # List of (mpv, pt) tuples
    
    print(f"Got {len(mpv_pt_pairs)} MPV-pt pairs")
    
    # Debug: Check structure of first few pairs
    print("\nDebugging pair structure:")
    for i in range(min(3, len(mpv_pt_pairs))):
        mpv, pt = mpv_pt_pairs[i]
        print(f"  Pair {i}: MPV type={type(mpv)}, MPV={mpv}, pt type={type(pt)}, pt shape={np.array(pt).shape if hasattr(pt, '__len__') else 'scalar'}")
    
    # Extract MPV values (should be straightforward)
    fit_mpvs = np.array([pair[0] for pair in mpv_pt_pairs])
    
    # Extract pt values - handle the inhomogeneous structure
    fit_pt_values = []
    for pair in mpv_pt_pairs:
        pt_val = pair[1]
        # If pt_val is array-like, take the first element; if scalar, use as-is
        if hasattr(pt_val, '__len__') and not isinstance(pt_val, str):
            fit_pt_values.append(pt_val[0] if len(pt_val) > 0 else 0.0)
        else:
            fit_pt_values.append(float(pt_val))
    
    fit_pt_values = np.array(fit_pt_values)
    
    print(f"Successfully fitted {len(fit_mpvs)} tracks")
    print(f"MPV array shape: {fit_mpvs.shape}")
    print(f"pt array shape: {fit_pt_values.shape}")
    print(f"Perfect pairing: {len(fit_mpvs) == len(fit_pt_values)}")
    
    # Show some example pairs to verify
    print(f"\nFirst 5 MPV-pt pairs:")
    for i in range(min(5, len(mpv_pt_pairs))):
        mpv = fit_mpvs[i]
        pt = fit_pt_values[i]
        print(f"  Track {i}: MPV = {mpv:.3f}, pt = {pt:.3f} GeV")
        
else:
    print("✗ utils.fit function still NOT found after reload")
    print("Using utils.fit_mpv instead as fallback...")
    
    # Fallback to original approach
    cluster = df_filtered.AsNumpy(["cluster_DeDxStrip"])["cluster_DeDxStrip"]
    print("Performing Landau fits with fit_mpv... this may take a moment")
    fit_results = utils.fit_mpv(cluster, threshold=2, max_hists=10000)
    fit_mpvs = [result[0] for result in fit_results["corel"]]
    print(f"Successfully fitted {len(fit_mpvs)} tracks using fit_mpv")
    print("Note: You'll need to implement the pairing logic manually")

=== Checking utils module ===
Available functions in utils:
['Any', 'ArAvg_DEDx', 'COLOR_MAP', 'Dict', 'FILTERS', 'GeoAvg_DEDx', 'HistogramDrawer', 'List', 'OUTPUT_ROOT', 'Optional', 'PLOTS_DIR', 'TAG', 'Tuple', 'apply_sequential_filters', 'build_event_index', 'date', 'draw_landau_fits', 'fit', 'fit_mpv', 'fit_range', 'freedman_diaconis_bins', 'get_attrs_for_labels', 'h1Avg_DEDx', 'harmonic2_inloop', 'logging', 'math', 'np', 'records_equal', 'report_integrals', 'rt', 'seeds', 'tree', 'uuid', 'write_stacked_histos', 'write_stacked_histos_ptr']
✓ utils.fit function exists

=== After reloading utils ===
Available functions in utils:
['Any', 'ArAvg_DEDx', 'COLOR_MAP', 'Dict', 'FILTERS', 'GeoAvg_DEDx', 'HistogramDrawer', 'List', 'OUTPUT_ROOT', 'Optional', 'PLOTS_DIR', 'TAG', 'Tuple', 'apply_sequential_filters', 'build_event_index', 'date', 'draw_landau_fits', 'fit', 'fit_mpv', 'fit_range', 'freedman_diaconis_bins', 'get_attrs_for_labels', 'h1Avg_DEDx', 'harmonic2_inloop', 'logging', 'math',

Error in <Minuit2>: VariableMetricBuilder Initial matrix not pos.def.
Error in <Minuit2>: VariableMetricBuilder Initial matrix not pos.def.
Error in <Minuit2>: VariableMetricBuilder Initial matrix not pos.def.
Info in <ROOT::Math::ParameterSettings>: lower/upper bounds outside current parameter value. The value will be set to (low+up)/2 
Info in <ROOT::Math::ParameterSettings>: lower/upper bounds outside current parameter value. The value will be set to (low+up)/2 
Info in <ROOT::Math::ParameterSettings>: lower/upper bounds outside current parameter value. The value will be set to (low+up)/2 
Info in <ROOT::Math::ParameterSettings>: lower/upper bounds outside current parameter value. The value will be set to (low+up)/2 
Info in <ROOT::Math::ParameterSettings>: lower/upper bounds outside current parameter value. The value will be set to (low+up)/2 
Error in <Minuit2>: VariableMetricBuilder Initial matrix not pos.def.
Info in <ROOT::Math::ParameterSettings>: lower/upper bounds outside cu

### Fit Estimator (Landau MPV) - Challenge Solved!

Previously, pairing MPV values with their corresponding pt values was complex because `utils.fit_mpv()` only returned MPV values for tracks that:
1. Have enough hits (threshold=2)
2. Fit successfully (Landau fit converges)
3. Are within the max_hists limit (10000)

**Solution**: We created a new `utils.fit()` function that directly returns (MPV, pt) pairs, eliminating the need for complex reconstruction logic!

In [25]:
# Mass reconstruction using the perfectly paired MPV-pt data

print("=== Mass Reconstruction with Fit Estimator ===")

# The data is already perfectly paired from utils.fit()
print(f"Working with {len(fit_mpvs)} MPV-pt pairs")

# Mass reconstruction using the formula: m = p * sqrt((mpv - C) / K)
mask_fit = fit_mpvs > C
mass_fit = np.zeros_like(fit_pt_values)
mass_fit[mask_fit] = fit_pt_values[mask_fit] * np.sqrt((fit_mpvs[mask_fit] - C) / K)

print(f"\nMass reconstruction results:")
print(f"Tracks with MPV > C: {np.sum(mask_fit)} / {len(fit_mpvs)}")
print(f"Mass range: {mass_fit[mask_fit].min():.3f} - {mass_fit[mask_fit].max():.3f} GeV")

# Create ROOT histogram for inline display with JSROOT
hist_fit = rt.TH1D("hist_fit", "Mass Distribution (Fit Estimator - Landau MPV);Reconstructed Mass [GeV];Number of Tracks", 100, 0, 5000)

# Fill histogram
for mass in mass_fit[mask_fit]:
    hist_fit.Fill(mass)

# Style and draw histogram
hist_fit.SetFillColor(rt.kViolet+2)
hist_fit.SetFillStyle(1001)

# Create canvas and draw histogram
canvas_fit = rt.TCanvas("canvas_fit", "Fit Estimator Mass Distribution", 800, 600)
hist_fit.Draw("HIST")
canvas_fit.Draw()

print(f"\nFit Estimator (Landau MPV): {len(mass_fit[mask_fit])} tracks with valid mass reconstruction")
print(f"Mean mass: {mass_fit[mask_fit].mean():.1f} GeV")
print(f"Median mass: {np.median(mass_fit[mask_fit]):.1f} GeV")
print(f"Average MPV: {np.mean(fit_mpvs):.3f}")
print(f"MPV range: {np.min(fit_mpvs):.3f} - {np.max(fit_mpvs):.3f}")

# Verification: Show perfect correspondence
print(f"\nVerification - First 5 MPV-pt-mass triplets:")
for i in range(min(5, len(fit_mpvs))):
    if mask_fit[i]:  # Only show tracks with valid mass
        print(f"  Track {i}: MPV = {fit_mpvs[i]:.3f}, pt = {fit_pt_values[i]:.3f} GeV, mass = {mass_fit[i]:.1f} GeV")

=== Mass Reconstruction with Fit Estimator ===
Working with 1670 MPV-pt pairs

Mass reconstruction results:
Tracks with MPV > C: 1417 / 1670
Mass range: 4.914 - 8828.215 GeV

Fit Estimator (Landau MPV): 1417 tracks with valid mass reconstruction
Mean mass: 509.5 GeV
Median mass: 358.5 GeV
Average MPV: 5.244
MPV range: -6.872 - 19.657

Verification - First 5 MPV-pt-mass triplets:
  Track 0: MPV = 5.800, pt = 1363.346 GeV, mass = 1398.5 GeV
  Track 2: MPV = 5.443, pt = 1144.458 GeV, mass = 1091.0 GeV
  Track 3: MPV = 5.073, pt = 346.605 GeV, mass = 302.2 GeV
  Track 4: MPV = 6.672, pt = 948.432 GeV, mass = 1123.1 GeV


