In [11]:
import uproot
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak
import pandas as pd

In [61]:
# Open the ROOT file, take its "material-tracks" tree, and show the tree's keys.
f = uproot.open('/data/jlai/iris-hep/_mapped.root')
tree = f["material-tracks"]
tree.keys()

['event_id',
 'v_x',
 'v_y',
 'v_z',
 'v_px',
 'v_py',
 'v_pz',
 'v_phi',
 'v_eta',
 't_X0',
 't_L0',
 'mat_x',
 'mat_y',
 'mat_z',
 'mat_r',
 'mat_dx',
 'mat_dy',
 'mat_dz',
 'mat_step_length',
 'mat_X0',
 'mat_L0',
 'mat_A',
 'mat_Z',
 'mat_rho',
 'sur_id',
 'sur_type',
 'sur_x',
 'sur_y',
 'sur_z',
 'sur_r',
 'sur_distance',
 'sur_pathCorrection',
 'sur_range_min',
 'sur_range_max',
 'vol_id']

In [65]:
# Branches to read from `tree`. Each name is listed exactly once, in the order
# the fields should appear in the resulting record array.
cols = [
    "v_eta", "mat_step_length", "mat_X0", "sur_range_min", "sur_range_max", "t_X0",
    "sur_id", "vol_id", "sur_type", "sur_r", "sur_pathCorrection", "mat_r", "mat_dx", "mat_dz",
    "sur_distance",
]

# Load the selected branches as an awkward record array.
arr = tree.arrays(cols, library="ak")

# Last expression of the cell: display one of the loaded fields.
arr["sur_range_min"]

In [31]:
# Peek at the `t_X0` field of the loaded awkward array.
arr["t_X0"]

In [58]:

# Count how often each exact (unrounded) value of one field of `arr` occurs.
# The field name is kept in a single variable so that the values read and the
# column label of the result can never disagree.
field = "mat_r"

# Flatten the jagged field to a 1-D NumPy array and drop NaN/Inf entries.
vals = ak.to_numpy(ak.flatten(arr[field]))
vals = vals[np.isfinite(vals)]

# Unique values with their multiplicities, most frequent first.
uniq, counts = np.unique(vals, return_counts=True)
counts_exact = (
    pd.DataFrame({field: uniq, "count": counts})
    .sort_values("count", ascending=False)
    .reset_index(drop=True)
)
counts_exact

Unnamed: 0,mat_X0,count
0,23.600000,8264
1,37.000000,8248
2,202.000000,7318
3,75.000000,6412
4,228.000000,5302
...,...,...
3036020,692.749268,1
3036021,692.750671,1
3036022,692.751953,1
3036023,692.752319,1


In [40]:
import awkward as ak
import numpy as np
import pandas as pd

# Summarise, for every distinct `mat_X0` value, how often it occurs and at
# which position inside its list it tends to sit.

ROUND = 1  # decimals used to bucket values; set to None to group on exact values

# Jagged input and, for every element, its 0-based index inside its own list.
vals_ak = arr["mat_X0"]
pos_ak = ak.local_index(vals_ak)

# One list index per flattened element: list i is repeated len(list i) times.
counts_np = ak.to_numpy(ak.num(vals_ak))
row_flat = np.arange(len(counts_np)).repeat(counts_np)

# Flat views of the values and of their inner positions.
vals, pos = (ak.to_numpy(ak.flatten(jagged)) for jagged in (vals_ak, pos_ak))
row = row_flat

# The three flat arrays are expected to have the same length.
print(len(vals), len(pos), len(row_flat))

# Keep only entries whose value is finite.
mask = np.isfinite(vals)
vals = vals[mask]
pos = pos[mask]
row = row[mask]

# Grouping key: rounded value (merges float jitter such as 93.699996 vs 93.7)
# or the raw value when rounding is disabled.
if ROUND is None:
    key = vals
else:
    key = np.round(vals, ROUND)

df = pd.DataFrame(dict(val=vals, key=key, pos=pos, row=row))

# Per key, over ALL occurrences: number of entries, mean inner position, and
# number of distinct lists the key shows up in.
agg_all = df.groupby("key", as_index=False).agg(
    count_all=pd.NamedAgg(column="pos", aggfunc="size"),
    mean_pos_all=pd.NamedAgg(column="pos", aggfunc="mean"),
    lists_seen=pd.NamedAgg(column="row", aggfunc="nunique"),
)

# Per (key, list): the smallest inner position, i.e. the first occurrence of
# the key in that list; then average those first positions per key.
first_per_list = df.groupby(["key", "row"], as_index=False).agg(
    first_pos=pd.NamedAgg(column="pos", aggfunc="min"),
)
agg_first = first_per_list.groupby("key", as_index=False).agg(
    mean_first_pos_per_list=pd.NamedAgg(column="first_pos", aggfunc="mean"),
    lists_seen_first=pd.NamedAgg(column="row", aggfunc="nunique"),
)

# Join both summaries and order the keys by their mean position.
out = (
    agg_all
    .merge(agg_first, how="left", on="key")
    .sort_values("mean_pos_all")
    .reset_index(drop=True)
    .rename(columns={"key": "mat_X0_value"})
)

print(out.head(30))
# out.to_csv("matX0_counts_and_mean_positions.csv", index=False)


3517519 3517519 3517519
    mat_X0_value  count_all  mean_pos_all  lists_seen  \
0     352.799988      10000      0.000000       10000   
1    1718.900024      48777     18.803329        8502   
2      35.599998      22985     22.957451        6704   
3     280.700012      99993     23.575670       10000   
4     283.700012     117916     25.522321       10000   
5      14.400000       6643     25.905464        3686   
6      89.000000      24063     31.348793        8120   
7      93.699997     482987    151.742749       10000   
8       3.900000     364379    190.243258        7714   
9      27.299999     363955    192.288126        7710   
10    162.000000     727522    193.255735        7712   
11     28.200001     658397    274.097031        8565   
12     17.700001     296902    372.085900        8564   
13    425.399994     293000    373.636669        8535   

    mean_first_pos_per_list  lists_seen_first  
0                  0.000000             10000  
1                  3.521

In [18]:
# Scale the `sur_r_mm` column by 1/1000 (presumably mm -> m; the displayed
# Series keeps the name `sur_r_mm` — TODO confirm intended unit).
result["sur_r_mm"].mul(0.001)

0     0.024000
1     0.039361
2     0.074806
3     0.120597
4     0.176494
5     0.203000
6     0.247409
7     0.347103
8     0.486881
9     0.646743
10    0.800599
11    1.000305
12    1.180000
Name: sur_r_mm, dtype: float32

In [19]:
import uproot, numpy as np, pandas as pd


# Build a per-(volume_id, layer_id) summary of the "measurements" tree.
f = "/data/jlai/iris-hep/OutputPT/output_pt_10/measurements.root"
file = uproot.open(f)
t = file["measurements"]

# Branches needed for the summary, loaded as a pandas DataFrame.
cols = [
    "volume_id", "layer_id",
    "true_x", "true_y", "true_z",
    "var_loc0", "var_loc1",
    "residual_loc0", "residual_loc1",
    "pull_loc0", "pull_loc1",
]
a = t.arrays(cols, library="pd")

# Derived geometry: transverse radius and |z| of the true position.
a["R"] = np.sqrt(a["true_x"] ** 2 + a["true_y"] ** 2)
a["absZ"] = a["true_z"].abs()

# Variance -> sigma; negative variances are clipped to 0 before the sqrt.
a["sigma_loc0"] = np.sqrt(a["var_loc0"].clip(lower=0))
a["sigma_loc1"] = np.sqrt(a["var_loc1"].clip(lower=0))

g = a.groupby(["volume_id", "layer_id"])

# NOTE(review): "R_mean" and "Z_abs_mean" hold group MEDIANS, and the "*_RMS"
# columns hold the population standard deviation (ddof=0), which matches a
# true RMS only when the group mean is zero — confirm the labels are intended.
layer_table = pd.DataFrame({
    "R_mean": g["R"].median(),
    "Z_abs_mean": g["absZ"].median(),
    **{f"sigma_loc{i}_med": g[f"sigma_loc{i}"].median() for i in (0, 1)},
    **{f"resid{i}_RMS": g[f"residual_loc{i}"].std(ddof=0) for i in (0, 1)},
    **{f"pull{i}_RMS": g[f"pull_loc{i}"].std(ddof=0) for i in (0, 1)},
    "n_hits": g.size(),
}).reset_index()

# Quick quality checks
# - pulls should have RMS ~ 1 per layer if variances are well calibrated
# - resid RMS should be ~ sigma if projection is simple & unbiased


In [20]:
# Display the `layer_table` DataFrame as this cell's output.
layer_table

Unnamed: 0,volume_id,layer_id,R_mean,Z_abs_mean,sigma_loc0_med,sigma_loc1_med,resid0_RMS,resid1_RMS,pull0_RMS,pull1_RMS,n_hits
0,17,2,32.212589,37.247871,0.015,0.015,0.014785,0.014759,0.985699,0.983948,128301
1,17,4,68.192879,37.338409,0.015,0.015,0.0148,0.014756,0.986673,0.983746,123291
2,17,6,114.20594,37.277042,0.015,0.015,0.014776,0.014788,0.985075,0.985848,119738
3,17,8,170.203369,37.248318,0.015,0.015,0.014798,0.014841,0.986519,0.989393,120775
4,24,2,259.958557,37.04097,0.043,1.2,0.042593,1.181592,0.990533,0.98466,115326
5,24,4,359.965088,36.99424,0.043,1.2,0.042334,1.175907,0.984511,0.979923,116720
6,24,6,499.96817,37.046844,0.043,1.2,0.042503,1.176368,0.988441,0.980307,117055
7,24,8,659.980103,37.037098,0.043,1.2,0.042306,1.177817,0.983861,0.981514,116083
8,29,2,820.219971,37.001289,0.072,,0.070814,,0.983521,,219578
9,29,4,1020.16803,37.076996,0.072,,0.070996,,0.98605,,235259


In [16]:
# Display `result` as this cell's output.
# NOTE(review): `result` is not assigned in any cell visible here — confirm it
# is defined earlier so the notebook survives Restart & Run All.
result

Unnamed: 0,vol_id,sur_id,sur_type,sur_r_mm,x_perp_mm,xOverX0_perp,X0_eff_mm,x_path_mm,xOverX0_path,n_steps
0,0,144115325514809344,1,24.0,108.800003,0.308425,352.759796,108.847794,0.308561,136
1,0,1224979236620599296,1,39.360706,533.05658,1.527264,349.0271,533.291992,1.528007,518
2,0,1224979374059552768,1,74.80587,561.439575,1.586659,353.85022,561.698425,1.587393,585
3,0,1224979511498506240,1,120.597031,531.077942,1.601546,331.603271,531.320129,1.602278,603
4,0,1224979648937459712,1,176.493729,563.056335,1.667481,337.668854,563.301208,1.668238,642
5,0,1441152018197512192,1,203.0,272.0,0.968847,280.746033,272.119476,0.969273,136
6,0,1729382394617659392,1,247.409409,942.2995,2.205862,427.179749,942.70636,2.206867,705
7,0,1729382532056612864,1,347.102966,914.497253,2.273981,402.156921,914.902954,2.27498,668
8,0,1729382669495566336,1,486.88089,931.991943,2.212626,421.215302,932.395142,2.213562,686
9,0,1729382806934519808,1,646.74292,968.717041,2.560436,378.340607,969.148682,2.561691,689
