In [1]:
# Path: notebooks/SPVCNN_preprocessing/root_to_pickle.ipynb
# Convert a ROOT file into per-event pickle files (one pandas DataFrame per event).
import uproot
import numpy as np
import pandas as pd
from pathlib import Path
import awkward as ak
import os, sys
from tqdm import tqdm

# Make the sibling `src` directory importable. The membership check keeps the
# cell idempotent: re-running it no longer appends the same entry again.
src_dir = os.path.abspath(os.path.join("..", "src"))
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Folder holding the input samples, relative to the current working directory.
# (`Path` is already imported at the top of this cell.)
samples_folder = Path("../../samples")

In [2]:
# Label of the vertexing algorithm; it matches the "IVF" tag in the sample path.
vertex_algo = "IVF"

# ROOT file to convert, located under the samples folder.
file_path = samples_folder.joinpath(
    "sample/n1000_npu50_IVF/vertex_n1000_npu50_IVF_0429.root"
)

In [3]:
# Display the input path in POSIX form as a quick sanity check.
file_path.as_posix()

'../../samples/sample/n1000_npu50_IVF/vertex_n1000_npu50_IVF_0429.root'

In [4]:
# Open the ROOT file with uproot; the handle is kept in `file` for inspection.
file = uproot.open(file_path)

In [5]:
# List the keys stored in the opened file.
file.keys()

['ML_tracks;3', 'ML_tracks;2', 'vertexing;1']

In [7]:
# NOTE(review): this opens the same path a second time; it is already open as
# `file` from an earlier cell.
root_dir = uproot.open(file_path)

# Look up "ML_tracks" without a cycle number. The file lists both "ML_tracks;3"
# and "ML_tracks;2"; presumably uproot resolves this to the highest cycle --
# TODO confirm.
ML_tracks_tree = root_dir["ML_tracks"]

In [8]:
# List the branch names available in the ML_tracks tree.
ML_tracks_tree.keys()

['event_nr',
 'reco_PriVtxX',
 'reco_PriVtxY',
 'reco_PriVtxZ',
 'truth_PriVtxX',
 'truth_PriVtxY',
 'truth_PriVtxZ',
 'reco_trk_d0',
 'reco_trk_z0',
 'reco_trk_phi',
 'reco_trk_theta',
 'reco_trk_qop',
 'reco_trk_time',
 'reco_trk_err_d0',
 'reco_trk_err_z0',
 'reco_trk_err_phi',
 'reco_trk_err_theta',
 'reco_trk_err_qop',
 'reco_trk_err_time',
 'reco_trk_truth_particle_match_prob',
 'reco_trk_truth_particle_match_Id',
 'reco_trk_reco_vtx_chi2Track',
 'reco_trk_reco_vtx_ndf',
 'reco_trk_reco_vtx_vertexCompatibility',
 'reco_trk_reco_vtx_trackWeight',
 'reco_trk_truth_vtx_idx',
 'reco_trk_reco_vtx_idx']

In [170]:
# Track parameters to export. Every parameter is read together with a matching
# branch whose name carries the "err_" prefix.
features = ["d0", "z0", "phi", "theta", "qop"]
reading_branches = list(features)
reading_branches.extend("err_" + name for name in features)

In [213]:
# Export the first `max_events` events of `ML_tracks_tree` as one pickled
# DataFrame per event (one row per track) under `raw_data_dir`.

# Output directory; `exist_ok=True` already makes this safe to re-run.
raw_data_dir = Path("./tmp")
raw_data_dir.mkdir(parents=True, exist_ok=True)
reco_trk_prefix = "reco_trk_"
max_events = 10  # number of leading events to export

# Read every requested branch once; keys drop the "reco_trk_" prefix.
ML_tracks_dict = {
    branch: ML_tracks_tree[reco_trk_prefix + branch].array()
    for branch in reading_branches
}

ML_tracks_dict["truth_instance_label"] = ML_tracks_tree[
    "reco_trk_truth_vtx_idx"
].array()

# For each entry of the innermost (axis=2) lists, keep the reco vertex index
# that has the largest track weight. `keepdims=True` keeps the argmax result
# usable as an index; the flatten then removes that length-1 axis again.
reco_trk_reco_vtx_idx = ML_tracks_tree["reco_trk_reco_vtx_idx"].array()
reco_trk_reco_vtx_trackWeight = ML_tracks_tree[
    "reco_trk_reco_vtx_trackWeight"
].array()
reco_trk_reco_vtx_trackWeight_max_idx = ak.argmax(
    reco_trk_reco_vtx_trackWeight, axis=2, keepdims=True
)
reco_trk_reco_vtx_max_idx = ak.flatten(
    reco_trk_reco_vtx_idx[reco_trk_reco_vtx_trackWeight_max_idx], axis=2
)

ML_tracks_dict["reco_instance_label"] = reco_trk_reco_vtx_max_idx

# Offset added to the event number in the output file name.
ni = 0
# Take the entry count from the tree instead of reading a whole branch.
n_events = min(max_events, ML_tracks_tree.num_entries)
for n in tqdm(range(n_events)):
    ML_tracks_df = {k: v[n] for k, v in ML_tracks_dict.items()}

    truth_flat_event = pd.DataFrame(ML_tracks_df)

    truth_flat_event["truth_semantic_label"] = 1
    # Fake tracks: collapse both sentinel labels (-999 and -1) to -1.
    # The earlier `(a == -999) | a == -1` parsed as `((a == -999) | a) == -1`
    # because `|` binds tighter than `==`, so it did not select these rows.
    # NOTE(review): "truth_semantic_label" stays 1 for fake tracks, unlike the
    # reco branch below -- confirm this is intended.
    is_fake_truth = truth_flat_event["truth_instance_label"].isin([-999, -1])
    truth_flat_event.loc[is_fake_truth, "truth_instance_label"] = -1

    # Rows whose reco label is the -999 sentinel get reco semantic label 0.
    truth_flat_event["reco_semantic_label"] = 1
    truth_flat_event.loc[
        truth_flat_event["reco_instance_label"] == -999,
        "reco_semantic_label",
    ] = 0

    # Extra x0 / y0 columns computed from d0 and phi.
    truth_flat_event["x0"] = truth_flat_event["d0"] * np.cos(
        truth_flat_event["phi"]
    )
    truth_flat_event["y0"] = truth_flat_event["d0"] * np.sin(
        truth_flat_event["phi"]
    )

    # Drop rows with missing values (in some cases tracks carry dummy values);
    # this must happen before the integer cast below.
    truth_flat_event = truth_flat_event.dropna()
    truth_flat_event["reco_instance_label"] = truth_flat_event[
        "reco_instance_label"
    ].astype(int)
    truth_flat_event.to_pickle(raw_data_dir / f"event_{n+ni:05}.pkl")
# Advance the offset by the number of exported events. Unlike `ni += n + 1`,
# this also works when the loop body never ran.
ni += n_events

100%|██████████| 10/10 [00:00<00:00, 349.09it/s]


In [214]:
# Spot-check the export: reload the pickle written for event index 1 and display it.
event_path = raw_data_dir / "event_00001.pkl"
pd.read_pickle(event_path)

Unnamed: 0,d0,z0,phi,theta,qop,err_d0,err_z0,err_phi,err_theta,err_qop,truth_instance_label,reco_instance_label,truth_semantic_label,reco_semantic_label,x0,y0
0,-0.000736,8.371603,-3.119525,2.907032,0.162853,0.017257,0.162365,0.000402,0.000271,0.002488,25,1,1,1,0.000735,0.000016
1,-0.022801,53.136162,-2.985911,2.978630,0.005959,0.010173,0.042189,0.000204,0.000031,0.000252,0,0,1,1,0.022525,0.003535
2,-0.035954,53.109692,-3.014758,2.979151,-0.006531,0.022887,0.080991,0.000257,0.000028,0.000341,0,0,1,1,0.035665,0.004548
3,-0.066164,-51.601113,-2.944451,0.448622,0.265900,0.014509,0.050736,0.000355,0.000280,0.002574,9,3,1,1,0.064882,0.012959
4,-0.021496,-67.670432,-2.853589,2.831891,-0.281583,0.017072,0.102854,0.000389,0.000285,0.002823,11,7,1,1,0.020611,0.006106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,-0.005377,52.924073,2.763706,2.913569,-0.219362,0.017359,0.190384,0.000418,0.000301,0.002721,0,0,1,1,0.004998,-0.001984
127,-0.467727,7.569312,2.758184,2.008527,-0.830893,0.016947,0.019795,0.000371,0.000334,0.007011,25,-999,1,0,0.433768,-0.174969
128,-0.028121,-51.634097,2.692234,1.197708,-0.447479,0.016335,0.018645,0.000316,0.000298,0.003642,9,3,1,1,0.025329,-0.012215
129,0.126705,93.126486,2.832996,2.569875,0.318058,0.016831,0.035157,0.000360,0.000271,0.004055,15,4,1,1,-0.120719,0.038483
