In [1]:
import uproot
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
from pathlib import Path
import pickle

In [None]:
def get_match_idx(truth_flat_event, reco_flat_event):
    """Look up, for every truth track, the reco vertex ID of the identical reco track.

    Two tracks are considered identical when all five parameters
    (d0, z0, phi, theta, qp) compare exactly equal.

    Parameters
    ----------
    truth_flat_event : pandas.DataFrame
        One row per truth track; must contain the five track-parameter columns.
    reco_flat_event : pandas.DataFrame
        One row per reco track; must contain the five track-parameter columns
        and a ``vtxID`` column.

    Returns
    -------
    list
        One entry per truth track, in row order: the ``vtxID`` of the first
        matching reco track, or -1 when no reco track matches.
    """
    track_params = ['d0','z0','phi','theta','qp']
    truth_rows = truth_flat_event[track_params].to_numpy()
    reco_rows = reco_flat_event[track_params].to_numpy()
    reco_vertex_ids = reco_flat_event['vtxID'].to_numpy()

    def _first_match(truth_row):
        # Row indices of reco tracks whose five parameters all equal this truth track.
        hits = np.flatnonzero((reco_rows == truth_row).all(1))
        # When several reco tracks match, only the first one is used.
        return reco_vertex_ids[hits[0]] if hits.size != 0 else -1

    return [_first_match(truth_row) for truth_row in truth_rows]


In [None]:
    def root_to_pickle(root_data_path, raw_data_dir):
        """Convert every ``*.root`` file in a directory into one pickle per event.

        For each file (in sorted name order) the fitted-track branches of the
        ``Truth_Vertex_PV_Selected`` and ``Reco_Vertex`` trees are loaded. Each
        event becomes a DataFrame with one row per truth track, extended with:

        * ``truth_vtxID``          - the truth tree's ``vtxID`` branch, renamed
        * ``truth_semantic_label`` - always 1
        * ``reco_AMVF_vtxID``      - result of ``get_match_idx`` (-1 if unmatched)
        * ``reco_semantic_label``  - 1 if a reco match was found, else 0

        Events are written as ``event_XXXXX.pkl`` with a counter that keeps
        running across input files.

        Parameters
        ----------
        root_data_path : pathlib.Path or str
            Directory that is searched (non-recursively) for ``*.root`` files.
        raw_data_dir : pathlib.Path or str
            Output directory for the pickles; created if it does not exist.
        """
        root_data_path = Path(root_data_path)
        raw_data_dir = Path(raw_data_dir)
        # DataFrame.to_pickle does not create missing parent directories.
        raw_data_dir.mkdir(parents=True, exist_ok=True)

        truth_prefix = 'truth_vtx_fitted_trk_'
        reco_prefix = 'reco_vtx_fitted_trk_'

        def _read_prefixed_branches(tree, prefix):
            """Load all branches of `tree` whose name starts with `prefix`, keyed by the remainder of the name."""
            return {
                name[len(prefix):]: branch.array()
                for name, branch in tqdm(tree.items())
                if name.startswith(prefix)
            }

        ni = 0  # number of events written so far (offset for the output file names)
        for f in sorted(root_data_path.glob('*.root')):
            # Read everything needed while the file is open, then release the handle.
            with uproot.open(f) as root_dir:
                truth_jagged_dict = _read_prefixed_branches(root_dir['Truth_Vertex_PV_Selected'], truth_prefix)
                reco_jagged_dict = _read_prefixed_branches(root_dir['Reco_Vertex'], reco_prefix)

            # Rename so the truth vertex ID is distinguishable from the reco one downstream.
            truth_jagged_dict['truth_vtxID'] = truth_jagged_dict.pop('vtxID')

            # Use an already-loaded branch for the event count instead of re-reading one.
            n_events = len(truth_jagged_dict['truth_vtxID'])
            for n in tqdm(range(n_events)):
                truth_flat_event = pd.DataFrame({k: v[n] for k, v in truth_jagged_dict.items()})
                reco_flat_event = pd.DataFrame({k: v[n] for k, v in reco_jagged_dict.items()})

                truth_flat_event['truth_semantic_label'] = 1
                truth_flat_event['reco_AMVF_vtxID'] = get_match_idx(truth_flat_event, reco_flat_event)
                # A track without a reco match (-1) gets semantic label 0, all others 1.
                truth_flat_event['reco_semantic_label'] = (
                    truth_flat_event['reco_AMVF_vtxID'] != -1
                ).astype('int64')

                truth_flat_event.to_pickle(raw_data_dir / f'event_{n+ni:05}.pkl')

            # Advance by the real event count; relying on the leaked loop variable
            # breaks (NameError or stale value) when a file contains no events.
            ni += n_events

In [2]:
# Base directory holding the input *.root files. Set the VERTEXING_DATA_DIR
# environment variable to run on another machine; the default is the original location.
root_data_path = Path(os.environ.get('VERTEXING_DATA_DIR', '/Users/dejavu/Projects/Vertexing'))
# Per-event pickles are written to and read from this sub-directory.
raw_data_dir = root_data_path / 'raw'


In [None]:
# Convert all *.root files found in root_data_path into per-event pickles under raw_data_dir.
root_to_pickle(root_data_path, raw_data_dir)

In [3]:
# Sanity check: load the first converted event and display it.
# The path is built from raw_data_dir so it follows the configured data directory.
# NOTE: pickle.load executes arbitrary code; only open pickles produced by this notebook.
with open(raw_data_dir / 'event_00000.pkl', 'rb') as f:
    y = pickle.load(f)
y

Unnamed: 0,d0,z0,phi,theta,qp,time,err_d0,err_z0,err_phi,err_theta,err_qp,err_time,truth_vtxID,truth_semantic_label,reco_AMVF_vtxID,reco_semantic_label
0,-0.061594,-17.401218,1.395401,0.189386,0.204095,-250.224625,0.058591,0.285655,0.000508,5e-05,0.001779,299.792603,0,1,0,1
1,0.02644,-17.499319,1.091079,0.229506,0.253887,-24.408846,0.077674,0.273776,0.001445,0.000167,0.002225,299.792542,0,1,0,1
2,0.060752,-17.408518,1.399242,1.480304,-1.696791,-212.961792,0.030237,0.041791,0.000716,0.001406,0.011217,299.79245,0,1,0,1
3,-0.065326,-17.385729,-1.952685,0.461722,-0.688754,-637.556152,0.049847,0.13697,0.00107,0.00052,0.005467,299.79248,0,1,0,1
4,-0.007053,-17.408218,-2.383921,1.591106,1.085591,-728.539978,0.030916,0.037626,0.000886,0.001116,0.007255,299.79245,0,1,0,1
5,0.014746,-17.728697,1.717623,0.209438,-0.144749,-330.114594,0.066231,0.284818,0.001254,0.000118,0.001289,299.792542,0,1,0,1
6,0.014632,-17.383392,-1.132227,1.465986,-0.922339,-589.72467,0.032336,0.036034,0.000872,0.001023,0.006205,299.79245,0,1,0,1
7,0.060646,-17.359617,-0.235356,0.784103,-0.97332,-333.182037,0.038154,0.048186,0.001061,0.000841,0.00713,299.79245,0,1,0,1
8,-0.001376,-17.399632,-2.533,1.380604,-1.95959,-643.867065,0.030246,0.043109,0.000509,0.001477,0.01301,299.79245,0,1,0,1
9,0.009593,-17.465534,-0.978418,0.319597,-0.03632,-495.950409,0.021168,0.084267,0.000364,7.4e-05,0.00031,299.79248,0,1,0,1


In [4]:
# from calo_cluster.evaluation.experiments.base_experiment import BaseExperiment
# from pathlib import Path 
# from calo_cluster.clustering.meanshift import MeanShift
# from tqdm import tqdm
from calo_cluster.evaluation.metrics.instance import PanopticQuality

In [5]:
# Accumulate panoptic-quality statistics over every pickled event in raw_data_dir.
pq = PanopticQuality(num_classes=2, semantic=True, ignore_semantic_labels=[0])
event_files = sorted(raw_data_dir.glob('*.pkl'))
for file in tqdm(event_files):
    with open(file, 'rb') as f:
        evt = pickle.load(f)

    # Each argument to pq.add is a (semantic labels, vertex ids) pair of int arrays:
    # the first built from the truth columns, the second from the reco columns.
    truth_labels = (
        evt['truth_semantic_label'].astype(int).to_numpy(),
        evt['truth_vtxID'].astype(int).to_numpy(),
    )
    reco_labels = (
        evt['reco_semantic_label'].astype(int).to_numpy(),
        evt['reco_AMVF_vtxID'].astype(int).to_numpy(),
    )
    pq.add(truth_labels, reco_labels)

pq_score = pq.compute()

100%|████████████████████████████████████| 10000/10000 [00:12<00:00, 778.62it/s]


In [6]:
# Display the value returned by pq.compute().
pq_score

{'sq': array([-1.        ,  0.90251713]),
 'rq': array([-1.        ,  0.88913318]),
 'pq': 0.8024579294236318,
 'tq': array([-1.        ,  0.87820422]),
 'wrq': array([-1.        ,  0.97894692]),
 'wpq': 0.8835163670361004,
 'wtq': array([-1.       ,  0.9797166])}