In [5]:
import numpy as np
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score
import uproot
import glob

In [6]:
classes = ['QCD', 'Hbb', 'Hcc', 'Hgg', 'H4q', 'Hqql', 'Zqq', 'Wqq', 'Tbqq', 'Tbl']
n_classes = len(classes)
label_list = [f'label_{cls}' for cls in classes]
score_list = [f'score_label_{cls}' for cls in classes]

full_path = "../../results/perf/switchhead_4h_4e_2a/pred_*.root"
#full_path = "../training/JetClass/Pythia/full/LinformerPairWise/20240919-184207_example_LinformerPairwise_ranger_lr0.001_batch512/predict_output/pred_*.root"

arrays = []
concat_arrays = {}
for file_name in glob.glob(full_path):
    with uproot.open(file_name) as f:
        print(file_name)
        arrays.append(f["Events"].arrays(label_list + score_list))
for key in label_list + score_list:
    concat_arrays[key] = np.concatenate([arrays[i][key].to_numpy() for i in range(len(arrays))])

y_prob = np.stack([concat_arrays[key] for key in score_list], axis=1)
labels = np.stack([concat_arrays[key] for key in label_list], axis=1).astype(int)

../../results/perf/switchhead_4h_4e_2a/pred_TTBarLep.root
../../results/perf/switchhead_4h_4e_2a/pred_HToBB.root
../../results/perf/switchhead_4h_4e_2a/pred_ZJetsToNuNu.root
../../results/perf/switchhead_4h_4e_2a/pred_HToWW4Q.root
../../results/perf/switchhead_4h_4e_2a/pred_WToQQ.root
../../results/perf/switchhead_4h_4e_2a/pred_HToGG.root
../../results/perf/switchhead_4h_4e_2a/pred_TTBar.root
../../results/perf/switchhead_4h_4e_2a/pred_HToCC.root
../../results/perf/switchhead_4h_4e_2a/pred_ZToQQ.root
../../results/perf/switchhead_4h_4e_2a/pred_HToWW2Q1L.root


In [7]:
overall_roc_auc = roc_auc_score(labels, y_prob, average='macro', multi_class='ovo')
predicted_labels = np.argmax(y_prob, axis=1) 
true_labels = np.argmax(labels, axis=1)  

accuracy = accuracy_score(true_labels, predicted_labels)

print(f'Overall ROC AUC = {overall_roc_auc:.4f}, Accuracy = {accuracy:.4f}')


scores = y_prob / (y_prob[:, :1] + y_prob) # defaults to 0.5 for QCD (not used)

rejections = []

for i in range(1, n_classes):
    if i == 5:
        percent = 0.99
    elif i == 9:
        percent = 0.995
    else:
        percent = 0.5
    
    mask = (labels[:, 0] == 1) | (labels[:, i] == 1)
    filtered_labels = labels[mask]
    filtered_scores = scores[mask]
    
    binary_labels = (filtered_labels[:, i] == 1).astype(int)
    binary_scores = filtered_scores[:, i]
    
    fpr, tpr, thresholds = roc_curve(binary_labels, binary_scores)

    idx = np.abs(tpr - percent).argmin()
    
    if fpr[idx] != 0:
        rejection = 1 / fpr[idx]
    else:
        rejection = np.inf  
    
    rejections.append(rejection)
    
    print(f'Rejection at {percent*100}% for {label_list[i]}: {rejection}')

Overall ROC AUC = 0.9855, Accuracy = 0.8467
Rejection at 50.0% for label_Hbb: 8264.462809917355
Rejection at 50.0% for label_Hcc: 2832.8611898016998
Rejection at 50.0% for label_Hgg: 109.40919037199124
Rejection at 50.0% for label_H4q: 1137.009664582149
Rejection at 99.0% for label_Hqql: 3179.650238473768
Rejection at 50.0% for label_Zqq: 311.57501168406293
Rejection at 50.0% for label_Wqq: 409.8360655737705
Rejection at 50.0% for label_Tbqq: 12578.61635220126
Rejection at 99.5% for label_Tbl: 9569.377990430621
