In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score
import uproot
import glob

In [26]:
MODE = "QG"  # "TOP" or "QG"

# Settings for each supported tagging task:
#   file_path   - ROOT file holding the predictions
#   label_names - truth branches, one per class
#   signal_idx  - position of the signal class within label_names
MODE_CONFIGS = {
    "TOP": {
        "file_path": "../../results/mpttuned/tl10/pred.root",
        "label_names": ['jet_isQCD', 'jet_isTop'],
        "signal_idx": 1,
    },
    "QG": {
        "file_path": "../../results/mpttuned/qg10/pred.root",
        "label_names": ['jet_isG', 'jet_isQ'],
        "signal_idx": 1,
    },
}


def resolve_mode_config(mode):
    """Return the settings dict for ``mode``.

    Raises:
        ValueError: if ``mode`` is not a key of ``MODE_CONFIGS``. Without this
            check a typo in MODE would leave FILE_PATH / LABEL_NAMES undefined
            (or stale from an earlier run of the cell).
    """
    try:
        return MODE_CONFIGS[mode]
    except KeyError:
        raise ValueError(
            f"Unknown MODE {mode!r}; expected one of {sorted(MODE_CONFIGS)}"
        ) from None


_mode_config = resolve_mode_config(MODE)
FILE_PATH    = _mode_config["file_path"]
# Copied so that later list operations cannot modify MODE_CONFIGS.
LABEL_NAMES  = list(_mode_config["label_names"])
SIGNAL_IDX   = _mode_config["signal_idx"]

print(f"[{MODE}] Loading file: {FILE_PATH}")

# Read the truth labels and the matching classifier scores from the "Events" tree.
with uproot.open(FILE_PATH) as f:
    tree = f["Events"]
    all_branches = tree.keys()

    # A class score may be stored under its label name ("score_<label>") or
    # under its position ("score_<index>"); the name-based branch wins when
    # both are present.
    score_names = []
    for i, name in enumerate(LABEL_NAMES):
        candidate_1, candidate_2 = f"score_{name}", f"score_{i}"
        if candidate_1 not in all_branches and candidate_2 not in all_branches:
            raise ValueError(f"Could not find score branch for {name}. Checked {candidate_1} and {candidate_2}")
        score_names.append(candidate_1 if candidate_1 in all_branches else candidate_2)

    print(f"Found score branches: {score_names}")

    # Fetch label and score branches together; the result is indexed by
    # branch name in the metric computation further down.
    data = tree.arrays(LABEL_NAMES + score_names, library="np")

[QG] Loading file: ../../results/mpttuned/qg10/pred.root
Found score branches: ['score_jet_isG', 'score_jet_isQ']


In [27]:
# Build (n_entries, n_classes) matrices with columns in LABEL_NAMES order.
y_true_hot = np.stack([data[key] for key in LABEL_NAMES], axis=1)
y_score    = np.stack([data[key] for key in score_names], axis=1)

# Class index of the truth label and of the highest-scoring prediction.
y_true_idx = np.argmax(y_true_hot, axis=1)
y_pred_idx = np.argmax(y_score, axis=1)

signal_scores = y_score[:, SIGNAL_IDX]
# Binary truth for the ROC metrics: 1 where the true class is the signal class.
# Deriving it from SIGNAL_IDX keeps the truth consistent with signal_scores;
# passing y_true_idx directly is only correct when SIGNAL_IDX == 1.
y_true_signal = (y_true_idx == SIGNAL_IDX).astype(int)

acc = accuracy_score(y_true_idx, y_pred_idx)
auc = roc_auc_score(y_true_signal, signal_scores)

fpr, tpr, thresholds = roc_curve(y_true_signal, signal_scores)

def get_rejection(target_eff, fpr_curve=None, tpr_curve=None):
    """Return the background rejection (1 / FPR) at a target signal efficiency.

    The ROC point whose true-positive rate is closest to ``target_eff`` is
    used; no interpolation between ROC points is performed.

    Args:
        target_eff: desired true-positive rate (e.g. 0.5 for 50%).
        fpr_curve: false-positive rates of the ROC curve. Defaults to the
            notebook-level ``fpr``.
        tpr_curve: true-positive rates of the ROC curve, aligned with
            ``fpr_curve``. Defaults to the notebook-level ``tpr``.

    Returns:
        ``(rejection, achieved_eff)`` where ``rejection`` is ``1 / FPR`` at the
        selected point (``float('inf')`` when that FPR is exactly zero) and
        ``achieved_eff`` is the true-positive rate actually selected.
    """
    # Fall back to the globals so existing one-argument calls keep working.
    fpr_values = np.asarray(fpr if fpr_curve is None else fpr_curve)
    tpr_values = np.asarray(tpr if tpr_curve is None else tpr_curve)

    idx = np.abs(tpr_values - target_eff).argmin()
    false_pos_rate = fpr_values[idx]
    true_eff = tpr_values[idx]

    # Avoid dividing by zero: a zero FPR means unbounded rejection.
    if false_pos_rate == 0:
        return float('inf'), true_eff
    return 1.0 / false_pos_rate, true_eff

# Background rejection at 50% and 30% signal efficiency; the second value of
# each pair is the efficiency of the ROC point actually used.
rej50, eff50 = get_rejection(0.50)
rej30, eff30 = get_rejection(0.30)


def _format_rejection(rej):
    """Render a rejection value for the summary table.

    Finite values are truncated to an integer. Non-finite values are printed
    as-is ("inf" / "nan") because ``int()`` raises on them, and
    ``get_rejection`` returns ``float('inf')`` when the false-positive rate
    is zero.
    """
    return str(int(rej)) if np.isfinite(rej) else str(float(rej))


print("\n" + "="*40)
print(f"RESULTS FOR {MODE} TAGGING")
print("="*40)
print(f"{'Metric':<15} | {'Value':<15}")
print("-" * 32)
print(f"{'Accuracy':<15} | {acc:.4f}")
print(f"{'AUC':<15} | {auc:.4f}")
print(f"{'Rej 50%':<15} | {_format_rejection(rej50)}")
print(f"{'Rej 30%':<15} | {_format_rejection(rej30)}")
print("="*40)


RESULTS FOR QG TAGGING
Metric          | Value          
--------------------------------
Accuracy        | 0.8422
AUC             | 0.9142
Rej 50%         | 41
Rej 30%         | 105
