In [30]:
# import modules
import uproot, sys, time, math, pickle, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import awkward as ak
from tqdm import tqdm
import seaborn as sns
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as ticker
from scipy.special import betainc
from scipy.stats import norm

# import config functions
sys.path.append("/eos/user/j/jlai/SWAN_projects/dark_photon_mc23e/config")
from config.jet_faking_plot_config import getWeight, zbi, sample_dict, getVarDict # 135 lumi
# from jet_faking_26_config import getWeight, zbi, sample_dict, getVarDict # 26 lumi
from config.plot_var import variables, variables_data, ntuple_names, ntuple_names_BDT
from n_1_iteration_functions import get_best_cut, calculate_significance, apply_cut_to_fb, apply_all_cuts, compute_total_significance, n_minus_1_optimizer
# from cut_config import cut_config

# Set up plot defaults
import matplotlib as mpl

# Apply the seaborn theme FIRST. sns.set() also applies seaborn's plotting context, which
# rewrites the font-size rcParams (font.size, label/tick/legend sizes). Calling it after the
# assignments below would silently discard the font.size chosen here.
sns.set(style="whitegrid")

mpl.rcParams['figure.figsize'] = 14.0,10.0  # default figure size (width, height), in inches
mpl.rcParams['font.size'] = 20.0  # base font size, in points

# Per-element font sizes (points), applied through rcParams below.
font_size = {
    "xlabel": 17,
    "ylabel": 17,
    "xticks": 15,
    "yticks": 15,
    "legend": 14
}

plt.rcParams.update({
    "axes.labelsize": font_size["xlabel"],  # X and Y axis labels
    "xtick.labelsize": font_size["xticks"],  # X ticks
    "ytick.labelsize": font_size["yticks"],  # Y ticks
    "legend.fontsize": font_size["legend"]  # Legend
})


In [31]:
# Open the data23 BDT-skimmed ntuple and list the branches of its 'nominal' tree.
path = "/eos/user/p/piazza/gghyyd/mergedNtuples/MC23d/withBDTSkim/data23_y_nominal_bdt.root"
f = uproot.open(path)['nominal']  # the 'nominal' object inside the ROOT file
f.keys()  # last expression of the cell, so the branch names are shown as the cell output

['run',
 'event',
 'year',
 'averageIntPerXing',
 'actualIntPerXing',
 'trigger_HLT_g140_loose_L1eEM26M',
 'trigger_HLT_g50_tight_xe40_cell_xe50_pfopufit_80mTAC_L1eEM26M',
 'trigger_HLT_g50_tight_xe40_cell_xe70_pfopufit_80mTAC_EM22VHI',
 'trigger_HLT_g50_tight_xe40_cell_xe70_pfopufit_80mTAC_L1eEM26M',
 'trigger_HLT_xe65_cell_xe90_pfopufit_L1XE50',
 'trigger_single_el',
 'trigger_single_mu',
 'trigger_diel',
 'trigger_dimu',
 'trigger_matched_el',
 'trigger_matched_mu',
 'n_jet',
 'n_jet_central',
 'n_jet_fwd',
 'n_jet_failJVT',
 'n_bjet',
 'n_ph',
 'n_ph_baseline',
 'n_ph_loose',
 'n_el',
 'n_el_baseline',
 'n_mu',
 'n_mu_baseline',
 'n_tau_baseline',
 'met_tst_sig',
 'met_tst_noJVT_sig',
 'jet_vecSumPt',
 'failJVT_jet_vecSumPt',
 'failJVT_jet_vecSumPhi',
 'failJVT_jet_vecSumEta',
 'jet_sumPt',
 'failJVT_jet_sumPt',
 'jet_central_vecSumPt',
 'jet_central_sumPt',
 'jet_fwd_vecSumPt',
 'jet_fwd_sumPt',
 'n_pv',
 'pv_z',
 'pv_x',
 'pv_y',
 'pv_ntracks',
 'vtx_sumPt',
 'vtx_sumPt2',
 'z_as

In [32]:
# Spot-check the 'dmet' and 'dphi_met_phterm' branches of the ggHyyd signal BDT skim.
path = "/eos/user/p/piazza/gghyyd/mergedNtuples/MC23d/withBDTSkim/mc23d_ggHyyd_y_nominal_bdt.root"
f = uproot.open(path)['nominal']
# (A bare `f.keys()` used to sit here; mid-cell its result was discarded, so it was removed.)
fb = f.arrays(["dmet", "dphi_met_phterm"], library="ak")
fb['dphi_met_phterm'], fb['dmet']  # last expression: both arrays are displayed as a tuple

(<Array [-0.309, -1.3, -2.61, ..., 1.94, 0.214, -0.0399] type='17999 * float32'>,
 <Array [0, 0, 0, -9.87e+03, ..., 0, 0, 0, -2.41e+04] type='17999 * float32'>)

In [5]:

# Accumulator: the per-sample loop further down appends one filtered event array per sample.
tot = []
# Empty DataFrame; not used anywhere in the visible part of this cell.
data = pd.DataFrame()
# Samples to process, in order. This reassigns the `ntuple_names` imported from config.plot_var.
ntuple_names = ['ggHyyd','Zjets','Zgamma','Wgamma','Wjets','gammajet_direct', 'data23']

def test(fb):
    """Print how many entries of ``fb['met_tst_et']`` are None.

    Diagnostic only: the events are not filtered and nothing is returned.
    """
    none_count = ak.sum(ak.is_none(fb['met_tst_et']))
    print("Number of none values: ", none_count)

def print_cut(ntuple_name, fb, label):
    """Print the unweighted and weighted event counts of ``fb`` for one selection stage.

    Parameters
    ----------
    ntuple_name : sample name passed on to ``getWeight``; for ``'data23'`` the weights are
        requested with ``jet_faking=True``.
    fb : event array whose length is the unweighted count.
    label : text inserted into the printed messages (e.g. ``'before cut'``).
    """
    print(f"Unweighted Events {label}: ", len(fb))
    # Only the data sample asks getWeight for the jet-faking weights.
    weight_options = {'jet_faking': True} if ntuple_name == 'data23' else {}
    event_weights = getWeight(fb, ntuple_name, **weight_options)
    print(f"Weighted Events {label}: ", sum(event_weights))

# Per-sample loop: read each ntuple, attach the vertex BDT score, apply the sample-specific
# preselection and the basic cuts, print the yields, and collect the surviving events in `tot`.
for ntuple_name in ntuple_names:
    start_time = time.time()
    is_data = ntuple_name == 'data23'

    # Data and MC skims follow the same naming template, so one expression covers both.
    # NOTE(review): the signal file opened in an earlier cell is called
    # 'mc23d_ggHyyd_y_nominal_bdt.root' (with an 'mc23d_' prefix) — confirm the MC file names.
    path = f"/eos/user/p/piazza/gghyyd/mergedNtuples/MC23d/withBDTSkim/{ntuple_name}_y_nominal_bdt.root"
    print('processing file: ', path)

    # Context manager so the ROOT file is closed as soon as the arrays are in memory.
    with uproot.open(path) as root_file:
        f = root_file['nominal']
        if is_data: # data
            fb = f.arrays(variables_data, library="ak")
            fb['VertexBDTScore'] = fb['BDTScore'] # renaming BDTScore to ensure this is recognized as Vertex BDT Score
        else: # MC
            fb = f.arrays(variables, library="ak")
            # The score used to be read from `path_BDT`, a name that is never defined in the
            # visible notebook (NameError on a fresh kernel). `path` already points at the BDT
            # skim, so the branch is read from the same tree, which also guarantees that the
            # scores are event-aligned with `fb`.
            # Assumes the MC skims carry a 'BDTScore' branch like the data skim — TODO confirm.
            fb['VertexBDTScore'] = f['BDTScore'].array(library="ak")

    fb = fb[ak.num(fb['ph_eta']) > 0]     # for abs(ak.firsts(fb['ph_eta'])) to have value to the reweighting

    if is_data:
        # jet_faking_photon cut: keep events whose leading photon has
        # (topoetcone40 - 2450) / pt > 0.1.
        jet_faking_mask = (ak.firsts(fb['ph_topoetcone40'])-2450.)/ak.firsts(fb['ph_pt']) > 0.1
        fb = fb[jet_faking_mask]
        fb = fb[fb['n_ph_baseline'] == 1]
    else:
        fb = fb[fb['n_ph'] == 1]

        # Zjets and Wjets (rule out everything except for e->gamma)
        if ntuple_name in ('Zjets', 'Wjets'):
            fb = fb[ak.firsts(fb['ph_truth_type']) == 2]

        # goodPV on signal only: first reco vertex within 0.5 of the first truth vertex in z
        if ntuple_name == 'ggHyyd':
            fb = fb[ak.num(fb['pv_z']) > 0]
            good_pv_mask = (np.abs(ak.firsts(fb['pv_truth_z']) - ak.firsts(fb['pv_z'])) <= 0.5)
            fb = fb[good_pv_mask]

    print_cut(ntuple_name, fb, 'before cut')

    # Lepton/tau vetoes and trigger requirement
    fb = fb[fb['n_mu_baseline'] == 0]
    fb = fb[fb['n_el_baseline'] == 0]
    fb = fb[fb['n_tau_baseline'] == 0]
    fb = fb[fb['trigger_HLT_g50_tight_xe40_cell_xe70_pfopufit_80mTAC_L1eEM26M']==1]
    fb = fb[ak.num(fb['ph_pt']) > 0] # prevent none values in Tbranch
    fb = fb[ak.firsts(fb['ph_pt']) >= 50000] # ph_pt cut (basic cut)
    fb = fb[fb['met_tst_et'] >= 100000] # MET cut (basic cut)
    fb = fb[fb['n_jet_central'] <= 3] # n_jet_central cut (basic cut)

    # mt_tmp = np.sqrt(2 * fb['met_tst_et'] * ak.firsts(fb['ph_pt']) * 
    #                         (1 - np.cos(fb['met_tst_phi'] - ak.firsts(fb['ph_phi'])))) / 1000
    # mask1 = mt_tmp > 80
    # fb = fb[mask1]
    # mask1 = mt_tmp > 100
    # mask2 = mt_tmp < 140 
    # fb = fb[mask1 * mask2]

    fb = fb[fb['VertexBDTScore'] > 0.1]

    # --- Selection cuts below are currently disabled; kept for reference ---
    # # Selection cut
    # metsig_tmp = fb['met_tst_sig'] 
    # mask1 = metsig_tmp > 7
    # mask2 = metsig_tmp < 16
    # fb = fb[mask1 * mask2]
    
    # ph_eta_tmp = np.abs(ak.firsts(fb['ph_eta']))
    # fb = fb[ph_eta_tmp < 1.74]

    # dphi_met_phterm_tmp = np.arccos(np.cos(fb['met_tst_phi'] - fb['met_phterm_phi'])) # added cut 3
    # fb = fb[dphi_met_phterm_tmp > 1.34]

    # dmet_tmp = fb['met_tst_noJVT_et'] - fb['met_tst_et']
    # mask1 = dmet_tmp > -17900
    # mask2 = dmet_tmp < 41900
    # fb = fb[mask1 * mask2]

    # dphi_jj_tmp = fb['dphi_central_jj']
    # dphi_jj_tmp = ak.where(dphi_jj_tmp == -10, np.nan, dphi_jj_tmp)
    # dphi_jj_tmp = np.arccos(np.cos(dphi_jj_tmp))
    # dphi_jj_tmp = ak.where(np.isnan(dphi_jj_tmp), -999, dphi_jj_tmp)
    # fb = fb[dphi_jj_tmp < 2.58]

    # dphi_met_jetterm_tmp = np.where(fb['met_jetterm_et'] != 0,   # added cut 5
    #                     np.arccos(np.cos(fb['met_tst_phi'] - fb['met_jetterm_phi'])),
    #                     -999)
    # fb = fb[dphi_met_jetterm_tmp < 0.73]

    # print_cut(ntuple_name, fb, 'after basic + selection cut')

    print_cut(ntuple_name, fb, 'after basic')

    test(fb) # check for none value

    print(f"Reading Time for {ntuple_name}: {(time.time()-start_time)} seconds\n")

    # `tot` keeps the filtered events alive; intermediates are released when the names
    # are rebound on the next iteration, so no manual reset is needed.
    tot.append(fb)


processing file:  /data/tmathew/ntups/mc23d/ggHyyd_y.root


FileNotFoundError: [Errno 2] No such file or directory: '/data/tmathew/ntups/mc23d/ggHyyd_y.root'