In [1]:
import dask_awkward as dak
import awkward as ak
from distributed import LocalCluster, Client, progress
import time
import numpy as np
import matplotlib.pyplot as plt
import json
import mplhep as hep
import glob
import pandas as pd

# Apply the CMS plotting style shipped with mplhep to every matplotlib figure.
plt.style.use(hep.style.CMS)

# Start a local Dask cluster for this notebook: 40 single-threaded worker
# processes, each capped at 8 GiB of memory.
client =  Client(n_workers=40,  threads_per_worker=1, processes=True, memory_limit='8 GiB') 


In [2]:
"""
This code prints ggH/VBF channel yields after applying category cuts
"""

def applyVBF_cutV1(events):
    """Select VBF-category events and tag the input with the decision.

    An event passes when it satisfies the dijet requirements
    (jj_mass_nominal > 400, jj_dEta_nominal > 2.5, jet1_pt_nominal > 35)
    and is not b-tagged (nBtagLoose_nominal >= 2 or nBtagMedium_nominal >= 1).
    Missing values in any of these comparisons count as "not passed".

    Side effect: a ``vbf_filter`` field is written onto ``events`` before the
    selection is applied, so the returned events carry it too.

    Returns:
        The subset of ``events`` that passes the VBF selection.
    """
    has_two_loose_b = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    has_medium_b = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False)
    is_btagged = has_two_loose_b | has_medium_b  # b-tagged events belong to VH / ttH

    passes_dijet = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35),
        value=False,
    )

    mass = events.dimuon_mass  # only used as the shape template for the flag arrays
    selection = passes_dijet & ~is_btagged

    events["vbf_filter"] = ak.where(
        selection,
        ak.ones_like(mass, dtype="bool"),
        ak.zeros_like(mass, dtype="bool"),
    )
    return events[selection]

def applyGGH_cutV1(events):
    """Select ggH-category events: neither VBF-like nor b-tagged.

    An event is VBF-like when jj_mass_nominal > 400, jj_dEta_nominal > 2.5 and
    jet1_pt_nominal > 35; it is b-tagged when nBtagLoose_nominal >= 2 or
    nBtagMedium_nominal >= 1. Missing values in any comparison count as
    "not passed", so such events are kept.

    Returns:
        The subset of ``events`` that fails both the VBF and the b-tag selection.
    """
    # b-tagged events are reserved for the VH and ttH categories
    btag_cut = ak.fill_none((events.nBtagLoose_nominal >= 2), value=False) | ak.fill_none(
        (events.nBtagMedium_nominal >= 1), value=False
    )
    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35),
        value=False,
    )
    # The original also read events.dimuon_mass into an unused local; dropped so
    # the function only requires the fields it actually cuts on.
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]


def applyGGH_NoBtagNjet1(events):
    """Select ggH events, applying the medium b-tag veto only when njets >= 2.

    b-tag veto: nBtagLoose_nominal >= 2, or nBtagMedium_nominal >= 1 together
    with njets_nominal >= 2. VBF veto: jj_mass_nominal > 400,
    jj_dEta_nominal > 2.5 and jet1_pt_nominal > 35. Missing values in any
    comparison count as "not passed".

    Returns:
        The subset of ``events`` that is neither VBF-like nor b-tagged.
    """
    two_loose_b = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    one_medium_b = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False)
    two_or_more_jets = ak.fill_none(events.njets_nominal >= 2, value=False)
    is_btagged = two_loose_b | (one_medium_b & two_or_more_jets)  # VH / ttH territory

    is_vbf_like = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35),
        value=False,
    )

    # Equivalent to (~vbf & ~btag) by De Morgan.
    keep = ~(is_vbf_like | is_btagged)
    return events[keep]

def applyGGH_30(events):
    """Select ggH events using the VBF veto that also requires jet2_pt_nominal > 30.

    b-tag veto: nBtagLoose_nominal >= 2, or nBtagMedium_nominal >= 1 together
    with njets_nominal >= 2. VBF veto: jj_mass_nominal > 400,
    jj_dEta_nominal > 2.5, jet1_pt_nominal > 35 and jet2_pt_nominal > 30.
    Missing values in any comparison count as "not passed".

    Returns:
        The subset of ``events`` that is neither VBF-like nor b-tagged.
    """
    btagLoose_filter = ak.fill_none((events.nBtagLoose_nominal >= 2), value=False)
    btagMedium_filter = ak.fill_none((events.nBtagMedium_nominal >= 1), value=False) & ak.fill_none(
        (events.njets_nominal >= 2), value=False
    )
    btag_cut = btagLoose_filter | btagMedium_filter  # b-tagged events go to VH / ttH

    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35)
        & (events.jet2_pt_nominal > 30),
        value=False,
    )
    # NOTE(review): the original also built a `jet1_pt_nominal > 30` mask
    # (jet_30_cut) that was never applied to the selection. It is removed here
    # as dead code; confirm that no leading-jet cut was intended.
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]

def applyVBF_30(events):
    """Select VBF-category events with the extra jet2_pt_nominal > 30 requirement.

    An event passes when jj_mass_nominal > 400, jj_dEta_nominal > 2.5,
    jet1_pt_nominal > 35 and jet2_pt_nominal > 30, and it is not b-tagged
    (nBtagLoose_nominal >= 2 or nBtagMedium_nominal >= 1). Missing values in
    any comparison count as "not passed".

    Side effect: a ``vbf_filter`` field is written onto ``events`` before the
    selection is applied, so the returned events carry it too.

    Returns:
        The subset of ``events`` that passes the VBF selection.
    """
    has_two_loose_b = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    has_medium_b = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False)
    is_btagged = has_two_loose_b | has_medium_b  # b-tagged events belong to VH / ttH

    passes_dijet = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35)
        & (events.jet2_pt_nominal > 30),
        value=False,
    )

    mass = events.dimuon_mass  # only used as the shape template for the flag arrays
    selection = passes_dijet & ~is_btagged

    events["vbf_filter"] = ak.where(
        selection,
        ak.ones_like(mass, dtype="bool"),
        ak.zeros_like(mass, dtype="bool"),
    )
    return events[selection]


def applyGGH_cutflow(events):
    """Select ggH events for the cutflow study: neither VBF-like nor b-tagged.

    b-tag veto: nBtagLoose_nominal >= 2, or nBtagMedium_nominal >= 1 together
    with njets_nominal >= 2. VBF veto: jj_mass_nominal > 400,
    jj_dEta_nominal > 2.5 and jet1_pt_nominal > 35. Missing values in any
    comparison count as "not passed".

    Returns:
        The subset of ``events`` that is neither VBF-like nor b-tagged.
    """
    btagLoose_filter = ak.fill_none((events.nBtagLoose_nominal >= 2), value=False)
    btagMedium_filter = ak.fill_none((events.nBtagMedium_nominal >= 1), value=False) & ak.fill_none(
        (events.njets_nominal >= 2), value=False
    )
    btag_cut = btagLoose_filter | btagMedium_filter  # b-tagged events go to VH / ttH

    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35),
        value=False,
    )
    # The original also read events.dimuon_mass into an unused local; dropped so
    # the function only requires the fields it actually cuts on.
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]

def applyGGH_noJetPt(events):
    """Select ggH events using a VBF veto without any jet-pt requirement.

    VBF veto: jj_mass_nominal > 400 and jj_dEta_nominal > 2.5 only.
    b-tag veto: nBtagLoose_nominal >= 2 or nBtagMedium_nominal >= 1.
    Missing values in any comparison count as "not passed".

    Returns:
        The subset of ``events`` that is neither VBF-like nor b-tagged.
    """
    # b-tagged events are reserved for the VH and ttH categories
    btag_cut = ak.fill_none((events.nBtagLoose_nominal >= 2), value=False) | ak.fill_none(
        (events.nBtagMedium_nominal >= 1), value=False
    )
    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400) & (events.jj_dEta_nominal > 2.5),
        value=False,
    )
    # The original also read events.dimuon_mass into an unused local; dropped so
    # the function only requires the fields it actually cuts on.
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]

def veto_ttH_VH(events):
    """Drop b-tagged events, which are assigned to the VH and ttH categories.

    An event is b-tagged when nBtagLoose_nominal >= 2, or when
    nBtagMedium_nominal >= 1 together with njets_nominal >= 2. Missing values
    in any comparison count as "not tagged", so such events are kept.

    Returns:
        The subset of ``events`` that is not b-tagged.
    """
    two_loose_b = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    one_medium_b = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False)
    two_or_more_jets = ak.fill_none(events.njets_nominal >= 2, value=False)

    is_btagged = two_loose_b | (one_medium_b & two_or_more_jets)
    return events[~is_btagged]


def veto_nJetGeq3(events):
    """Drop events with three or more jets.

    Keeps events with njets_nominal <= 2. A missing comparison result is
    filled with True, so events without a jet count are kept.

    Returns:
        The subset of ``events`` with at most two jets.
    """
    at_most_two_jets = ak.fill_none(events.njets_nominal <= 2, value=True)
    return events[at_most_two_jets]

def filterRegion(events, region="h-peak"):
    """Keep only the events whose dimuon_mass lies in the named mass window.

    Args:
        events: Event table exposing a ``dimuon_mass`` field and supporting
            boolean-mask indexing.
        region: One of
            ``"h-peak"``      -- 115.03 < m < 135.03 (exclusive),
            ``"h-sidebands"`` -- 110 < m < 115.03 or 135.03 < m < 150 (exclusive),
            ``"signal"``      -- 110 <= m <= 150,
            ``"z-peak"``      -- 70 <= m <= 110,
            ``"combined"``    -- 70 <= m <= 150.

    Returns:
        The subset of ``events`` inside the requested window.

    Raises:
        ValueError: If ``region`` is not one of the names above. Previously an
            unknown name fell through and was used directly as an index into
            ``events``.
    """
    dimuon_mass = events.dimuon_mass
    # Build the mask under its own name instead of rebinding the `region` argument.
    if region == "h-peak":
        mask = (dimuon_mass > 115.03) & (dimuon_mass < 135.03)
    elif region == "h-sidebands":
        mask = ((dimuon_mass > 110) & (dimuon_mass < 115.03)) | ((dimuon_mass > 135.03) & (dimuon_mass < 150))
    elif region == "signal":
        mask = (dimuon_mass >= 110) & (dimuon_mass <= 150.0)
    elif region == "z-peak":
        mask = (dimuon_mass >= 70) & (dimuon_mass <= 110.0)
    elif region == "combined":
        mask = (dimuon_mass >= 70) & (dimuon_mass <= 150.0)
    else:
        raise ValueError(
            f"Unknown region {region!r}; expected one of "
            "'h-peak', 'h-sidebands', 'signal', 'z-peak', 'combined'"
        )

    # Optional extra selection on the leading muon pt, kept disabled:
    # mu1_pt = events.mu1_pt
    # mu1ptOfInterest = (mu1_pt > 75) & (mu1_pt < 150.0)
    # events = events[mask & mu1ptOfInterest]
    return events[mask]

# Columns pulled from the stage1 parquet output before computing yields.
# The order is kept exactly as before because it fixes the field/column order
# of the zipped array and of the DataFrame built from it.
V1_fields_2compute = [
    # event weight and b-tag multiplicities
    "wgt_nominal", "nBtagLoose_nominal", "nBtagMedium_nominal",
    # single-muon kinematics
    "mu1_pt", "mu2_pt",
    "mu1_eta", "mu2_eta",
    "mu1_phi", "mu2_phi",
    # dimuon system
    "dimuon_pt", "dimuon_eta", "dimuon_phi", "dimuon_mass",
    # leading / subleading jet kinematics
    "jet1_phi_nominal", "jet1_pt_nominal",
    "jet2_pt_nominal", "jet2_phi_nominal",
    "jet1_eta_nominal", "jet2_eta_nominal",
    # dijet system
    "jj_mass_nominal", "jj_dEta_nominal",
    # event identifier and jet multiplicity
    "event", "njets_nominal",
    # currently disabled: "region", "run", "luminosityBlock"
]
 
#

In [3]:
# Weighted VBF-category yield in the signal mass window for the
# "fullRun_May30_2025" stage1 production.
# `year` and `label` pick the stage1 output directory; the commented-out
# alternatives below are earlier productions kept for quick switching.
year = "2018"
# year="*"
# year = "2017"
# label="V2_Jan29_JecOn_TrigMatchFixed_2016UlJetIdFix"

# label="DYamcNLO_11Apr2025"
# label="UpdatedDY_100_200_CrossSection_24Feb_jetpuidOff"
# label="test_test"
# label="DYMiNNLO_30Mar2025"
# label="DYMiNNLO_11Apr2025"
# label="DYMiNNLO_HemVetoOff_17Apr2025"
# label="DYMiNNLO_HemVetoOff_18Apr2025_singleMuTrigMatch"
# label="jetHornStudy_29Apr2025_JecOnJerOff"
# # label="jetHornStudy_29Apr2025_JecOnJerStrat2_jetHornPtCut50"
# label="jetHornStudy_29Apr2025_JecOnJerStrat1n2_jetHornTightPuId"
label="fullRun_May30_2025"

# # year = "2022preEE"
# # label="Run3_nanoAODv12_TEST"
load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}/f1_0"
# load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}/*"



# label="May28_NanoV12"
# load_path =f"/depot/cms/users/shar1172/hmm/copperheadV1clean/{label}/stage1_output/{year}/*"

# # events_data = dak.from_parquet(f"{load_path}/data_D/*.parquet")
# # events_data = dak.from_parquet(f"{load_path}/data_F/*.parquet")
# # filelist = glob.glob(f"{load_path}/data_F")
# # filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/vbf_powheg_dipole")
# filelist = glob.glob(f"{load_path}/data_D")
# print(filelist)
# filelist = glob.glob(f"{load_path}/dy*")
# Only sample directories whose name matches dy*100* are processed.
filelist = glob.glob(f"{load_path}/dy*100*")
total_integral = 0  # running sum of the per-sample weighted yields
for file in filelist:
    print(f"file: {file}")
    events_data = dak.from_parquet(f"{file}/*/*.parquet")
    # Keep only the columns in V1_fields_2compute and materialize them in memory.
    events_data = ak.zip({field: events_data[field] for field in V1_fields_2compute}).compute()
    events_data = filterRegion(events_data, region="signal")
    # events_data = applyGGH_cutV1(events_data)
    # events_data = applyGGH_NoBtagNjet1(events_data)
    # events_data = veto_ttH_VH(events_data)
    events_data = applyVBF_cutV1(events_data)
    
    # events_data = applyGGH_30(events_data)
    # events_data = applyVBF_30(events_data)
    



    
    # data_yield = ak.sum(events_data.wgt_nominal, axis=0)
    # Missing weights are counted as 1.0 before summing.
    wgts = ak.fill_none(events_data.wgt_nominal, value=1.0)
    data_yield = ak.sum(wgts)
    # Flat pandas copy with missing entries replaced by -999.9. It is not used
    # again in this cell; it stays in the kernel as `df` for ad-hoc inspection.
    df = pd.DataFrame({field: ak.fill_none(events_data[field], value=-999.9) for field in events_data.fields})
    print(f"data_yield for {file}: {data_yield}")
    total_integral += data_yield
# Last expression: displayed as the cell output.
total_integral


file: /depot/cms/users/yun79/hmm/copperheadV1clean/fullRun_May30_2025/stage1_output/2018/f1_0/dy_M-100To200_MiNNLO
data_yield for /depot/cms/users/yun79/hmm/copperheadV1clean/fullRun_May30_2025/stage1_output/2018/f1_0/dy_M-100To200_MiNNLO: 5684.364850861507


5684.364850861507

In [4]:


# Same weighted VBF yield as the previous cell, for the "May28_NanoV12"
# production. `year` is not set here: it is whatever an earlier cell left in
# the kernel.
label="May28_NanoV12"
# The trailing "/*" is a glob wildcard over the sub-directories of the year folder.
load_path =f"/depot/cms/users/shar1172/hmm/copperheadV1clean/{label}/stage1_output/{year}/*"

# # events_data = dak.from_parquet(f"{load_path}/data_D/*.parquet")
# # events_data = dak.from_parquet(f"{load_path}/data_F/*.parquet")
# # filelist = glob.glob(f"{load_path}/data_F")
# # filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/vbf_powheg_dipole")
# filelist = glob.glob(f"{load_path}/data_D")
# print(filelist)
# filelist = glob.glob(f"{load_path}/dy*")
# Only sample directories whose name matches dy*100* are processed.
filelist = glob.glob(f"{load_path}/dy*100*")

total_integral = 0  # running sum of the per-sample weighted yields
for file in filelist:
    print(f"file: {file}")
    events_data = dak.from_parquet(f"{file}/*/*.parquet")
    # Keep only the columns in V1_fields_2compute and materialize them in memory.
    events_data = ak.zip({field: events_data[field] for field in V1_fields_2compute}).compute()
    events_data = filterRegion(events_data, region="signal")
    # events_data = applyGGH_cutV1(events_data)
    # events_data = applyGGH_NoBtagNjet1(events_data)
    # events_data = veto_ttH_VH(events_data)
    events_data = applyVBF_cutV1(events_data)
    
    # events_data = applyGGH_30(events_data)
    # events_data = applyVBF_30(events_data)
    



    
    # data_yield = ak.sum(events_data.wgt_nominal, axis=0)
    # Missing weights are counted as 1.0 before summing.
    wgts = ak.fill_none(events_data.wgt_nominal, value=1.0)
    data_yield = ak.sum(wgts)
    # Flat pandas copy with missing entries replaced by -999.9. It is not used
    # again in this cell; it stays in the kernel as `df` for ad-hoc inspection.
    df = pd.DataFrame({field: ak.fill_none(events_data[field], value=-999.9) for field in events_data.fields})
    print(f"data_yield for {file}: {data_yield}")
    total_integral += data_yield
# Last expression: displayed as the cell output.
total_integral


file: /depot/cms/users/shar1172/hmm/copperheadV1clean/May28_NanoV12/stage1_output/2018/f1_0/dy_M-100To200_MiNNLO
data_yield for /depot/cms/users/shar1172/hmm/copperheadV1clean/May28_NanoV12/stage1_output/2018/f1_0/dy_M-100To200_MiNNLO: 5426.78557035023


5426.78557035023

In [None]:
# Unweighted event count for the data_B sample, with all selections currently
# switched off (every filter call in the loop is commented out).
year = "2017"
# year = "*"
# year = "2016"
# label="V2_Jan29_JecOn_TrigMatchFixed_2016UlJetIdFix"
# label="test_test"
# label="DYMiNNLO_30Mar2025"
# label="rereco_yun_Dec05_btagSystFixed_JesJerUncOn"
# label="test_cutflow"
# label="test_cutflow_applyAllMuCorrection_17Apr2025"
# label="test_cutflow_applyEcalGapVeto_17Apr2025"
# label="test"
# NOTE: this assignment has no effect -- the next line overwrites `label` immediately.
label="Run2Rereco_synch_Apr23_2025"
label="jetHornStudy_29Apr2025_JecOnJerOff"
# year = "2022preEE"
# label="Run3_nanoAODv12_TEST"
load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}"
# load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/*"


# # events_data = dak.from_parquet(f"{load_path}/data_D/*.parquet")
# # events_data = dak.from_parquet(f"{load_path}/data_F/*.parquet")
# # filelist = glob.glob(f"{load_path}/data_F")
# # filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/data_*")
filelist = glob.glob(f"{load_path}/data_B")

# filelist = glob.glob(f"{load_path}/dy*")
total_integral = 0  # running sum of the per-sample event counts
for file in filelist:
    print(f"file: {file}")
    # Parquet files are read directly under the sample directory here (one
    # directory level less than in the yield cells above).
    events_data = dak.from_parquet(f"{file}/*.parquet")
    print(events_data.fields)

    # Keep only the columns in V1_fields_2compute and materialize them in memory.
    events_data = ak.zip({field: events_data[field] for field in V1_fields_2compute}).compute()
    # events_data = filterRegion(events_data, region="signal")
    # events_data = applyGGH_cutV1(events_data)
    # events_data = veto_nJetGeq3(events_data)
    # events_data = veto_ttH_VH(events_data)
    # events_data = applyGGH_new(events_data)
    
    # print(f"events_data.jet1_pt_nominal : {events_data.jet1_pt_nominal }")
    # print(f"events_data.jj_mass_nominal: {events_data.jj_mass_nominal}")
    # print(f"events_data.jj_dEta_nominal: {events_data.jj_dEta_nominal}")
    # print(f"events_data.nBtagLoose_nominal: {events_data.nBtagLoose_nominal}")
    # print(f"events_data.nBtagMedium_nominal: {events_data.nBtagMedium_nominal}")
    # print(f"events_data.njets_nominal: {events_data.njets_nominal}")
    # print(f"not btag_cut: {~btag_cut}")
    # print(f"ggH_filter: {ggH_filter}")
    
    
    
    # events_data = applyGGH_noJetPt(events_data)
    # events_data = applyVBF_cutV1(events_data)
    # events_data = veto_ttH_VH(events_data)
    
    
    # Number of events (length along axis 0), i.e. an unweighted count rather
    # than the weighted sum used in the yield cells above.
    data_yield = ak.num(events_data.dimuon_mass, axis=0)
    # wgts = ak.fill_none(events_data.wgt_nominal, value=1.0)
    # data_yield = ak.sum(wgts)
    # Flat pandas copy with missing entries replaced by -999.9. It is not used
    # again in this cell; it stays in the kernel as `df` for ad-hoc inspection.
    df = pd.DataFrame({field: ak.fill_none(events_data[field], value=-999.9) for field in events_data.fields})
    print(f"data_yield for {file}: {data_yield}")
    total_integral += data_yield
# Last expression: displayed as the cell output.
total_integral


In [None]:
# Largest jet multiplicity in `events_data`, which must have been left in the
# kernel by a previously run cell (it is not defined here).
ak.max(events_data.njets_nominal)

In [None]:
# Display the leading-jet pt column of the `events_data` left in the kernel.
events_data.jet1_pt_nominal
# events_data.njets_nominal

In [None]:
# Compare the raw `njets <= 2` mask with the same mask after missing entries
# are filled with True (the treatment used in veto_nJetGeq3), on the first 50 events.
print(events_data.njets_nominal[:50] <=2)
print(ak.fill_none(events_data.njets_nominal[:50] <=2, value=True))

In [None]:
# Inspect the individual weight components ("separate*" fields) of the
# dy_M-50_MiNNLO sample in the z-peak mass window.
year = "2018"
# label="V2_Jan29_JecOn_TrigMatchFixed_2016UlJetIdFix"
# label="DYMiNNLO_30Mar2025"
label="jetHornStudy_29Apr2025_JecOnJerOff"

# label="test_test"
# year = "2022preEE"
# label="Run3_nanoAODv12_TEST"
load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}/f1_0"

# filelist = glob.glob(f"{load_path}/dy*")
filelist = glob.glob(f"{load_path}/dy_M-50_MiNNLO")

total_integral = 0  # never updated in this cell
for file in filelist:
    print(f"file: {file}")
    # events_data is not materialized as a whole in this cell: individual
    # columns are evaluated one at a time via .compute() below.
    events_data = dak.from_parquet(f"{file}/*/*.parquet")
    # events_data = filterRegion(events_data, region="signal")
    events_data = filterRegion(events_data, region="z-peak")
    wgt = events_data.wgt_nominal.compute()
    # print(f"wgt sum: {wgt}")
    print(f"wgt sum: {ak.sum(wgt)}")
    # Running product of every field whose name contains "separate".
    # The comparison against `wgt` is commented out below, so `comp` is
    # built but not reported.
    comp = ak.ones_like(wgt)
    for field in events_data.fields:
        if "separate" in field:
            value = events_data[field].compute()
            print(f"{field} arr: {value}")
            comp = comp*value
            # print(f"{field} curent wgt: {comp}")
    # diff = comp- wgt
    # print(f"comp : {comp}")
    # print(f"wgt : {wgt}")
    # print(f"sum wgt : {ak.sum(wgt)}")
    # print(f"difference : {diff}")
            # print(f"{field} max val: {ak.max(value)}")

In [None]:
# Scratch arithmetic. NOTE: the two comma-grouped numbers below are parsed by
# Python as tuples (e.g. `191,709,872` is `(191, 709, 872)`), not as single
# numbers, and only the last expression is shown as the cell output.
2.36e+03 * 228348879
41,158,111.73464724
191,709,872

In [None]:
# Scratch value kept for reference; its origin is not recorded in this notebook.
2.5292969635125805e+20 

In [None]:
# Calls .compute() on the column, so `events_data` must still be the lazy
# dask-awkward array left by the weight-breakdown cell, not an in-memory array.
wgt_nominal = events_data["wgt_nominal"].compute()
ak.sum(wgt_nominal)

In [None]:
# Nominal weight divided by the separate_wgt_qgl_wgt component, then summed.
# Uses `wgt_nominal` from a previously run cell.
# NOTE(review): presumably this removes the QGL factor from the total weight -- confirm.
test = wgt_nominal/ events_data["separate_wgt_qgl_wgt"].compute()
ak.sum(test)

In [None]:
# Sum of the separate_wgt_genWeight component over the events in `events_data`.
gen_wgt = events_data["separate_wgt_genWeight"].compute()
ak.sum(gen_wgt)

In [None]:
# Uses `gen_wgt` from a previously run cell.
# TODO: 7.1e-12 is a hand-entered factor whose meaning is not documented here.
ak.sum(gen_wgt)*7.1e-12

In [None]:
# Display the separate_wgt_genWeight_normalization component.
events_data["separate_wgt_genWeight_normalization"].compute()

In [None]:
# Same sum as the earlier wgt_nominal cell, recomputed without storing the array.
ak.sum(events_data["wgt_nominal"].compute())

In [None]:
year = "2018"

# True when the year string contains any of the two-digit tags 16, 17 or 18.
any(tag in year for tag in ("16", "17", "18"))