In [2]:
import dask_awkward as dak
import awkward as ak
from distributed import LocalCluster, Client, progress
import time
import numpy as np
import matplotlib.pyplot as plt
import json
import mplhep as hep
import glob
import pandas as pd

# Use the CMS style sheet provided by mplhep for every matplotlib figure.
plt.style.use(hep.style.CMS)

# Create the Dask distributed client used by the dask-awkward computations below.
# NOTE(review): in distributed, memory_limit is a per-worker cap, so 15 workers
# may use up to ~120 GiB in total -- confirm the host has that much memory.
client = Client(
    n_workers=15,
    threads_per_worker=2,
    processes=True,
    memory_limit='8 GiB',
)


Perhaps you already have a cluster running?
Hosting the HTTP server on port 45369 instead


In [3]:
"""
This code prints ggH/VBF channel yields after applying category cuts
"""

def applyVBF_cutV1(events):
    """
    Select the events that pass the VBF category cut.

    An event passes when
      * jj_mass_nominal > 400, jj_dEta_nominal > 2.5 and jet1_pt_nominal > 35, and
      * neither nBtagLoose_nominal >= 2 nor nBtagMedium_nominal >= 1 holds.
    A missing (None) value in any of these conditions is treated as False.

    Side effect: a boolean "vbf_filter" field holding the per-event decision is
    assigned on the `events` object that was passed in.

    Returns `events` indexed by the VBF decision, i.e. only the passing events.
    """
    # b-tag veto inputs (btag cut is for VH and ttH categories).
    two_loose = ak.fill_none((events.nBtagLoose_nominal >= 2), value=False)
    one_medium = ak.fill_none((events.nBtagMedium_nominal >= 1), value=False)
    is_btagged = two_loose | one_medium

    # Dijet requirement; fill_none turns a None result into a failed cut.
    passes_dijet = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35),
        value=False,
    )

    VBF_filter = passes_dijet & ~is_btagged

    # Write the decision out as a bool column built from dimuon_mass-shaped
    # arrays of ones and zeros.
    mass = events.dimuon_mass
    events["vbf_filter"] = ak.where(
        VBF_filter,
        ak.ones_like(mass, dtype="bool"),
        ak.zeros_like(mass, dtype="bool"),
    )
    return events[VBF_filter]

def applyGGH_cutV1(events):
    """
    Select the events that fall into the ggH category.

    An event is kept when it fails the VBF dijet requirement
    (jj_mass_nominal > 400, jj_dEta_nominal > 2.5, jet1_pt_nominal > 35)
    and is not b-tagged (neither nBtagLoose_nominal >= 2 nor
    nBtagMedium_nominal >= 1). A missing (None) value in any condition is
    treated as False, so an event with no dijet information counts as
    "not VBF" and can be kept here.

    Returns `events` indexed by that boolean decision; the input is not modified.
    """
    # btag cut is for VH and ttH categories, so b-tagged events are vetoed.
    btag_cut = (
        ak.fill_none((events.nBtagLoose_nominal >= 2), value=False)
        | ak.fill_none((events.nBtagMedium_nominal >= 1), value=False)
    )
    vbf_cut = (events.jj_mass_nominal > 400) & (events.jj_dEta_nominal > 2.5) & (events.jet1_pt_nominal > 35)
    vbf_cut = ak.fill_none(vbf_cut, value=False)
    ggH_filter = (
        ~vbf_cut &
        ~btag_cut
    )
    return events[ggH_filter]

def filterRegion(events, region="h-peak"):
    """
    Keep only the events whose dimuon mass lies inside a named mass window.

    Parameters
    ----------
    events
        Record collection exposing a `dimuon_mass` attribute and supporting
        indexing with a boolean mask.
    region : str
        Name of the window (m = dimuon_mass):
          "h-peak"      : 115.03 < m < 135.03
          "h-sidebands" : 110 < m < 115.03  or  135.03 < m < 150
          "signal"      : 110 <= m <= 150
          "z-peak"      : 70 <= m <= 110

    Returns
    -------
    `events` indexed by the window mask.

    Raises
    ------
    ValueError
        If `region` is not one of the names listed above. Without this check
        an unknown name would be used directly as the index into `events`,
        i.e. a field lookup instead of a mass selection.
    """
    dimuon_mass = events.dimuon_mass
    # NOTE(review): "signal" includes its edges, while "h-peak" and
    # "h-sidebands" together exclude exactly 110, 115.03, 135.03 and 150 --
    # confirm this asymmetry is intended.
    if region == "h-peak":
        mask = (dimuon_mass > 115.03) & (dimuon_mass < 135.03)
    elif region == "h-sidebands":
        mask = ((dimuon_mass > 110) & (dimuon_mass < 115.03)) | ((dimuon_mass > 135.03) & (dimuon_mass < 150))
    elif region == "signal":
        mask = (dimuon_mass >= 110) & (dimuon_mass <= 150.0)
    elif region == "z-peak":
        mask = (dimuon_mass >= 70) & (dimuon_mass <= 110.0)
    else:
        raise ValueError(
            f"Unknown region {region!r}; expected one of "
            "'h-peak', 'h-sidebands', 'signal', 'z-peak'"
        )

    # Disabled extra selection kept for reference:
    # mu1_pt = events.mu1_pt
    # mu1ptOfInterest = (mu1_pt > 75) & (mu1_pt < 150.0)
    # events = events[mask & mu1ptOfInterest]
    return events[mask]

# Names of the fields that are selected from each parquet dataset before it is
# computed into memory. The order here is the field order of the zipped record.
V1_fields_2compute = [
    # wgt / nBtag columns
    "wgt_nominal", "nBtagLoose_nominal", "nBtagMedium_nominal",
    # mu1 / mu2 columns
    "mu1_pt", "mu2_pt", "mu1_eta", "mu2_eta", "mu1_phi", "mu2_phi",
    # dimuon columns
    "dimuon_pt", "dimuon_eta", "dimuon_phi", "dimuon_mass",
    # jet1 / jet2 columns
    "jet1_phi_nominal", "jet1_pt_nominal", "jet2_pt_nominal",
    "jet2_phi_nominal", "jet1_eta_nominal", "jet2_eta_nominal",
    # jj (dijet) columns
    "jj_mass_nominal", "jj_dEta_nominal",
    # "region",
    "event",
]
 
# Count the data events in the "signal" mass window that land in the ggH
# category, dataset by dataset, and sum them into total_integral.
year = "2018"
label="WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear"
load_path =f"/depot/cms/users/shar1172/hmm/copperheadV1clean/{label}/stage1_output/{year}/f1_0"

# One entry per directory matching data_* under load_path. glob returns its
# matches in a filesystem-dependent order, so sort them: this makes the printed
# log, and the `df` left over from the last iteration, the same on every run.
# (To run on a single dataset, glob e.g. f"{load_path}/data_F" instead.)
filelist = sorted(glob.glob(f"{load_path}/data_*"))
if not filelist:
    # An empty match would otherwise just report a total of 0 with no hint why.
    print(f"WARNING: no data_* directories found under {load_path}")

total_integral = 0
for file in filelist:
    print(f"file: {file}")
    events_data = dak.from_parquet(f"{file}/*/*.parquet")

    # Keep only the listed fields, compute them into memory, then apply the
    # mass-window and category selections on the in-memory array.
    events_data = ak.zip({field: events_data[field] for field in V1_fields_2compute}).compute()
    events_data = filterRegion(events_data, region="signal")
    events_data = applyGGH_cutV1(events_data)
    # events_data = applyVBF_cutV1(events_data)

    # Number of surviving events in this dataset.
    data_yield = ak.num(events_data.dimuon_mass, axis=0)
    # NOTE(review): df is rebuilt on every iteration and not used in this cell,
    # so only the last dataset's frame survives -- confirm a later cell needs it.
    df = pd.DataFrame({field: ak.fill_none(events_data[field], value=-999.9) for field in events_data.fields})
    print(f"data_yield for {file}: {data_yield}")
    total_integral += data_yield
total_integral


file: /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_C
data_yield for /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_C: 81222
file: /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_A
data_yield for /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_A: 164265
file: /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_B
data_yield for /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_B: 83050
file: /depot/cms/users/shar1172/hmm/copperheadV1clean/WithPurdueZptWgt_DYWithoutLHECut_16Feb_AllYear/stage1_output/2018/f1_0/data_D
data_yield for /depot/cms/u

705310