In [1]:
import coffea, hist
print(coffea.__version__)
print(hist.__version__)

2025.10.2
2.9.0


In [2]:
import awkward as ak
import hist

#import boost_histogram as bh

#import dask.array as da

#import dask_histogram as dh

import json
import sys

from coffea import processor
from coffea.nanoevents import NanoAODSchema, BaseSchema


from taggers.lep_tagger_dev import tag_qual, tag_qual_and_gen

class Processor(processor.ProcessorABC):
    def __init__(self, mode="virtual"):
        assert mode in ["virtual", "eager", "dask"]
        self._mode = mode

    def process(self, events):
        dataset = events.metadata["dataset"]
        print(dataset)

        is_UL = events.metadata.get("is_UL", False)

        events = tag_qual(events)

        events_2 = tag_qual_and_gen(events)

        leps_wo_gen = events.Leptons

        leps_w_gen = events_2.Leptons

        h_pt = hist.Hist(
            hist.axis.Regular(50, 0, 200, name="gen", label="gen"),
        )
        
        # Fill histogram
        h_pt.fill(
            gen=ak.flatten(leps_w_gen.gen_tag)
           )
        
        output = {
            "lep_wo_gen_fields": leps_wo_gen.fields,
            "lep_w_gen_fields": leps_w_gen.fields,
            "h_pt": h_pt
        } 


        #Do analysis here, fill output dict with results
                
        return output  
        
    def postprocess(self, accumulator):
        pass

In [3]:
import cloudpickle
import os
from datetime import datetime
from dask.distributed import Client

# Create pickles directory if it doesn't exist
os.makedirs("pikls", exist_ok=True)

# Set mode
mode = "virtual"  # or "dask"
make_pikl = False

#fileset_name = "fileset_full.txt"
fileset_name = "fileset.txt"


results = {}
all_metrics = {}

with open(fileset_name, "r") as f:
    exec(f.read())

# Set up executor based on mode
if mode == "dask":
    client = Client("tls://localhost:8786")
    executor = processor.DaskExecutor(client=client)
    
else:  # virtual mode
    executor = processor.IterativeExecutor()

# Set up output directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"pikls/{timestamp}"
os.makedirs(output_dir, exist_ok=True)

# Process each dataset
for dataset_name in fileset.keys():
    print(f"\nProcessing {dataset_name}...")
    
    single_fileset = {dataset_name: fileset[dataset_name]}
    
    runner = processor.Runner(
        executor=executor,
        schema=NanoAODSchema,
        savemetrics=True,
        skipbadfiles=False,
    )
    
    result, metrics = runner(single_fileset, processor_instance=Processor(mode=mode))
    results[dataset_name] = result
    all_metrics[dataset_name] = metrics
    
    if make_pikl:
        output_file = f"{output_dir}/{dataset_name}.pkl"
        
        with open(output_file, "wb") as f:
            cloudpickle.dump({'result': result, 'metrics': metrics}, f)
        
        print(f"Saved {output_file}")

# Save combined results
if make_pikl:
    output_file = f"{output_dir}/all_datasets_full.pkl"
    
    with open(output_file, "wb") as f:
        cloudpickle.dump({'results': results, 'metrics': all_metrics}, f)
    
    print(f"Saved {output_file}")

Output()


Processing WtoLNu-2JetsTuneCP513p6TeVamcatnloFXFX-pythia8...


Output()

Output()


Processing SMS-TChiWZZToLLmZMin-0p1TuneCP513TeV-madgraphMLM-pythia8RunIISummer20UL18NanoAODv9-106Xupgrade2018realisticv16L1v1-v1NANOAODSIM...


Output()

Exception: Failed processing file: WorkItem(dataset='SMS-TChiWZZToLLmZMin-0p1TuneCP513TeV-madgraphMLM-pythia8RunIISummer20UL18NanoAODv9-106Xupgrade2018realisticv16L1v1-v1NANOAODSIM', filename='root://cms-xrd-global.cern.ch//store/mc/RunIISummer20UL18NanoAODv9/SMS-TChiWZ_ZToLL_mZMin-0p1_TuneCP5_13TeV-madgraphMLM-pythia8/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/2560000/09873976-BDB9-AD4E-8D15-3CB28937D990.root', treename='Events', entrystart=0, entrystop=98050, fileuuid=b'\xf3\xa9\xd6\xea\xa3\x0e\x11\xed\xa3HD\xbf\xe1\x83\xbe\xef', usermeta={'is_mc': True, 'is_UL': True}). The error was: AttributeError("no field named 'mvaIso_WP90'").

In [6]:
result['h_pt'].values

<bound method Histogram.values of Hist(Regular(50, 0, 200, name='gen'), storage=Double()) # Sum: 310970.0 (339328.0 with flow)>

In [None]:
result['lep_w_gen_fields']

In [None]:
result['ele_pt']

In [None]:
result['lpte_pt']

In [None]:
result['muon_pt']

In [None]:
result['ele_fields']

In [None]:
list(result.keys())

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
fig, axs = plt.subplots(figsize=(11, 5))
result['MET_lt_150_dilep_ss_mumu_plot'].integrate("qual_sum", [2j, 20j, 200j, 11j, 110j, 101j]).plot2d(norm=LogNorm())
#result['MET_lt_150_dilep_ss_mumu_plot'].integrate("qual_sum").plot2d(norm=LogNorm())

In [None]:
result['MET_lt_150_dilep_ss_mumu_plot_ratio'].plot()
plt.yscale('log')

In [None]:
def baseline(hist):
    return hist.integrate("qual_sum", [2j,11j,20j,101j,110j,200j])
baseline(result['MET_lt_150_dilep_ss_mumu_plot']).plot2d(norm=LogNorm())

In [None]:
h = result['MET_lt_150_dilep_ss_mumu_plot_ratio']
fig, axs = plt.subplots(figsize=(11, 5))
# Create a label mapping for the qual_sum axis
labels = [
    "ff",
    "fb",
    "bb",
    "fs",
    "bs",
    "ss",
    "fg",
    "gb",
    "gs",
    "gg"
]

#plt.gca().set_yticklabels(labels)
#h.axes['dr_div_m'].label = r"$\frac{\Delta R}{m_{\ell\ell}}$"
h.plot2d(norm=LogNorm())
# Now plot

In [None]:
u_qs

In [None]:
h.integrate("qual_sum", 101j)[:,:]

In [None]:
result

In [None]:
result['300_290']['300_290_dilep_plot'].integrate("qual_sum", 20j)

In [None]:
print(result['MET_lt_150_dilep_os_mumu_plot'].sum(flow=True))
print(result['MET_lt_150_dilep_os_mumu_count'])

In [None]:
result.keys()

In [None]:
results.keys()

In [None]:
result.keys()

In [None]:
for key in results[list(results)[0]].keys():
    print(key)

In [None]:
results[list(results)[0]]["MET_lt_150_dilep_os_ee_plot"]

In [None]:
#results[sig_name]['lpte_dict']['pt_ID_hist'].integrate("qual_tag")[:,:, 10j]
#r['test_dict']['pt_eta_hist'].project('pt', 'qual_tag').plot()
#TChi_r['electron_dict']['pt_gen_qual_hist'][:,10j,3j]

#[-2, 0, 1, 2, 3, 4, 5, 6]

In [None]:
results[list(results)[0]]["MET_lt_150_dilep_os_ee_plot"][:,:,0j]

In [None]:
results[list(results)[0]]["MET_lt_150_dilep_os_ee_plot"].integrate("qual_sum")

In [None]:
for mp, mp_dict in results[list(results)[1]].items():
    print()
    print(mp)
    print()
    for key, item in mp_dict.items():
        if "_count" in key:
            print(key)
            print(item)

In [None]:
result['dilep_m_ll_gt_1p0_MET_lt_150_plot'].plot()

In [None]:
result['dilep_diag_cut_a2_MET_lt_150_plot'].plot()

In [None]:
result['dilep_dr_gt_0p1_MET_lt_150_plot'].plot()

In [None]:
results[results.keys()]['dilep_diag_cut_2_MET_lt_150_plot'].plot()

In [None]:
results[list(results.keys())[0]]['dilep_diag_cut_2_MET_lt_150_count']

In [None]:
results[list(results.keys())[0]]['dilep_diag_cut_2_MET_lt_150_plot'].plot()

In [None]:
results[list(results.keys())[1]]['dilep_m_ll_gt_1p0_300_290_MET_lt_150_plot'].plot()

In [None]:
results[list(results.keys())[1]]

In [None]:
for key in results[list(results.keys())[1]].keys():
    print(key)