In [2]:
import os, glob, json, warnings
import pandas as pd
import importlib.util
from coffea import processor
from coffea.nanoevents import NanoAODSchema

In [3]:
### Directory Setup ###
# Folder that receives the per-sample parquet files; missing parent folders are
# created as well, and an already existing folder is left untouched.
outputPath = "output/results/makeDF/2018/mc/"
os.makedirs(name=outputPath, exist_ok=True)

In [4]:
### TaskVine Setup ###
# Manager name is read from the environment; it is None when VINE_MANAGER_NAME is unset.
manager_name = os.environ.get("VINE_MANAGER_NAME")
print(manager_name)
# Comma-separated list of ports. `or` also covers a variable that is set but
# empty, which would otherwise reach int("") below and raise ValueError.
ports_str = os.environ.get("VINE_MANAGER_PORTS") or "9123, 9150"
# Blank tokens (e.g. from a trailing comma) are skipped; int() itself accepts
# surrounding whitespace, so no explicit strip is needed for the conversion.
ports = [int(token) for token in ports_str.split(",") if token.strip()]

floability-8c382c50-eb45-45fe-a06a-39e482f9f94c


In [5]:
# Shape `ports` for the executor: a single port is collapsed to a bare int,
# several ports stay a list of ints.
# The isinstance guard keeps this cell safe to re-run: once `ports` has been
# collapsed to an int, calling len() on it would raise TypeError.
if isinstance(ports, (list, tuple)):
    ports = ports[0] if len(ports) == 1 else [int(p) for p in ports]

In [6]:
# Show the port value (a single int or a list of ints) that is passed to the executor.
print(f"Manager Ports: {ports}")

Manager Ports: [9123, 9150]


In [7]:
# Keyword arguments for the TaskVine executor. `extra_input_files` is every
# path matching utils/* at the time this cell runs.
executor_args = dict(
    desc='Processing makeDF',
    manager_name=manager_name,
    port=ports,
    extra_input_files=glob.glob("utils/*"),
    retries=5,
    fast_terminate_workers=0,
)
executor = processor.TaskVineExecutor(**executor_args)

In [8]:
# Directory holding the input files of each sample.
sample_dirs = {
    "ETau": "data/samples/test/ETauData/",
    "DYJets": "data/samples/test/DYData/",
}
# Sample name -> list of file paths (directory prefix + entry name).
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# give the same file list on every run and on every machine.
samples = {
    name: [directory + fname for fname in sorted(os.listdir(directory))]
    for name, directory in sample_dirs.items()
}

In [9]:
# module
# Build an import spec for the processor source file under the module name
# 'my_processor', create the module object from it, then execute the file so
# its definitions become attributes of `module`.
processorpath = 'processors/makeDF.py'
spec = importlib.util.spec_from_file_location(name='my_processor', location=processorpath)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

In [10]:
# processor
# Ignore warnings whose message starts with "Missing cross-reference" when they
# originate from the coffea NanoAOD schema module.
warnings.filterwarnings(
    action="ignore",
    message=r"Missing cross-reference.*",
    module="coffea.nanoevents.schemas.nanoaod",
)

# Processor object taken from the dynamically loaded module.
processor_instance = module.my_processor(year="2018", type="mc")

# Runner configuration, kept in one place so the settings are easy to scan.
runner_options = {
    "executor": executor,
    "schema": NanoAODSchema,
    "maxchunks": None,
    "chunksize": 20000,
    "xrootdtimeout": 900,
    "skipbadfiles": False,
}
run = processor.Runner(**runner_options)

In [11]:
### Running & Saving ###
result = run(samples, treename="Events", processor_instance=processor_instance)
for sampleName in samples:
    # Collect this sample's entries from the result: keys are indexed as
    # key[0] / key[1], and key[0] becomes the column name.
    # NOTE(review): `sampleName in key[1]` is a containment test, so a sample
    # whose name is contained in another sample's key[1] would also be picked
    # up — confirm whether an exact `==` match is intended.
    sampleColumns = {
        key[0]: value for key, value in result.items() if sampleName in key[1]
    }
    # One-row frame: each column holds the whole value stored under that key.
    parqResult = pd.DataFrame([sampleColumns])
    # os.path.join avoids the doubled separator that appears when outputPath
    # already ends with "/".
    outFile = os.path.join(outputPath, f"{sampleName}.parq")
    parqResult.to_parquet(outFile)
    print(f"Results saved in {outFile}")

GridspecLayout(children=(VBox(children=(HTML(value=''), HTML(value='')), layout=Layout(grid_area='widget001'))…

Output()

Results saved in output/results/makeDF/2018/mc//ETau.parq
Results saved in output/results/makeDF/2018/mc//DYJets.parq


In [15]:
from datetime import datetime
from zoneinfo import ZoneInfo
import uuid

# Completion marker: current time in the America/New_York zone (12-hour clock
# with AM/PM) followed by the first 8 hex characters of a random UUID.
marker_zone = ZoneInfo("America/New_York")
now = datetime.now(tz=marker_zone).strftime("%Y-%m-%d %I:%M:%S %p")
exec_id = uuid.uuid4().hex[:8]

print(f"__floability_execution_done__::{now}::{exec_id}")

__floability_execution_done__::2026-02-08 02:47:09 PM::20a5b96c
