In [12]:
# Standard Library Imports
import copy
import os
import sys
import traceback

# Third-Party Imports
import matplotlib.pyplot as plt
import requests
import ROOT
import tqdm
import uproot
import awkward as ak
import vector
vector.register_awkward()
from cernopendata_client import searcher

# Local Application Imports
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from parse_atlas import consts, parser

In [None]:

# Build the ATLAS parser and ask it for the data index of the 13 TeV records.
#
# The name `parser` is rebound from the imported module to the instance below.
# Constructing through a private alias of the module keeps this cell
# re-runnable: without it, a second execution would look up `ATLAS_Parser` on
# the instance instead of on the module and fail.
from parse_atlas import parser as _parser_module

parser = _parser_module.ATLAS_Parser()
# NOTE(review): `all_files_uris` is not referenced by the later cells shown in
# this notebook (they iterate over `all_files`) — confirm whether it is needed.
all_files_uris = parser.get_data_index(consts.ATLAS_13TEV_RECIDS)

In [None]:

SERVER_HTTP_URI = "http://opendata.cern.ch"
RECID = 80001

# Check that the record exists before asking for its metadata. RECID is used
# for both calls so that changing it above cannot verify one record and then
# download another.
searcher.verify_recid(server=SERVER_HTTP_URI, recid=RECID)
metadata_from_recid = searcher.get_record_as_json(server=SERVER_HTTP_URI, recid=RECID)

# Flatten all file indices of the record into a single list of file URIs,
# echoing each URI as it is collected.
all_files = []
for file_indice in metadata_from_recid['metadata']['_file_indices']:
    for file in file_indice['files']:
        uri = file['uri']
        all_files.append(uri)
        print(uri)

# Every listed file contributes exactly one URI, so the list length is the total.
total_files = len(all_files)
print('Total amount of root files - ', total_files)


In [None]:
# Branch selection used by read_events: each key is an object collection and
# each value lists the fields to load for that collection.
schema = dict(
    Electrons=["pt", "eta", "phi"],
    Muons=["pt", "eta", "phi"],
    Jets=["pt", "eta", "phi", "m"],
    BTagging_AntiKt4EMPFlow=["DL1dv01_pb"],
)

# Function to read events according to the schema
def read_events(filename, schema):
    """Load the branches named in ``schema`` from one ROOT file.

    Parameters
    ----------
    filename : path or URL of the ROOT file; its ``CollectionTree`` is opened.
    schema : mapping of object name -> list of field names to read.

    Returns
    -------
    An awkward record array with one field per object name in ``schema``.
    The objects are zipped with ``depth_limit=1``, i.e. only at the event
    level, so each collection keeps its own per-event length.

    Notes
    -----
    Fields are read from ``<base>AuxDyn.<field>``. For ``Electrons``,
    ``Muons`` and ``Jets`` the base is the object name prefixed with
    ``Analysis`` and the fields are combined with ``vector.zip``; any other
    object uses its own name as base and plain ``ak.zip``.
    """
    kinematic_objects = ("Electrons", "Muons", "Jets")
    per_object = {}

    with uproot.open({filename: "CollectionTree"}) as tree:
        for objname, fields in schema.items():
            is_kinematic = objname in kinematic_objects
            prefix = f"Analysis{objname}" if is_kinematic else objname
            combine = vector.zip if is_kinematic else ak.zip

            # Alias each short field name to its full branch name so the
            # resulting array exposes the short names.
            branch_aliases = {field: f"{prefix}AuxDyn.{field}" for field in fields}
            raw = tree.arrays(fields, aliases=branch_aliases)

            columns = dict(zip(raw.fields, ak.unzip(raw)))
            per_object[objname] = combine(columns)

        return ak.zip(per_object, depth_limit=1)

# Events loaded from each ROOT file, one entry per successfully read file.
events_list = []

# Load every file listed in `all_files`. A file that fails to load is reported
# and skipped so that a single bad file does not abort the whole run.
for file_url in all_files:
    # The URI is used as listed in the record metadata; only surrounding
    # whitespace is removed (no scheme rewriting happens here).
    root_url = file_url.strip()
    print(f"Processing ROOT file: {root_url}")

    try:
        # Read events from the ROOT file according to the schema
        events = read_events(root_url, schema)
    except Exception as e:
        # Report the message, type and arguments, then the full traceback.
        print(f"Error processing: {e}")
        print(f"Exception type: {type(e)}")
        print(f"Exception arguments: {e.args}")
        traceback.print_exc()
    else:
        events_list.append(events)

# Stop here when nothing could be loaded. Otherwise `combined_events` would be
# left undefined (or, on a re-run, still hold the result of an earlier
# execution) and the analysis cell below would fail confusingly or silently
# use stale data.
if not events_list:
    raise RuntimeError("No events were processed.")

combined_events = ak.concatenate(events_list, axis=0)
print("Successfully combined all events.")

# Example: access electron data in the combined events
# electrons = combined_events["Electrons"]


In [None]:
# Conversion factor: a cut written as `N * GeV` is compared with the stored
# values, and dividing a stored value by `GeV` gives the number plotted on the
# "(GeV)" axis below. The stored values are presumably in MeV — confirm
# against the dataset documentation.
GeV = 1000.


def _within_acceptance(objects, min_pt_gev, max_abs_eta):
    """Return the entries of ``objects`` with pt > ``min_pt_gev`` GeV and |eta| < ``max_abs_eta``.

    Both comparisons are strict. ``objects`` must expose ``pt`` and ``eta``
    and support boolean-mask indexing.
    """
    passes = (objects.pt > min_pt_gev * GeV) & (abs(objects.eta) < max_abs_eta)
    return objects[passes]


def selected_electrons(el, min_pt_gev=20, max_abs_eta=2.47):
    """Select electrons above the pt threshold and inside the |eta| acceptance.

    The defaults reproduce the original hard-coded cuts (pt > 20 GeV,
    |eta| < 2.47).
    """
    return _within_acceptance(el, min_pt_gev, max_abs_eta)


def selected_muons(mu, min_pt_gev=20, max_abs_eta=2.47):
    """Select muons above the pt threshold and inside the |eta| acceptance.

    The defaults reproduce the original hard-coded cuts (pt > 20 GeV,
    |eta| < 2.47).
    """
    return _within_acceptance(mu, min_pt_gev, max_abs_eta)


def selected_jets(j, min_pt_gev=20, max_abs_eta=2.47):
    """Select jets above the pt threshold and inside the |eta| acceptance.

    The defaults reproduce the original hard-coded cuts (pt > 20 GeV,
    |eta| < 2.47).
    """
    return _within_acceptance(j, min_pt_gev, max_abs_eta)

def no_overlap(obj1, obj2, deltaR=0.4):
    """Flag each entry of ``obj1`` that is farther than ``deltaR`` from every entry of ``obj2``.

    All (obj1, obj2) pairs are built per event with a nested cartesian
    product, so the reduction over the last axis gives one boolean per
    ``obj1`` entry. Both inputs must provide the ``deltaR`` method used here.
    """
    pairs = ak.cartesian([obj1, obj2], nested=True)
    left, right = ak.unzip(pairs)
    separation = left.deltaR(right)
    return ak.all(separation > deltaR, axis=-1)

def mjjj(jets):
    """Mass of the highest-pt three-jet combination that contains a b-tagged jet.

    ``jets`` must carry an ``is_bjet`` field and support addition, since the
    three jets of each combination are summed into a ``p4`` field. Per event,
    only combinations with at least one ``is_bjet`` jet are kept, and among
    those the one whose summed ``p4`` has the largest pt is selected
    (``keepdims=True`` keeps one slot per event).
    """
    triplets = ak.combinations(jets, 3)
    first, second, third = ak.unzip(triplets)

    triplets["p4"] = first + second + third

    # A combination qualifies when any of its three jets is b-tagged.
    contains_b = (first.is_bjet + second.is_bjet + third.is_bjet) > 0
    tagged = triplets[contains_b]

    leading = ak.argmax(tagged.p4.pt, axis=1, keepdims=True)
    return tagged[leading].p4.mass

def processed(events):
    """Apply the object and event selection and return the surviving events.

    Steps, in order:
      1. Attach the ``DL1dv01_pb`` b-tagging score to the jets as
         ``btag_prob`` before any jet filtering, so the score is filtered
         together with the jets.
         NOTE(review): this assumes ``BTagging_AntiKt4EMPFlow`` is aligned
         one-to-one with ``Jets`` — confirm against the dataset.
      2. Select electrons, muons and jets with the ``selected_*`` helpers.
      3. Drop jets that overlap with a selected electron (``no_overlap``);
         muons are not considered in this step.
      4. Mark jets with ``btag_prob > 0.85`` as ``is_bjet``.
      5. Keep events with at least four jets, exactly one lepton
         (electron or muon) and at least two ``is_bjet`` jets.

    The input is shallow-copied first so the field assignments below are made
    on the copy rather than on the caller's array object. The result is
    packed with ``ak.to_packed``.
    """
    events = copy.copy(events) # shallow copy
    events["Jets", "btag_prob"] = events.BTagging_AntiKt4EMPFlow.DL1dv01_pb
    events["Electrons"] = selected_electrons(events.Electrons)
    events["Muons"] = selected_muons(events.Muons)
    events["Jets"] = selected_jets(events.Jets)
    events["Jets"] = events.Jets[no_overlap(events.Jets, events.Electrons)]
    events["Jets", "is_bjet"] = events.Jets.btag_prob > 0.85
    events = events[
        (ak.num(events.Jets) >= 4) # at least 4 jets
        & ((ak.num(events.Electrons) + ak.num(events.Muons)) == 1) # exactly one lepton
        & (ak.num(events.Jets[events.Jets.is_bjet]) >= 2) # at least two btagged jets with prob > 0.85
    ]
    return ak.to_packed(events)

# Run the selection, then histogram the three-jet mass (converted with GeV)
# of the selected events and mark the reference value with a dashed line.
events = processed(combined_events)
top_mass_gev = ak.flatten(mjjj(events.Jets) / GeV, axis=None)

fig, ax = plt.subplots()
ax.hist(top_mass_gev, bins=100)
ax.set_xlabel("Reconstructed Top Quark Mass (GeV)")
ax.set_ylabel("Number of Events")
ax.set_title("Distribution of Reconstructed Top Quark Mass")
ax.axvline(172.76, color='r', linestyle='dashed', linewidth=2, label='Expected Top Quark Mass')
ax.legend()
plt.show()

# Event counts before and after the selection.
print('Total events:', len(combined_events))
print('Events after filtering:', len(events))