In [8]:
import sys
from pathlib import Path

project_root = Path.cwd().parent   # one level up
sys.path.insert(0, str(project_root))
#Import the tools
from src.parse_atlas import parser
from src.calculations import physics_calcs, combinatorics
import math, awkward as ak, numpy as np
import atlasopenmagic as atom
import matplotlib.pyplot as plt
import os
import uproot

In [None]:
def save_file(events, file_name, output_dir):
    """
    Write `events` into a ROOT file called `file_name` inside `output_dir`.

    Parameters
    ----------
    events : object assigned to the "CollectionTree" key of the output file
        (the caller in this notebook passes the dict built by
        ``flatten_for_root``).
    file_name : name of the output file, used as given (no extension is added).
    output_dir : directory receiving the file; created if it does not exist.
    """
    # Make sure the destination directory is there before opening the file.
    os.makedirs(output_dir, exist_ok=True)
    destination = os.path.join(output_dir, f"{file_name}")

    # The context manager closes the file once the tree has been assigned.
    with uproot.recreate(destination) as root_out:
        root_out["CollectionTree"] = events

def flatten_for_root(awk_arr):
    """
    Turn a top-level awkward Array into a flat ``{branch_name: array}`` dict
    that can be written to ROOT.

    Every sub-field of every top-level field becomes one entry keyed
    "<branch prefix>.<field>" (e.g. "AnalysisJetsAuxDyn.pt"), which is the
    layout _parse_file() is expected to read back. The prefix comes from
    ``AtlasOpenParser._prepare_obj_branch_name`` with the invariant-mass
    schema; when that returns None the top-level field name is used as is.

    Missing values (None) are replaced by 0.0 in every branch.

    NOTE(review): a top-level field whose ``.fields`` is empty contributes no
    branches at all, because only sub-fields are iterated.
    """
    branches = {}

    for obj_name in awk_arr.fields:
        record = awk_arr[obj_name]

        prefix = parser.AtlasOpenParser._prepare_obj_branch_name(
            obj_name, schema=parser.schemas.INVARIANT_MASS_SCHEMA)
        # Fall back to the raw field name when the schema gives no branch name.
        prefix = obj_name if prefix is None else prefix

        # One branch per sub-field; None entries are filled with 0.0 so that
        # every value in the branch is a plain number.
        branches.update({
            f"{prefix}.{field}": ak.fill_none(record[field], 0.0)
            for field in record.fields
        })

    return branches

In [25]:

# Combine parsed ROOT files until the in-memory sample passes a size threshold,
# then write the combined sample out as a single ROOT file.
root_files_path = "data/root_files/"
# In-memory size (in bytes) at which the combined sample is written out.
max_combined_bytes = 3e9

parsed_chunks = []   # parsed arrays; concatenated once instead of on every iteration
combined_bytes = 0   # running sum of the per-file nbytes, used as the size estimate
final_file = None
# os.listdir returns names in arbitrary order; sorting makes "every second file,
# starting from the end" select the same files on every run.
for file in sorted(os.listdir(root_files_path))[::-2]:
    parsed_file = parser.AtlasOpenParser.parse_file(os.path.join(root_files_path, file))
    print(f"Parsed file: {file} with {len(parsed_file)} events, size {parsed_file.nbytes / 1e6:.2f} MB")
    parsed_chunks.append(parsed_file)
    combined_bytes += parsed_file.nbytes

    if combined_bytes > max_combined_bytes:
        final_file = ak.concatenate(parsed_chunks)
        # The reported size is derived from the threshold so the message cannot drift from it.
        print(f"Reached {max_combined_bytes / 1e9:g} GB with {len(final_file)} events, saving intermediate file.")
        intermediate_filename = f"combined_up_to_{file}"
        root_ready = flatten_for_root(final_file)
        save_file(root_ready, intermediate_filename, "data/big_testing_files/")
        break
else:
    # Threshold never reached: nothing is saved, but everything parsed is still
    # exposed as `final_file` for later cells.
    if parsed_chunks:
        final_file = ak.concatenate(parsed_chunks)

# parser.AtlasOpenParser.save_events_as_root(final_file, "data/root_files/combined_file.root")

Parsed file: 2024r-pp_99570e2f61ec52be.root with 204201 events, size 57.67 MB


KeyboardInterrupt: 

In [23]:
# Parse a previously written combined file (presumably a read-back check of the saved output).
# NOTE(review): this path (data/root_files/, ".root.root" suffix) does not match where the
# loop cell in this notebook writes (data/big_testing_files/combined_up_to_<file>) — confirm
# the file exists at this location before running.
a = parser.AtlasOpenParser.parse_file("data/root_files/combined_up_to_2024r-pp_5d76989ff9c5faaf.root.root")

KeyboardInterrupt: 