In [21]:
import uproot
import glob
import numpy as np

from pathlib import Path
from typing import Optional, Union
from hist.hist import Hist
from hist.axis import StrCategory, IntCategory

In [22]:
def merge_trees(tree_list):
    """Concatenate the branches of several trees into one dict of NumPy arrays.

    Every element of ``tree_list`` is read with ``tree.arrays(library='np')``.
    Elements that cannot be read (including placeholder objects that have no
    ``arrays`` method) are reported and skipped instead of aborting the merge.

    Parameters
    ----------
    tree_list : list
        Objects exposing ``arrays(library='np')`` that returns a mapping of
        branch name to array.

    Returns
    -------
    dict or None
        ``{branch_name: concatenated_array}``, with branches in the order of
        the first readable tree. ``None`` when ``tree_list`` is empty or when
        no tree could be read.
    """
    if len(tree_list) == 0:
        print(f"Empty trees: {tree_list}")
        return None

    per_tree_arrays = []
    for tree in tree_list:
        try:
            per_tree_arrays.append(tree.arrays(library='np'))
        except Exception as e:
            # Best effort: one unreadable tree must not abort the whole merge.
            print(f"Error in {tree}: {e}")

    if not per_tree_arrays:
        # Every read failed; indexing per_tree_arrays[0] would raise IndexError.
        print("No readable trees; nothing to merge.")
        return None

    # Keep only branches present in every readable tree, preserving the branch
    # order of the first one so the output layout is deterministic.
    common_keys = [
        key for key in per_tree_arrays[0].keys()
        if all(key in arrays for arrays in per_tree_arrays)
    ]
    seen_keys = dict.fromkeys(
        key for arrays in per_tree_arrays for key in arrays.keys()
    )
    dropped_keys = [key for key in seen_keys if key not in common_keys]
    if dropped_keys:
        print(f"Warning: branches missing from some trees are dropped: {dropped_keys}")

    return {
        key: np.concatenate([arrays[key] for arrays in per_tree_arrays])
        for key in common_keys
    }


def merge_histograms(hist_list):
    """Sum 1D or 2D category histograms whose category sets may differ.

    The combined histogram has, on each axis, the union of the categories seen
    on that axis across all inputs (in first-seen order). The axis type
    (``StrCategory`` or ``IntCategory``) is taken from the first histogram.

    Parameters
    ----------
    hist_list : list
        Histograms exposing ``axes`` and ``values()``. Elements without an
        ``axes`` attribute (e.g. placeholder arrays appended for unreadable
        inputs) are ignored.

    Returns
    -------
    Hist or None
        The merged histogram, or ``None`` when no usable histogram was given.

    Raises
    ------
    ValueError
        If the histograms have more than two axes, or an axis is neither a
        ``StrCategory`` nor an ``IntCategory``.
    """
    # Duck-typed filter: drops non-histogram placeholders without an isinstance
    # check against a specific histogram class.
    hists = [h for h in hist_list if hasattr(h, "axes")]
    if not hists:
        print(f"Empty histograms: {hist_list}")
        return None

    n_axes = len(hists[0].axes)
    if n_axes not in (1, 2):
        raise ValueError("Only 1D and 2D histograms are supported.")

    # dicts are used as ordered sets so the category order is deterministic.
    categories = [{} for _ in range(n_axes)]
    for h in hists:
        for i in range(n_axes):
            categories[i].update(dict.fromkeys(h.axes[i]))

    combined_axes = []
    for i, template_axis in enumerate(hists[0].axes):
        if isinstance(template_axis, StrCategory):
            axis_cls = StrCategory
        elif isinstance(template_axis, IntCategory):
            axis_cls = IntCategory
        else:
            raise ValueError(f"Unsupported axis type: {type(template_axis).__name__}")
        combined_axes.append(axis_cls(list(categories[i])))

    combined_hist = Hist(*combined_axes)

    for h in hists:
        # values() yields plain bin contents for every storage type, whereas
        # view() returns structured records for weighted storages.
        values = h.values()
        if n_axes == 1:
            for cat, val in zip(h.axes[0], values):
                if val != 0:
                    combined_hist.fill([cat], weight=[val])
        else:
            for x_idx, x_cat in enumerate(h.axes[0]):
                for y_idx, y_cat in enumerate(h.axes[1]):
                    val = values[x_idx, y_idx]
                    # Skip only empty bins; negative contents must be kept.
                    if val != 0:
                        combined_hist.fill([x_cat], [y_cat], weight=val)
    return combined_hist


def check_branch_sizes(merged_tree):
    """Truncate every branch to the length of the shortest one.

    Parameters
    ----------
    merged_tree : dict or None
        Mapping of branch name to a sized, sliceable sequence. Modified in
        place when branches have unequal lengths.

    Returns
    -------
    dict or None
        The same object that was passed in. ``None`` and empty mappings are
        returned unchanged.
    """
    # Nothing to compare for a missing or empty tree; min() would raise on it.
    if not merged_tree:
        return merged_tree

    lengths = {key: len(value) for key, value in merged_tree.items()}
    min_length = min(lengths.values())
    for key, length in lengths.items():
        if length != min_length:
            print(f"Warning: Branch {key} has {length} entries, but expected {min_length}. Truncating.")
            merged_tree[key] = merged_tree[key][:min_length]

    return merged_tree


def merge_nanoaod_files(input_dir: str, output_file: str) -> Optional[str]:
    """Merge the trees and histograms of all ROOT files under ``input_dir``.

    The set of objects to merge is taken from the first file that can be
    opened: objects whose class name starts with ``TTree`` are merged with
    ``merge_trees`` and those starting with ``TH`` with ``merge_histograms``.
    Files or objects that cannot be read are reported and skipped.

    Parameters
    ----------
    input_dir : str
        Directory (glob wildcards allowed) searched recursively for ``*.root``.
    output_file : str
        Path of the ROOT file to (re)create. Missing parent directories are
        created.

    Returns
    -------
    str or None
        ``output_file`` on success, ``None`` when no input file was found.
    """
    # Sorted so the merge order (and therefore the output) is reproducible.
    input_files = sorted(glob.glob(f"{input_dir}/**/*.root", recursive=True))

    if not input_files:
        print(f"No files found in directory: {input_dir}")
        return None

    tree_dict = {}
    hist_dict = {}

    # Discover the object names from the first file that opens cleanly.
    # cycle=False gives one entry per object name, so backup cycles of the
    # same object are not merged twice and no ";N" suffix has to be stripped.
    for candidate in input_files:
        try:
            with uproot.open(candidate) as f:
                for key, classname in f.classnames(cycle=False).items():
                    if classname.startswith("TTree"):
                        tree_dict[key] = []
                    elif classname.startswith("TH"):
                        hist_dict[key] = []
        except Exception as e:
            print(f"Skipping problematic file: {candidate}. Error: {e}")
            continue
        break

    for input_file in input_files:
        try:
            with uproot.open(input_file) as f:
                # NOTE(review): the tree objects are read later by merge_trees,
                # after this file has been closed — confirm that the uproot
                # version in use supports reading from them at that point.
                for tree_key, tree_list in tree_dict.items():
                    try:
                        tree_list.append(f[tree_key])
                    except Exception as e:
                        # Skip instead of appending a placeholder array.
                        print(f"Skipping problematic tree in file: {input_file}. Error: {e}")

                for hist_key, hist_list in hist_dict.items():
                    try:
                        hist_list.append(f[hist_key].to_hist())
                    except Exception as e:
                        print(f"Skipping problematic hist in file: {input_file}. Error: {e}")

        except Exception as e:
            print(f"Skipping problematic file: {input_file}. Error: {e}")
            continue

    # NOTE(review): if writing fails because branches have different numbers of
    # entries, check_branch_sizes() can truncate them, but that hides the
    # underlying mismatch — investigate the cause first.
    merged_tree_dict = {
        tree_key: merge_trees(tree_list)
        for tree_key, tree_list in tree_dict.items()
    }
    # Objects that could not be read from any file have nothing to merge.
    merged_hist_dict = {
        hist_key: merge_histograms(hist_list) if hist_list else None
        for hist_key, hist_list in hist_dict.items()
    }

    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    with uproot.recreate(output_file) as output_file_obj:
        for tree_key, merged_tree in merged_tree_dict.items():
            if not merged_tree:
                print(f"Nothing to write for tree: {tree_key}")
                continue
            output_file_obj[tree_key] = merged_tree

        for hist_key, merged_hist in merged_hist_dict.items():
            if merged_hist is None:
                print(f"Nothing to write for hist: {hist_key}")
                continue
            output_file_obj[hist_key] = merged_hist

    print(f"Successfully merged files and saved to {output_file}")
    return output_file

In [24]:
# Base locations, kept in one place so they can be changed without touching
# the loop below.
INPUT_ROOT = "/users/hep/eigen1907/STORE/TnP-Flat-NanoAOD"
OUTPUT_ROOT = "/users/hep/eigen1907/Workspace/Workspace-RPC/Log/NanoAOD-TnP/240923/merge"

# "<primary dataset>/<run era>" pairs; each one is merged into its own file.
era_list = [
    "SingleMuon/Run2022B",
    "SingleMuon/Run2022C",
    "Muon/Run2022C",
    "Muon/Run2022D",
    "Muon/Run2022E",
    "Muon/Run2022F",
    "Muon/Run2022G",
    "Muon0/Run2023B",
    "Muon0/Run2023C",
    "Muon0/Run2023D",
    "Muon0/Run2024B",
    "Muon0/Run2024C",
    "Muon0/Run2024D",
    "Muon0/Run2024E",
    "Muon0/Run2024F",
    "Muon1/Run2023B",
    "Muon1/Run2023C",
    "Muon1/Run2023D",
    "Muon1/Run2024B",
    "Muon1/Run2024C",
    "Muon1/Run2024D",
    "Muon1/Run2024E",
    "Muon1/Run2024F",
]

for era in era_list:
    # The trailing wildcards are expanded by glob inside merge_nanoaod_files.
    input_dir = f"{INPUT_ROOT}/{era}*/*/*/"
    output_file = f"{OUTPUT_ROOT}/{era}.root"
    # Era names contain "/", so the output lives in a per-dataset subdirectory
    # that must exist before the file can be created.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    print("=" * 100)
    print(f"Trying merge files in {input_dir}")
    merge_nanoaod_files(input_dir, output_file)

Merge files in /users/hep/eigen1907/STORE/TnP-Flat-NanoAOD/SingleMuon/Run2022B*/*/*/
Successfully merged files and saved to /users/hep/eigen1907/Workspace/Workspace-RPC/Log/NanoAOD-TnP/240923/merge/SingleMuon/Run2022B.root
Merge files in /users/hep/eigen1907/STORE/TnP-Flat-NanoAOD/SingleMuon/Run2022C*/*/*/
Error in <TTree 'hit_tree' (31 branches) at 0x7f9405215040>: index -1 is out of bounds for axis 0 with size 0
Successfully merged files and saved to /users/hep/eigen1907/Workspace/Workspace-RPC/Log/NanoAOD-TnP/240923/merge/SingleMuon/Run2022C.root
Merge files in /users/hep/eigen1907/STORE/TnP-Flat-NanoAOD/Muon/Run2022C*/*/*/
Error in <TTree 'hit_tree' (31 branches) at 0x7f92ce2ebdd0>: index -1 is out of bounds for axis 0 with size 0
Error in <TTree 'hit_tree' (31 branches) at 0x7f92cb6bc9b0>: index -1 is out of bounds for axis 0 with size 0
Error in <TTree 'hit_tree' (31 branches) at 0x7f92111c0bc0>: index -1 is out of bounds for axis 0 with size 0
Error in <TTree 'hit_tree' (31 bran

ValueError: 'extend' must fill every branch with the same number of entries; 'dimuon_pt' has 3335748 entries