In [125]:
import uproot
import os
import multiprocessing
import math
import pandas as pd

# Data directory, relative to the notebook. Presumably the value passed as
# redistribute()'s `path` argument — confirm against the calling cell.
# NOTE(review): redistribute() also hard-codes "../data/" as the parent of its
# output directory, independently of this variable.
path = "../data/"

def partition_helper(slice_entries, file_entries, file_curr, entry_curr):
    """Find where a slice of ``slice_entries`` entries ends.

    The slice starts at entry ``entry_curr`` of file ``file_curr`` and may run
    across consecutive files.

    Args:
        slice_entries: Number of entries the slice should contain.
        file_entries: Entry count of each file, in order.
        file_curr: Index of the file in which the slice starts.
        entry_curr: Entry offset inside ``file_curr`` at which the slice starts.

    Returns:
        ``[file_index, entry_stop]``: the file in which the slice ends and the
        entry offset in that file at which it stops. If fewer than
        ``slice_entries`` entries are left, the result is clamped to the end
        of the last file.
    """
    remaining = slice_entries
    # Iterative rather than recursive, so a slice spanning many files cannot
    # hit the interpreter's recursion limit.
    while True:
        available = file_entries[file_curr] - entry_curr
        if remaining <= available:
            return [file_curr, entry_curr + remaining]
        if file_curr == len(file_entries) - 1:
            # Ran out of files: clamp to the end of the last one.
            return [file_curr, file_entries[-1]]
        remaining -= available
        file_curr += 1
        entry_curr = 0


def partition(files, n_processes):
    """Split the entries of ``files`` into at most ``n_processes`` contiguous slices.

    Args:
        files: Sequence of objects exposing a ``num_entries`` attribute.
        n_processes: Requested number of slices; must be at least 1.

    Returns:
        A list of ``[file_start, entry_start, file_stop, entry_stop]`` lists.
        Every slice except possibly the last holds
        ``ceil(total_entries / n_processes)`` entries. Because of that
        rounding, fewer than ``n_processes`` slices are returned when the
        total is small relative to ``n_processes``. An empty list is returned
        when there are no entries at all.

    Raises:
        ValueError: If ``n_processes`` is smaller than 1.
    """
    if n_processes < 1:
        raise ValueError("n_processes must be at least 1")
    file_entries = [file.num_entries for file in files]
    total_entries = sum(file_entries)
    if total_entries == 0:
        return []
    slice_entries = math.ceil(total_entries / n_processes)
    slices = []
    file_start = 0
    entry_start = 0
    # Emit a fixed number of slices instead of looping until a slice's stop
    # entry equals file_entries[-1]: that comparison ignored the file index and
    # ended the loop early whenever a slice in an earlier file happened to stop
    # at that same entry number (e.g. equally sized files).
    for _ in range(math.ceil(total_entries / slice_entries)):
        file_stop, entry_stop = partition_helper(
            slice_entries, file_entries, file_start, entry_start)
        slices.append([file_start, entry_start, file_stop, entry_stop])
        file_start, entry_start = file_stop, entry_stop
    return slices

def _read_partition(trees, bounds):
    """Read one slice of entries from ``trees`` as a list of pandas DataFrames.

    Args:
        trees: Sequence of opened trees, indexed by file number.
        bounds: ``[first_file, entry_start, last_file, entry_stop]``.

    Returns:
        One DataFrame per file from ``first_file`` to ``last_file`` inclusive.
        ``entry_start`` is applied only to the first file and ``entry_stop``
        only to the last; files in between are read in full.
    """
    first_file, entry_start, last_file, entry_stop = bounds
    frames = []
    for i in range(first_file, last_file + 1):
        tree = trees[i]
        frames.append(tree.arrays(
            # Branches whose name ends in "_p4" are left out of the read.
            [key for key in tree.keys() if not key.endswith("_p4")],
            entry_start=entry_start if i == first_file else None,
            entry_stop=entry_stop if i == last_file else None,
            library="pd"))
    return frames


def write_one_file(candidate_trees, candidate_partitions, ups_trees, ups_partitions, index, target_dir):
    """Write slice number ``index`` of both trees to ``<target_dir>/file<index>.root``.

    Args:
        candidate_trees: Opened CandidateTree objects, one per input file.
        candidate_partitions: Slices for the candidate trees, each of the form
            ``[first_file, entry_start, last_file, entry_stop]``.
        ups_trees: Opened UpsTree objects, one per input file.
        ups_partitions: Slices for the ups trees, same layout.
        index: Which slice to write; also used in the output file name.
        target_dir: Directory that receives the output file.
    """
    candidate_data = _read_partition(candidate_trees, candidate_partitions[index])
    ups_data = _read_partition(ups_trees, ups_partitions[index])
    # The with-block closes the output file even if a write fails; the file
    # handle was previously left open.
    with uproot.recreate(target_dir + "/file" + str(index) + ".root") as file:
        file.mkdir("rootuple")
        file["rootuple/CandidateTree"] = pd.concat(candidate_data)
        file["rootuple/UpsTree"] = pd.concat(ups_data)

def redistribute(path, n_files):
    """Rewrite the ROOT files found in ``path`` as ``n_files`` output files.

    The ``rootuple/CandidateTree`` and ``rootuple/UpsTree`` trees of every
    input file are opened and each set is partitioned with ``partition``. One
    worker process per output file then runs ``write_one_file``. Output goes
    to ``../data/<n_files>_files``.

    Args:
        path: Directory containing the input files.
        n_files: Number of output files, and of worker processes, to create.

    Raises:
        FileExistsError: If the output directory already exists.
        RuntimeError: If any worker process exits with a non-zero exit code.
    """
    # List the inputs before creating the output directory, and keep regular
    # files only: the output directory lives under "../data/", so when `path`
    # is that same directory it (or one left by an earlier run) would
    # otherwise be opened as an input file.
    filenames = sorted(
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name)))
    target_dir = "../data/" + str(n_files) + "_files"
    os.mkdir(target_dir)
    candidate_trees = [
        uproot.open(path=os.path.join(path, filename) + ":rootuple/CandidateTree",
                    object_cache=None, array_cache=None)
        for filename in filenames]
    candidate_partitions = partition(candidate_trees, n_files)
    ups_trees = [
        uproot.open(path=os.path.join(path, filename) + ":rootuple/UpsTree",
                    object_cache=None, array_cache=None)
        for filename in filenames]
    ups_partitions = partition(ups_trees, n_files)
    processes = []
    for i in range(n_files):
        p = multiprocessing.Process(target=write_one_file, args=[candidate_trees, candidate_partitions, ups_trees, ups_partitions, i, target_dir])
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    # An exception inside a worker only shows up as a non-zero exit code;
    # report it instead of returning as if every file had been written.
    failed = [i for i, p in enumerate(processes) if p.exitcode != 0]
    if failed:
        raise RuntimeError("write_one_file failed for output file index(es): " + str(failed))
    

In [113]:
# Scratch cell: open the CandidateTree of the first two merged files.
file0 = uproot.open("../merged/file0.root:rootuple/CandidateTree")
file1 = uproot.open("../merged/file1.root:rootuple/CandidateTree")
# First 100 entries of each tree as a pandas DataFrame, leaving out every
# branch whose name ends in "_p4".
a0 = file0.arrays(
    [key for key in file0.keys() if not key.endswith("_p4")],
    library='pd', entry_start=0, entry_stop=100)
a1 = file1.arrays(
    [key for key in file1.keys() if not key.endswith("_p4")],
    library='pd', entry_start=0, entry_stop=100)
# Stack the two frames; each keeps its own 0..99 index.
a = pd.concat([a0, a1])
# Second handle on the same tree as file0.
# NOTE(review): `b` is not used by any other cell visible here — confirm it is still needed.
b = uproot.open("../merged/file0.root:rootuple/CandidateTree")

KeyError: 'pE'

In [124]:
# Print the branch table (name, typename, interpretation) of the CandidateTree held in file0.
file0.show()

name                 | typename                 | interpretation                
---------------------+--------------------------+-------------------------------
run                  | int32_t                  | AsDtype('>i4')
event                | int32_t                  | AsDtype('>i4')
nCandPerEvent        | int32_t                  | AsDtype('>i4')
numPrimaryVertices   | int32_t                  | AsDtype('>i4')
trigger              | int32_t                  | AsDtype('>i4')
candidate_p4         | TLorentzVector           | AsStridedObjects(Model_TLor...
track1_p4            | TLorentzVector           | AsStridedObjects(Model_TLor...
track2_p4            | TLorentzVector           | AsStridedObjects(Model_TLor...
ditrack_p4           | TLorentzVector           | AsStridedObjects(Model_TLor...
dimuon_p4            | TLorentzVector           | AsStridedObjects(Model_TLor...
muonp_p4             | TLorentzVector           | AsStridedObjects(Model_TLor...
muonn_p4             | TLor

In [110]:
# Write the concatenated frame `a` to a scratch ROOT file as "tree". The
# with-block closes the file once the write is done, so it is no longer held
# open for writing when it is reopened for reading.
with uproot.recreate("../test/temp.root") as file:
    file['tree'] = a


In [111]:
# Round-trip check: read the first 100 entries of "tree" from the scratch file back into a pandas DataFrame.
uproot.open("../test/temp.root:tree").arrays(library='pd', entry_start=0, entry_stop=100)

  out[name] = series[name]
  out[name] = series[name]
  out[name] = series[name]
  out[name] = series[name]
  out[name] = series[name]
  out[name] = series[name]
  out[name] = series[name]
  out[name] = series[name]


Unnamed: 0,index,run,event,nCandPerEvent,numPrimaryVertices,trigger,invm1Skk,invm2Skk,iPVwithmuons,dimuon_vertexWeight,...,track2_SDxyDsz,track2_SDszDsz,vChi2KalmanKK,vNDFKalmanKK,vProbKalmanKK,vChi2KinematicKK,vNDFKinematicKK,vProbKinematicKK,track1_minEigen,track2_minEigen
0,0,323778,1343450492,1,28,0,12.138042,12.701002,0,-1,...,4.739249e-06,0.000186,34.981873,1.0,3.327891e-09,34.981873,1.0,3.327891e-09,2.220721e-06,2.462624e-06
1,1,323778,1343640570,7,18,0,10.789125,11.352085,1,-1,...,-8.328677e-06,0.000223,0.730809,1.0,3.926211e-01,0.730809,1.0,3.926211e-01,-4.771986e-07,-2.130578e-06
2,2,323778,1343640570,7,18,0,11.463991,12.026951,1,-1,...,4.808555e-07,0.000130,91.434090,1.0,1.153717e-21,91.434090,1.0,1.153717e-21,-3.622351e-06,1.286049e-06
3,3,323778,1343640570,7,18,0,11.063797,11.626757,1,-1,...,4.808555e-07,0.000130,45.685955,1.0,1.388145e-11,45.685959,1.0,1.388143e-11,-1.036131e-05,1.286049e-06
4,4,323778,1343640570,7,18,0,10.802186,11.365146,1,-1,...,3.620091e-06,0.000393,61.725319,1.0,3.948701e-15,61.725319,1.0,3.948701e-15,-1.036131e-05,-4.771986e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,323778,1343831088,10,32,0,10.778086,11.341046,0,-1,...,3.048133e-06,0.000283,0.482558,1.0,4.872657e-01,0.482558,1.0,4.872657e-01,9.963768e-08,-9.323621e-08
96,96,323778,1343831088,10,32,0,10.879255,11.442215,0,-1,...,3.048133e-06,0.000283,26.889717,1.0,2.154019e-07,26.889748,1.0,2.153985e-07,7.273692e-08,-9.323621e-08
97,97,323778,1343831088,10,32,0,10.634991,11.197951,0,-1,...,3.048133e-06,0.000283,0.000000,0.0,0.000000e+00,-1.000000,-1.0,-1.000000e+00,1.113631e-08,-9.323621e-08
98,98,323778,1343831088,10,32,0,11.453906,12.016866,0,-1,...,1.729313e-06,0.000085,0.000000,0.0,0.000000e+00,-1.000000,-1.0,-1.000000e+00,-1.318832e-03,5.194909e-08
