# Convert ROOT files to HDF5 cluster features

In [1]:
import ROOT
import h5py
import numpy as np

Welcome to JupyROOT 6.16/00


In [2]:
def extract(cluster, i_event, prim):
    """Build the feature row for a single cluster.

    Args:
        cluster: Object exposing the getters called below (``GetT``, ``GetE``,
            ``GetSize``, ``GetEToF``, ``GetEnergyMoment``, ``GetFirstHit``,
            ``GetLastHit``, ``GetMaxEnergyHit``, ``GetPosition``).
        i_event: Event index the cluster belongs to; stored in column 0.
        prim: Flag stored in column 1 (callers in this notebook pass 1 for
            primary clusters and 0 for secondary ones).

    Returns:
        A list of 12 values: event index, flag, ``GetT()``, ``GetE()``,
        ``GetSize()``, ``GetEToF()``, ``GetEnergyMoment()``, the difference
        between the ``GetT()`` of the last and the first hit, the ``GetE()``
        of the max-energy hit, and the X, Y, Z components of the position.
    """
    row = [i_event, prim]

    # Scalar cluster properties, in column order.
    row.append(cluster.GetT())
    row.append(cluster.GetE())
    row.append(cluster.GetSize())
    row.append(cluster.GetEToF())
    row.append(cluster.GetEnergyMoment())

    # Time difference between the hits reported as last and first.
    t_last = cluster.GetLastHit().GetT()
    t_first = cluster.GetFirstHit().GetT()
    row.append(t_last - t_first)

    row.append(cluster.GetMaxEnergyHit().GetE())

    # Position components; the position is re-fetched for each component.
    row.append(cluster.GetPosition().X())
    row.append(cluster.GetPosition().Y())
    row.append(cluster.GetPosition().Z())
    return row

In [3]:
def get_num_events(infiles):
    """Count the entries of the "evt" tree, summed over all input files.

    Args:
        infiles: Iterable of ``(label, filename)`` pairs; only the filename
            is used here.

    Returns:
        The total number of tree entries. The value is also printed.

    Raises:
        OSError: If a ROOT file cannot be opened.
    """
    # List Comprehension does not work with ROOT
    num_events = 0
    for _, filename in infiles:
        tfile = ROOT.TFile.Open(filename)
        # TFile.Open hands back a null pointer (falsy in PyROOT) on failure.
        if not tfile:
            raise OSError(f"Could not open ROOT file {filename}")
        try:
            ttree = tfile.Get("evt")
            num_events += ttree.GetEntries()
        finally:
            # Release the file handle instead of leaking one per input file.
            tfile.Close()
    print(num_events)
    return num_events

In [4]:
def get_num_clusters(infiles):
    """Count secondary plus primary clusters over all events of all files.

    For every event of each file's "evt" tree, the entry counts of the
    ``NeulandSecondaryClusters`` and ``NeulandPrimaryClusters`` branches are
    added up.

    Args:
        infiles: Iterable of ``(label, filename)`` pairs; only the filename
            is used here.

    Returns:
        The total number of clusters. The value is also printed.

    Raises:
        OSError: If a ROOT file cannot be opened.
    """
    # List Comprehension does not work with ROOT
    num_clusters = 0
    for _, filename in infiles:
        tfile = ROOT.TFile.Open(filename)
        # TFile.Open hands back a null pointer (falsy in PyROOT) on failure.
        if not tfile:
            raise OSError(f"Could not open ROOT file {filename}")
        try:
            ttree = tfile.Get("evt")
            for event in ttree:
                num_clusters += event.NeulandSecondaryClusters.GetEntries()
                num_clusters += event.NeulandPrimaryClusters.GetEntries()
        finally:
            # Release the file handle instead of leaking one per input file.
            tfile.Close()
    print(num_clusters)
    return num_clusters

In [9]:
def create_hdf5(infiles, outfile, num_dp):
    """Convert the "evt" trees of several ROOT files into one HDF5 file.

    Three datasets are written to ``outfile``:

    * ``"clusters"``: one float32 row of 12 features per cluster, as produced
      by ``extract``. Within an event the secondary clusters are written
      first (flag 0), followed by the primary clusters (flag 1).
    * ``"consolidated"``: one int16 row ``(nHits, nClus, Edep)`` per event.
    * ``"multiplicity"``: one int8 row ``(nPN, nPP, nPH)`` per event.

    Args:
        infiles: Sequence of ``(nIn, filename)`` pairs. It is iterated three
            times (event count, cluster count, conversion), so it must be
            re-iterable, e.g. a list. ``nIn`` is stored as ``nPN``.
        outfile: Path of the HDF5 file to create; an existing file is
            overwritten.
        num_dp: Not used by this function; kept so existing callers keep
            working.

    Raises:
        OSError: If a ROOT file cannot be opened.
    """
    num_events = get_num_events(infiles)
    num_clusters = get_num_clusters(infiles)
    num_clusterfeatures = 12
    chunk_size = 100
    # h5py rejects a chunk shape larger than the dataset shape, so clamp the
    # chunk length for inputs with fewer clusters than chunk_size.
    chunk_rows = max(1, min(chunk_size, num_clusters))

    print(f"->     Writing to {outfile}")
    with h5py.File(outfile, "w") as h5file:
        # NOTE: column 0 holds the event index as float32, which represents
        # integers exactly only up to 2**24 (16,777,216).
        clusters = h5file.create_dataset(
            "clusters",
            shape=(num_clusters, num_clusterfeatures),
            dtype=np.float32,
            chunks=(chunk_rows, num_clusterfeatures),
            compression="gzip",
            compression_opts=9,
        )

        consolidated = h5file.create_dataset("consolidated", shape=(num_events, 3), dtype=np.int16)
        cbuff = np.zeros((3), np.int16)

        multiplicity = h5file.create_dataset("multiplicity", (num_events, 3), np.int8)
        mbuff = np.zeros((3), np.int8)

        # Both counters run across all input files, so rows of later files
        # are appended after those of earlier ones.
        i_event = 0
        i_cluster = 0
        for nIn, filename in infiles:
            print(f"Reading ROOT file {filename}")
            tfile = ROOT.TFile.Open(filename)
            # TFile.Open hands back a null pointer (falsy in PyROOT) on failure.
            if not tfile:
                raise OSError(f"Could not open ROOT file {filename}")
            try:
                ttree = tfile.Get("evt")
                for event in ttree:
                    # Consolidated Features
                    # nHits: Number of hits
                    cbuff[0] = event.NeulandHits.GetEntries()
                    # nClus: Number of clusters
                    cbuff[1] = event.NeulandClusters.GetEntries()
                    # Edep: Total deposited (detected) energy
                    cbuff[2] = round(sum(hit.GetE() for hit in event.NeulandHits))
                    consolidated[i_event] = cbuff

                    # Multiplicity
                    # nPN: Number of incoming primary neutrons
                    mbuff[0] = nIn
                    # nPP: Number of primary neutrons with an energy deposition in NeuLAND
                    mbuff[1] = event.NeulandPrimaryPoints.GetEntries()
                    # nPH: Number of hits that correspond to a energy deposition of a primary neutron
                    mbuff[2] = event.NeulandPrimaryHits.GetEntries()
                    multiplicity[i_event] = mbuff

                    for cluster in event.NeulandSecondaryClusters:
                        clusters[i_cluster] = extract(cluster, i_event, 0)
                        i_cluster += 1

                    for cluster in event.NeulandPrimaryClusters:
                        clusters[i_cluster] = extract(cluster, i_event, 1)
                        i_cluster += 1

                    i_event += 1
                    # Periodic progress output: events and clusters written so far.
                    if i_event % 10000 == 0:
                        print(i_event, i_cluster)
            finally:
                # Release the file handle instead of leaking one per input file.
                tfile.Close()

Note: Don't try parallel execution here

In [10]:
# Neutron counts to convert (1 through 5); each value selects one input ROOT
# file and is stored alongside its events.
incoming_neutrons = range(1, 6)
# Beam energy value inserted into the input and output file names ("...AMeV").
beam_energy = 600


def create_hdf5_wrap(num_dp):
    """Run ``create_hdf5`` for one ``num_dp`` value.

    One input file is used per entry of the module-level
    ``incoming_neutrons``; the file names are built from the module-level
    ``beam_energy``, ``num_dp`` and the neutron count. The output goes to a
    single HDF5 file under ``data/``.

    Args:
        num_dp: Value inserted into the ``{num_dp}dp`` part of the input and
            output file names and forwarded to ``create_hdf5``.
    """
    infiles = []
    for neutrons in incoming_neutrons:
        root_path = f"../simulation/data/training_{beam_energy}AMeV_{num_dp}dp_{neutrons}n.digi.root"
        # Keep the neutron count next to its file name for the converter.
        infiles.append((neutrons, root_path))

    outfile = f"data/{beam_energy}AMeV_{num_dp}dp.clusters.h5"
    create_hdf5(infiles, outfile, num_dp)


# Produce one HDF5 file per num_dp value ("dp" presumably means detector
# double planes - confirm).
for num_dp in [15, 30]:
    create_hdf5_wrap(num_dp)

5000000
62258817
->     Writing to data/600AMeV_15dp.clusters.h5
Reading ROOT file ../simulation/data/training_600AMeV_15dp_1n.digi.root
10000 42001
20000 84597
30000 126786
40000 169493
50000 211861
60000 254365
70000 296826
80000 340043
90000 382433
100000 425367
110000 467772
120000 510158
130000 552690
140000 595343
150000 637915
160000 680309
170000 722815
180000 764549
190000 807013
200000 850202
210000 892272
220000 934613
230000 976890
240000 1019020
250000 1061263
260000 1103514
270000 1145938
280000 1188862
290000 1230769
300000 1273032
310000 1315678
320000 1358491
330000 1400463
340000 1442600
350000 1484755
360000 1527654
370000 1570578
380000 1613190
390000 1656101
400000 1699014
410000 1741082
420000 1783950
430000 1825924
440000 1868061
450000 1910319
460000 1952939
470000 1995472
480000 2038182
490000 2080794
500000 2122992
510000 2165248
520000 2207712
530000 2250298
540000 2292960
550000 2335520
560000 2377534
570000 2420030
580000 2462801
590000 2505088
600000 25476