In [1]:
import numpy as np
import uproot as ur
import awkward as ak

In [2]:
path_prefix = 'D:/Work/EPE/ML4pi/'
plotpath = path_prefix+'plots/'
modelpath_c = path_prefix+''
modelpath = path_prefix+''
ext_path = "H:/EPE_file_storage/"
ext_modelpath = ext_path + "Model/"
ext_datapath = ext_path + "data_storage/pipm/"
ext_plotpath = ext_path + "plots/"

In [3]:
def DeltaR(coords, ref):
    ''' Straight forward function, expects Nx2 inputs for coords, 1x2 input for ref '''
    ref = np.tile(ref, (len(coords[:,0]), 1))
    DeltaCoords = np.subtract(coords, ref)
    ## Mirroring ##
    gt_pi_mask = DeltaCoords > np.pi
    lt_pi_mask = DeltaCoords < - np.pi
    DeltaCoords[lt_pi_mask] = DeltaCoords[lt_pi_mask] + 2*np.pi
    DeltaCoords[gt_pi_mask] = DeltaCoords[gt_pi_mask] - 2*np.pi
    return np.sqrt(DeltaCoords[:,0]**2 + DeltaCoords[:,1]**2)

# Returns tuple of nEvents, highest number of clusters in events, and dimensions of pointcloud
def find_max_dim_tuple(events, event_dict):
    nEvents = len(events)
    max_clust = 0
    
    for i in range(nEvents):
        event = events[i,0]
        track_nums = events[i,1]
        clust_nums = events[i,2]
        
        clust_num_total = 0
        # set this to six for now to handle single track events, change later
        track_num_total = 6
        
        # Check if there are clusters, None type object may be associated with it
        if clust_nums is not None:
            # Search through cluster indices
            for clst_idx in clust_nums:
                nInClust = len(event_dict['cluster_cell_ID'][event][clst_idx])
                # add the number in each cluster to the total
                clust_num_total += nInClust

        total_size = clust_num_total + track_num_total
        if total_size > max_clust:
            max_clust = total_size
    
    # 6 for energy, eta, phi, rperp, track flag, sample layer
    return (nEvents, max_clust, 6)

def dict_from_tree(tree, branches=None, np_branches=None):
    ''' Loads branches as default awkward arrays and np_branches as numpy arrays. '''
    dictionary = dict()
    if branches is not None:
        for key in branches:
            branch = tree.arrays()[key]
            dictionary[key] = branch
            
    if np_branches is not None:
        for np_key in np_branches:
            np_branch = np.ndarray.flatten(tree.arrays()[np_key].to_numpy())
            dictionary[np_key] = np_branch
    
    if branches is None and np_branches is None:
        raise ValueError("No branches passed to function.")
        
    return dictionary

def find_index_1D(values, dictionary):
    ''' Use a for loop and a dictionary. values are the IDs to search for. dict must be in format 
    (cell IDs: index) '''
    idx_vec = np.zeros(len(values), dtype=np.int32)
    for i in range(len(values)):
        idx_vec[i] = dictionary[values[i]]
    return idx_vec

def topo_clustering(cell_eta,cell_phi,cell_sampling, cell_energies):
    
    emb1 = np.zeros([128,4])
    emb2 = np.zeros([16, 16])
    emb3 = np.zeros([16, 8])
    tile0 = np.zeros([4,4])
    tile1 = np.zeros([4,4])
    tile2 = np.zeros([2,4])

    array_key = {
        1: emb1,
        2: emb2,
        3: emb3,
        12: tile0,
        13: tile1,
        14: tile2
    }

    for idx in range(len(cell_phi)):
      point_out = 0
      layer = int(cell_sampling[idx])
      if not (layer >=1 and layer <= 3) and not (layer >= 12 and layer <= 14):
        continue
      layer_array = array_key[layer]
      layer_string = layer_key[layer]['string']

      cell_size_eta = cell_meta[layer_string]['cell_size_eta']
      nEta = layer_array.shape[0]
      cell_size_phi = cell_meta[layer_string]['cell_size_phi']
      nPhi = layer_array.shape[1]
      cell_energies = np.array(cell_energies)

      eta_span = layer_key[layer]['eta_span']
      phi_span = layer_key[layer]['phi_span']
      eta_start = layer_key[layer]['eta_start']
      phi_start = - int(nPhi/2) * cell_size_phi

      N = int(np.floor( ( (cell_eta[idx] - eta_start)/eta_span) * nEta ))
      M = int(np.floor( ( (cell_phi[idx] - phi_start)/phi_span) * nPhi ))

      inbounds = True
      if N < 0 or M < 0:
          inbounds = False
      if N >= nEta or M >= nPhi:
          inbounds = False
          
      if inbounds == True:
          layer_array[N,M] = cell_energies[idx]
      else:
          point_out += 1
    return array_key
    """out = np.zeros((936,))
    curr_idx = 0
    for key in array_key:
      curr = array_key[curr]
      out[curr_idx:curr.shape[0]] = curr
      curr_idx = curr_idx + curr.shape[0]
    return out"""

In [4]:
#====================
# Metadata ==========
#====================
track_branches = ['trackEta_EMB1', 'trackPhi_EMB1', 'trackEta_EMB2', 'trackPhi_EMB2', 'trackEta_EMB3', 'trackPhi_EMB3',
                  'trackEta_TileBar0', 'trackPhi_TileBar0', 'trackEta_TileBar1', 'trackPhi_TileBar1',
                  'trackEta_TileBar2', 'trackPhi_TileBar2']

event_branches = ["cluster_nCells", "cluster_cell_ID", "cluster_cell_E", 'cluster_nCells', "nCluster", "eventNumber",
                  "nTrack", "nTruthPart", "truthPartPdgId", "cluster_Eta", "cluster_Phi", 'trackPt', 'trackP',
                  'trackMass', 'trackEta', 'trackPhi', 'truthPartE', 'cluster_ENG_CALIB_TOT', "cluster_E", 'truthPartPt']

ak_event_branches = ["cluster_nCells", "cluster_cell_ID", "cluster_cell_E", "cluster_nCells",
                  "nTruthPart", "truthPartPdgId", "cluster_Eta", "cluster_Phi", "trackPt", "trackP",
                  "trackMass", "trackEta", "trackPhi", "truthPartE", "cluster_ENG_CALIB_TOT", "cluster_E", "truthPartPt"]

np_event_branches = ["nCluster", "eventNumber", "nTrack", "nTruthPart"]

geo_branches = ["cell_geo_ID", "cell_geo_eta", "cell_geo_phi", "cell_geo_rPerp", "cell_geo_sampling"]

cell_meta = {
    'EMB1': {
        'cell_size_phi': 0.098,
        'cell_size_eta': 0.0031,
        'len_phi': 4,
        'len_eta': 128
    },
    'EMB2': {
        'cell_size_phi': 0.0245,
        'cell_size_eta': 0.025,
        'len_phi': 16,
        'len_eta': 16
    },
    'EMB3': {
        'cell_size_phi': 0.0245,
        'cell_size_eta': 0.05,
        'len_phi': 16,
        'len_eta': 8
    },
    'TileBar0': {
        'cell_size_phi': 0.1,
        'cell_size_eta': 0.1,
        'len_phi': 4,
        'len_eta': 4
    },
    'TileBar1': {
        'cell_size_phi': 0.1,
        'cell_size_eta': 0.1,
        'len_phi': 4,
        'len_eta': 4
    },
    'TileBar2': {
        'cell_size_phi': 0.1,
        'cell_size_eta': 0.2,
        'len_phi': 4,
        'len_eta': 2
    },
}

# additional geometry data
layer_rPerp = np.array([1540., 1733., 1930., 2450., 3010., 3630.])
track_sample_layer = np.array([1,2,3,12,13,14])

# for event dictionary
events_prefix = ext_datapath

# Use this to compare with the dimensionality of new events
firstArray = True

emb1_cell_size_eta = cell_meta['EMB1']['cell_size_eta']
emb1_cell_size_phi = cell_meta['EMB1']['cell_size_phi']
emb1_nEta, emb1_nPhi = 128, 4
emb1_eta_span = emb1_cell_size_eta * emb1_nEta
emb1_phi_span = emb1_cell_size_phi * emb1_nPhi
emb1_eta_start = - int(emb1_nEta/2) * emb1_cell_size_eta
emb1_phi_start = - int(emb1_nPhi/2) * emb1_cell_size_phi
emb1_eta_end = emb1_eta_start + emb1_eta_span
emb1_phi_end = emb1_phi_start + emb1_phi_span

emb2_cell_size_eta = cell_meta['EMB2']['cell_size_eta']
emb2_cell_size_phi = cell_meta['EMB2']['cell_size_phi']
emb2_nEta, emb2_nPhi = 16, 16
emb2_eta_span = emb2_cell_size_eta * emb2_nEta
emb2_phi_span = emb2_cell_size_phi * emb2_nPhi
emb2_eta_start = - int(emb2_nEta/2) * emb2_cell_size_eta
emb2_phi_start = - int(emb2_nPhi/2) * emb2_cell_size_phi
emb2_eta_end = emb2_eta_start + emb2_eta_span
emb2_phi_end = emb2_phi_start + emb2_phi_span

emb3_cell_size_eta = cell_meta['EMB3']['cell_size_eta']
emb3_cell_size_phi = cell_meta['EMB3']['cell_size_phi']
emb3_nEta, emb3_nPhi = 8, 16
emb3_eta_span = emb3_cell_size_eta * emb3_nEta
emb3_phi_span = emb3_cell_size_phi * emb3_nPhi
emb3_eta_start = - int(emb3_nEta/2) * emb3_cell_size_eta
emb3_phi_start = - int(emb3_nPhi/2) * emb3_cell_size_phi
emb3_eta_end = emb3_eta_start + emb3_eta_span
emb3_phi_end = emb3_phi_start + emb3_phi_span

tilebar0_cell_size_eta = cell_meta['TileBar0']['cell_size_eta']
tilebar0_cell_size_phi = cell_meta['TileBar0']['cell_size_phi']
tilebar0_nEta, tilebar0_nPhi = 4, 4
tilebar0_eta_span = tilebar0_cell_size_eta * tilebar0_nEta
tilebar0_phi_span = tilebar0_cell_size_phi * tilebar0_nPhi
tilebar0_eta_start = - int(tilebar0_nEta/2) * tilebar0_cell_size_eta
tilebar0_phi_start = - int(tilebar0_nPhi/2) * tilebar0_cell_size_phi
tilebar0_eta_end = tilebar0_eta_start + tilebar0_eta_span
tilebar0_phi_end = tilebar0_phi_start + tilebar0_phi_span

tilebar1_cell_size_eta = cell_meta['TileBar1']['cell_size_eta']
tilebar1_cell_size_phi = cell_meta['TileBar1']['cell_size_phi']
tilebar1_nEta, tilebar1_nPhi = 4, 4
tilebar1_eta_span = tilebar1_cell_size_eta * tilebar1_nEta
tilebar1_phi_span = tilebar1_cell_size_phi * tilebar1_nPhi
tilebar1_eta_start = - int(tilebar1_nEta/2) * tilebar1_cell_size_eta
tilebar1_phi_start = - int(tilebar1_nPhi/2) * tilebar1_cell_size_phi
tilebar1_eta_end = tilebar1_eta_start + tilebar1_eta_span
tilebar1_phi_end = tilebar1_phi_start + tilebar1_phi_span

tilebar2_cell_size_eta = cell_meta['TileBar2']['cell_size_eta']
tilebar2_cell_size_phi = cell_meta['TileBar2']['cell_size_phi']
tilebar2_nEta, tilebar2_nPhi = 2, 4
tilebar2_eta_span = tilebar2_cell_size_eta * tilebar2_nEta
tilebar2_phi_span = tilebar2_cell_size_phi * tilebar2_nPhi
tilebar2_eta_start = - int(tilebar2_nEta/2) * tilebar2_cell_size_eta
tilebar2_phi_start = - int(tilebar2_nPhi/2) * tilebar2_cell_size_phi
tilebar2_eta_end = tilebar2_eta_start + tilebar2_eta_span
tilebar2_phi_end = tilebar2_phi_start + tilebar2_phi_span

layer_key = {
        1: {
            'string': 'EMB1',
            'nEta'  : emb1_nEta,
            'nPhi'  : emb1_nPhi,
            'eta_span': emb1_eta_span,
            'phi_span': emb1_phi_span,
            'csz_eta' : emb1_cell_size_eta,
            'csz_phi' : emb1_cell_size_phi,
            'eta_start': emb1_eta_start,
            'phi_start': emb1_phi_start
        },
        2: {
            'string': 'EMB2',
            'nEta'  : emb2_nEta,
            'nPhi'  : emb2_nPhi,
            'eta_span': emb2_eta_span,
            'phi_span': emb2_phi_span,
            'csz_eta' : emb2_cell_size_eta,
            'csz_phi' : emb2_cell_size_phi,
            'eta_start': emb2_eta_start,
            'phi_start': emb2_phi_start
        },
        3: {
            'string': 'EMB3',
            'nEta'  : emb3_nEta,
            'nPhi'  : emb3_nPhi,
            'eta_span': emb3_eta_span,
            'phi_span': emb3_phi_span,
            'csz_eta' : emb3_cell_size_eta,
            'csz_phi' : emb3_cell_size_phi,
            'eta_start': emb3_eta_start,
            'phi_start': emb3_phi_start
        },
        12: {
            'string': 'TileBar0',
            'nEta'  : tilebar0_nEta,
            'nPhi'  : tilebar0_nPhi,
            'eta_span': tilebar0_eta_span,
            'phi_span': tilebar0_phi_span,
            'csz_eta' : tilebar0_cell_size_eta,
            'csz_phi' : tilebar0_cell_size_phi,
            'eta_start': tilebar0_eta_start,
            'phi_start': tilebar0_phi_start
        },
        13: {
            'string': 'TileBar1',
            'nEta'  : tilebar1_nEta,
            'nPhi'  : tilebar1_nPhi,
            'eta_span': tilebar1_eta_span,
            'phi_span': tilebar1_phi_span,
            'csz_eta' : tilebar1_cell_size_eta,
            'csz_phi' : tilebar1_cell_size_phi,
            'eta_start': tilebar1_eta_start,
            'phi_start': tilebar1_phi_start
        },
        14: {
            'string': 'TileBar2',
            'nEta'  : tilebar2_nEta,
            'nPhi'  : tilebar2_nPhi,
            'eta_span': tilebar2_eta_span,
            'phi_span': tilebar2_phi_span,
            'csz_eta' : tilebar2_cell_size_eta,
            'csz_phi' : tilebar2_cell_size_phi,
            'eta_start': tilebar2_eta_start,
            'phi_start': tilebar2_phi_start
        }
    }

In [5]:
#====================
# File setup ========
#====================
# user.angerami.24559744.OutputStream._000001.root
# Number of files
Nfile = 100
fileNames = []
file_prefix = 'user.angerami.24559744.OutputStream._000'
for i in range(1,Nfile+1):
    endstring = f'{i:03}'
    fileNames.append(ext_datapath + file_prefix + endstring + '.root')

In [6]:
## GEOMETRY DICTIONARY ##
with ur.open(fileNames[0]) as geo_file:
  CellGeo_tree = geo_file["CellGeo"]
  geo_dict = dict_from_tree(tree=CellGeo_tree, branches=None, np_branches=geo_branches)

# cell geometry data
cell_geo_ID = geo_dict['cell_geo_ID']
cell_ID_dict = dict(zip(cell_geo_ID, np.arange(len(cell_geo_ID))))

In [7]:
X_large_topo = np.lib.format.open_memmap(ext_datapath + 'XTP_large_25.npy', mode='w+', dtype=np.float64,
                       shape=(4000000,938), fortran_order=False, version=None)
Y_large_topo = np.lib.format.open_memmap(ext_datapath + 'YTP_large_25.npy', mode='w+', dtype=np.float64,
                       shape=(4000000,1), fortran_order=False, version=None)

In [8]:
k = 1 # tally used to keep track of file number
tot_nEvts = 0 # used for keeping track of total number of events
tot_nEvts_topo = 0 # used for keeping track of total clusters
max_nPoints = 0 # used for keeping track of the largest 'point cloud'
t_tot = 0 # total time

for currFile in fileNames:
    print()
    print('Working on File: '+str(currFile)+' - '+str(k)+'/'+str(Nfile))
    k += 1

    ## EVENT DICTIONARY ##
    with ur.open(currFile) as event:
        event_tree = event["EventTree"]
        event_dict = dict_from_tree(tree=event_tree, branches=ak_event_branches, np_branches=np_event_branches)

        ## TRACK DICTIONARY ##
        track_dict = dict_from_tree(tree=event_tree, branches=track_branches)    
    #===================
    # APPLY CUTS =======
    #===================
    # create ordered list of events to use for index slicing
    nEvents = len(event_dict['eventNumber'])
    all_events = np.arange(0,nEvents,1,dtype=np.int32)

    # SINGLE TRACK CUT
    single_track_mask = event_dict['nTrack'] == np.full(nEvents, 1)
    filtered_event = all_events[single_track_mask]

    # CENTRAL TRACKS
    # Warning: we are safe for now with single tracks but not for multiples using this
    trackEta_EMB1 = ak.flatten(track_dict['trackEta_EMB1'][filtered_event]).to_numpy()
    central_track_mask = np.abs(trackEta_EMB1) < .7
    filtered_event = filtered_event[central_track_mask]

    # TRACKS WITH CLUSTERS
    nCluster = event_dict['nCluster'][filtered_event]
    total_clusters = sum(nCluster)
    filtered_event_mask = nCluster != 0
    filtered_event = filtered_event[filtered_event_mask]

    #============================================#
    ## CREATE INDEX ARRAY FOR TRACKS + CLUSTERS ##
    #============================================#
    event_indices = []

    for evt in filtered_event:

        # pull cluster number, don't need zero as it's loaded as a np array
        nClust = event_dict["nCluster"][evt]
        cluster_idx = np.arange(nClust)

        # Notes: this will need to handle more complex scenarios in the future for tracks with
        # no clusters

        ## DELTA R ##
        # pull coordinates of tracks and clusters from event
        # we can get away with the zeroth index because we are working with single track events
        trackCoords = np.array([event_dict["trackEta"][evt][0],
                                  event_dict["trackPhi"][evt][0]])
        clusterCoords = np.stack((event_dict["cluster_Eta"][evt].to_numpy(),
                                    event_dict["cluster_Phi"][evt].to_numpy()), axis=1)

        _DeltaR = DeltaR(clusterCoords, trackCoords)
        DeltaR_mask = _DeltaR < .2
        matched_clusters = cluster_idx[DeltaR_mask]

        ## CREATE LIST ##
        # Note: currently do not have track only events. Do this in the future    
        if np.count_nonzero(DeltaR_mask) > 0:
            event_indices.append((evt, 0, matched_clusters))

    event_indices = np.array(event_indices, dtype=np.object_)

    #=========================#
    ## DIMENSIONS OF X ARRAY ##
    #=========================#
    max_dims = find_max_dim_tuple(event_indices, event_dict)
    evt_tot = max_dims[0]
    tot_nEvts += max_dims[0]
    # keep track of the largest point cloud to use for saving later
    if max_dims[1] > max_nPoints:
        max_nPoints = max_dims[1]

    # Create arrays
    Y_new = np.zeros((max_dims[0],3))
    X_new = np.zeros(max_dims)

    topo_x = np.zeros([total_clusters, 938])
    curr_cluster = 0
    topo_y = np.zeros([total_clusters, 1])
    #===================#
    ## FILL IN ENTRIES ##==============================================================
    #===================#
    for i in range(max_dims[0]):
        # pull all relevant indices
        evt = event_indices[i,0]
        track_idx = event_indices[i,1]
        # recall this now returns an array
        cluster_nums = event_indices[i,2]

        ##############
        ## CLUSTERS ##
        ##############
        # set up to have no clusters, further this with setting up the same thing for tracks
        target_ENG_CALIB_TOT = -1
        if cluster_nums is not None:

            # find averaged center of clusters
            cluster_Eta = event_dict['cluster_Eta'][evt].to_numpy()
            cluster_Phi = event_dict['cluster_Phi'][evt].to_numpy()
            av_Eta = np.mean(cluster_Eta)
            av_Phi = np.mean(cluster_Phi)

            nClust_current_total = 0
            target_ENG_CALIB_TOT = 0
            for c in cluster_nums:            
                # cluster data
                target_ENG_CALIB_TOT += event_dict['cluster_ENG_CALIB_TOT'][evt][c]
                cluster_cell_ID = event_dict['cluster_cell_ID'][evt][c].to_numpy()
                nInClust = len(cluster_cell_ID)
                cluster_cell_E = event_dict['cluster_cell_E'][evt][c].to_numpy()            
                cell_indices = find_index_1D(cluster_cell_ID, cell_ID_dict)

                cluster_cell_Eta = geo_dict['cell_geo_eta'][cell_indices]
                cluster_cell_Phi = geo_dict['cell_geo_phi'][cell_indices]
                cluster_cell_rPerp = geo_dict['cell_geo_rPerp'][cell_indices]
                cluster_cell_sampling = geo_dict['cell_geo_sampling'][cell_indices]

                # input all the data
                # note here we leave the fourth entry zeros (zero for flag!!!)
                low = nClust_current_total
                high = low + nInClust
                X_new[i,low:high,0] = cluster_cell_E
                # Normalize to average cluster centers
                eta_norm = cluster_cell_Eta - av_Eta
                phi_norm = cluster_cell_Eta - av_Eta

                X_new[i,low:high,1] = eta_norm
                X_new[i,low:high,2] = phi_norm
                X_new[i,low:high,3] = cluster_cell_rPerp
                X_new[i,low:high,5] = cluster_cell_sampling

                # Make topo cluster
                topo_cluster = topo_clustering(eta_norm, phi_norm, cluster_cell_sampling, cluster_cell_E)

                # Flatten and place into numpy array
                new_topo_x = []
                for x in topo_cluster:
                  new_topo_x.extend(topo_cluster[x].flatten())

                new_topo_x.append(event_dict['cluster_E'][evt][c])
                new_topo_x.append(event_dict['cluster_Eta'][evt][c])

                topo_x[curr_cluster] = np.array(new_topo_x)
                topo_y[curr_cluster] = target_ENG_CALIB_TOT

                curr_cluster += 1
                nClust_current_total += nInClust

        #####################
        ## TARGET ENERGIES ##
        #####################
        # this should be flattened or loaded as np array instead of zeroth index in future
        Y_new[i,0] = event_dict['truthPartE'][evt][0]
        Y_new[i,1] = event_dict['truthPartPt'][evt][track_idx]
        Y_new[i,2] = target_ENG_CALIB_TOT

        ############
        ## TRACKS ##
        ############

        trackP = event_dict['trackP'][evt][track_idx]

        track_arr = np.zeros((6,6))
        track_arr[:,5] = track_sample_layer
        # track flag
        track_arr[:,4] = np.ones((6,))
        track_arr[:,3] = layer_rPerp

        # Fill in eta and phi values
        # this is complicated - simplify?
        p, q = 0, 1
        for j in range(12):
            # This gives the key for the track dict
            track_arr[p,q] = track_dict[track_branches[j]][evt][track_idx]
            if j%2 != 0:
                p += 1
                q = 1
            else:
                q = 2

        # search for NULL track flags
        track_eta_null_mask = np.abs(track_arr[:,1]) > 4.9
        track_phi_null_mask = np.abs(track_arr[:,2]) >= np.pi
        track_flag_null = np.logical_or(track_eta_null_mask, track_phi_null_mask)
        
        # Normalize track information!
        track_arr[:,1] = track_arr[:,1] - av_Eta
        track_arr[:,2] = track_arr[:,2] - av_Phi

        # where the flag is set to null, set values of energy and calo layer to zero
        if np.any(track_flag_null):
            # number for which to spread the energy out over
            p_nums = 6 - np.count_nonzero(track_flag_null)
            track_arr[track_flag_null,1:6] = 0
            # get where the track exists (not null)
            track_arr[np.invert(track_flag_null),0] = trackP/p_nums
        # otherwise fill in pt/6 for all
        else:
            track_arr[:,0] = trackP/6      

        # Save track information
        X_new[i,high:high+6,0:6] = track_arr

    #####################################################

    #=======================#
    ## ARRAY CONCATENATION ##
    #=======================#
    # Write to X
    old_tot = tot_nEvts - max_dims[0]
    old_tot_topo = tot_nEvts_topo
    tot_nEvts_topo += total_clusters

    X_large_topo[old_tot_topo:tot_nEvts_topo] = np.ndarray.copy(topo_x)

    Y_large_topo[old_tot_topo:tot_nEvts_topo] = np.ndarray.copy(topo_y)

    print('Array dimension: '+str(max_dims))
    print('Total events: '+str(tot_nEvts))
    print('Current size: '+str((tot_nEvts,max_nPoints,6)))
    print('Topo Events: '+str(tot_nEvts_topo))
    print()


Working on File: H:/EPE_file_storage/data_storage/pipm/user.angerami.24559744.OutputStream._000001.root - 1/100
Array dimension: (3046, 1037, 6)
Total events: 3046
Current size: (3046, 1037, 6)
Topo Events: 7381


Working on File: H:/EPE_file_storage/data_storage/pipm/user.angerami.24559744.OutputStream._000002.root - 2/100
Array dimension: (3108, 1086, 6)
Total events: 6154
Current size: (6154, 1086, 6)
Topo Events: 14783


Working on File: H:/EPE_file_storage/data_storage/pipm/user.angerami.24559744.OutputStream._000003.root - 3/100
Array dimension: (3058, 844, 6)
Total events: 9212
Current size: (9212, 1086, 6)
Topo Events: 22187


Working on File: H:/EPE_file_storage/data_storage/pipm/user.angerami.24559744.OutputStream._000004.root - 4/100
Array dimension: (3041, 1010, 6)
Total events: 12253
Current size: (12253, 1086, 6)
Topo Events: 29415


Working on File: H:/EPE_file_storage/data_storage/pipm/user.angerami.24559744.OutputStream._000005.root - 5/100
Array dimension: (3073, 884

In [9]:
final_topo_x = np.array(topo_x)

X_topo = np.lib.format.open_memmap(ext_datapath + "Xstmc_" + str(Nfile)+'.npy',
                                   mode='w+', dtype=np.float64, shape=(tot_nEvts_topo, 938))
np.copyto(dst=X_topo, src=X_large_topo[:tot_nEvts_topo],casting='same_kind', where=True)
print(X_topo.shape)
del X_topo
del X_large_topo
Y_topo = np.lib.format.open_memmap(ext_datapath + "Ystmc_" + str(Nfile)+'.npy',
                                   mode='w+', dtype=np.float64, shape=(tot_nEvts_topo, 1))
np.copyto(dst=Y_topo, src=Y_large_topo[:tot_nEvts_topo],casting='same_kind', where=True)
print(Y_topo.shape)
del Y_topo
del Y_large_topo

(736464, 938)
(736464, 1)
