### HDF5 Stuff

In [1]:
import h5py
h5_filename = "/pscratch/sd/f/fernando/GSGM_for_EIC_Calo/scripts/truncated_1000cells_FPCD.hdf5"
f = h5py.File(h5_filename, 'r')
f.keys()
chunk_size=2000

print("[E, X, Y, Z]")
for i in range(5):
    print(f['hcal_cells'][1,i,:-1]) #XYZ, skip mask

[E, X, Y, Z]
[ 2.1972656e-04 -1.5000000e+03  1.0000000e+02  4.1257002e+03]
[ 2.746582e-04 -1.300000e+03  0.000000e+00  3.915100e+03]
[ 8.544922e-05 -1.200000e+03  1.000000e+02  3.961900e+03]
[ 7.93457e-05 -1.20000e+03  1.00000e+02  3.91510e+03]
[ 9.765625e-05 -1.300000e+03  2.000000e+02  3.868300e+03]


In [2]:
h5_indexE = 0
h5_indexX = 1
h5_indexY = 2
h5_indexZ = 3

In [3]:
import numpy as np
print(np.shape(f['hcal_cells']))

(1794000, 1000, 5)


### Detector Parameters XYZ

In [4]:
voxel_factor=2

minXY = -2700
maxXY = 2700
widthXY = 100
ncellsXY = int(((maxXY-minXY)/widthXY)/voxel_factor)
binningXY = np.linspace(minXY,maxXY,ncellsXY+1)

minZ = 3820
maxZ = 4991
widthZ = 23.4
ncellsZ = int((((maxZ-minZ)/widthZ)+3)/voxel_factor)
binningZ = np.linspace(minZ,maxZ,ncellsZ+1)
print(ncellsZ)

26


In [5]:
data = f['hcal_cells'][:chunk_size]

In [6]:
import torch
def points_to_image_torch(xs, ys, ps, sensor_size=(180, 240)):
    xt, yt, pt = torch.from_numpy(xs), torch.from_numpy(ys), torch.from_numpy(ps)
    img = torch.zeros(sensor_size)
    img.index_put_((yt, xt), pt, accumulate=True)
    return img

In [7]:
from tqdm import tqdm
nevents = np.shape(f['hcal_cells'])[0]
with h5py.File(f'epic_hcal_images_{voxel_factor}x{voxel_factor}.h5', 'w') as newfile:
    # create empty data set
    dset = newfile.create_dataset('calo_images', shape=(nevents, ncellsXY, ncellsXY, ncellsZ),
                            maxshape=(None, ncellsXY, ncellsXY, ncellsZ), chunks=(chunk_size, ncellsXY, ncellsXY, ncellsZ),
                            dtype=np.float32)
    
    for chunk in tqdm(range(int(nevents/chunk_size))):
    #for chunk in range(10):
        # add chunk of rows
        start = chunk*chunk_size
        #print(start)
        data = f['hcal_cells'][start:start+chunk_size]
        images = []
        
        for evt in range(chunk_size):
            counts, binedges = np.histogramdd(data[evt,:,:-2], bins=(ncellsXY,ncellsXY,ncellsZ), weights=data[evt,:,h5_indexE])
            images.append(counts)
        images=np.asarray(images)
        dset[start:start+chunk_size, :, :,:] = images[:, :, :,:]

100%|██████████| 897/897 [11:14<00:00,  1.33it/s]


In [8]:
with h5py.File('epic_hcal_images.h5', 'r') as newfile:
    #print(f['calo_images'].attrs['last_index'])
    print(newfile['calo_images'].shape)
    #print(f['embeds'][0:26])
    #print(newfile['calo_images'][0])
    for i in range(13):
        for row in newfile['calo_images'][0,:,:,i]:
            print(row)

(1794000, 27, 27, 27)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.