In [1]:
from utils import progress_bar
import numpy as np
from iotools import loader_factory
import matplotlib.pyplot as plt
%matplotlib inline

def Create_Sum_Rings( DATA_DIRS=['/data/hkml_data/IWCDgrid/varyE/e-'], flavour='100k.h5',
                      output_file='/data/UVicWorkshopPlayground/A/data/e-_tworing.npz' ):
    """
    Build a "two-ring" sample by overlaying pairs of events and save it as a
    compressed numpy archive.

    Events are read in shuffled batches of two through
    loader_factory('H5Dataset', ...). For every complete pair, channel 0 of
    the two event_data arrays is summed (charge) and channel 1 is replaced by
    the element-wise maximum (time). The merged event gets label 0; the
    per-event quantities of both source events are stored side by side.

    Args: DATA_DIRS ... list, forwarded to loader_factory as data_dirs
          flavour ... string, forwarded to loader_factory as flavour
          output_file ... string, path handed to np.savez_compressed

    Stored keys: event_data, labels, pids, positions, directions, energies,
                 mring_labels

    Raises: ValueError if the loader yields no complete pair of events.

    NOTE(review): the positional indices evt[1]..evt[5] assume the loader
    yields fields in the order given by read_keys. The output recorded in
    this notebook shows 'directions' with an image-like shape
    (50000, 2, 88, 168, 2), which suggests the actual order differs --
    confirm against the loader before relying on the per-key contents.
    """
    loader=loader_factory('H5Dataset', batch_size=2,
                          shuffle=True, num_workers=4, data_dirs=DATA_DIRS,
                          flavour=flavour, start_fraction=0.0, use_fraction=1.0,
                          read_keys=[ 'event_data', 'labels','directions', 'energies', 'pids', 'positions'])
    #output lists
    ev_data=[]
    labels=[]
    mring_labels=[]
    pids=[]
    positions=[]
    directions=[]
    energies=[]

    n_batches = len(loader)
    progress = display(progress_bar(0,n_batches),display_id=True)
    for i,evt in enumerate(loader):
        if ( i==0 or (i+1)%100==0) :
            progress.update( progress_bar(i+1,n_batches,"Progress"))

        # An odd number of events leaves a final batch holding a single
        # event; it cannot be paired, so skip it instead of failing with an
        # IndexError after the whole loop has already run.
        if len(evt[0]) < 2:
            continue

        #charge and time data of the two events to overlay
        tqev1 = evt[0][0]
        tqev2 = evt[0][1]
        tqev3 = np.copy( tqev2 )                                # merged event starts as a copy of the second one
        tqev3[:,:,0] = tqev1[:,:,0] + tqev2[:,:,0]              # add charges
        tqev3[:,:,1] = np.maximum( tqev1[:,:,1], tqev2[:,:,1])  # take maximum time

        ev_data.append( np.expand_dims( tqev3, axis=0 ) )
        labels.append(       0 )
        mring_labels.append( [ evt[1][0], evt[1][1] ] )
        directions.append(   [ evt[2][0], evt[2][1] ] )
        energies.append(     [ evt[3][0], evt[3][1] ] )
        pids.append(         [ evt[4][0], evt[4][1] ] )
        positions.append(    [ evt[5][0], evt[5][1] ] )

    print("Create_Sum_Rings finished loop over events")
    if not ev_data:
        # np.concatenate would raise a less helpful ValueError on an empty list
        raise ValueError("Create_Sum_Rings found no complete pair of events to merge")
    all_events=np.concatenate(ev_data)
    all_labels=np.asarray(labels)
    all_mringlabels=np.asarray(mring_labels)
    all_pids=np.asarray(pids)
    all_positions=np.asarray(positions)
    all_directions=np.asarray(directions)
    all_energies=np.asarray(energies)
    print("Begin data compress")
    np.savez_compressed(output_file,
                    event_data=all_events,
                    labels=all_labels,
                    pids=all_pids,
                    positions=all_positions,
                    directions=all_directions,
                    energies=all_energies,
                    mring_labels=all_mringlabels)
    print("Done Create_Sum_Rings")


In [2]:
# Build the two-ring sample using the function's default input directory,
# flavour and output .npz path.
Create_Sum_Rings()
print("Done.")

Create_Sum_Rings finished loop over events
Begin data compress
Done Create_Sum_Rings
Done.


In [3]:
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import numpy as np
import sys, h5py, tables, os

def make_h5(file_list,output_file):
    """
    Merge the arrays stored in several numpy archives into one zlib-compressed
    HDF5 file, then print the keys and shapes that were stored.

    Every key found in the input files is handled according to its rank:
      * arrays with fewer than 2 dimensions are accumulated in memory and
        written once, at the end, as a chunked array;
      * arrays with 2 or more dimensions are appended file-by-file to an
        extendable array chunked one entry at a time along the first axis.
    All values are stored as float32.

    Args: file_list ... list, files to be taken as input
          output_file ... string, name of an output h5 file
    """

    print('Will process',len(file_list),'files...')

    # Create output file
    FILTERS   = tables.Filters(complib='zlib', complevel=5)
    output    = tables.open_file(output_file,mode='w',filters=FILTERS)
    try:
        out_ndarray = {}
        out_1darray = {}

        # Loop over files, read data & store
        # For small (<2D) arrays, we store all at the end
        # For multi-dimensional arrays, they will be appended file-by-file
        for file_index,file_name in enumerate(file_list):
            # Open file; the context manager releases the archive handle
            with np.load(file_name) as f:
                for key in f.keys():
                    # Each f[key] access re-reads the array from the archive,
                    # so load and convert it exactly once.
                    data = f[key].astype(np.float32)
                    if data.ndim < 2:
                        if key not in out_1darray:
                            out_1darray[key] = data
                        else:
                            out_1darray[key] = np.hstack([out_1darray[key],data])
                    else:
                        if key not in out_ndarray:
                            entry_shape = list(data.shape[1:])
                            out_ndarray[key] = output.create_earray(output.root,key,tables.Float32Atom(),
                                                                    chunkshape=[1] + entry_shape,
                                                                    shape=[0] + entry_shape)
                        out_ndarray[key].append(data)

            sys.stdout.write('Progress: %1.3f\r' % (float(file_index+1)/len(file_list)))
            sys.stdout.flush()
        sys.stdout.write('\n')

        # Create chunked-array to store 1D arrays
        for key in out_1darray:
            data = out_1darray[key]
            out_data = output.create_carray(output.root, key, tables.Float32Atom(), shape=data.shape)
            out_data[:] = data

        # Report what's stored
        print('\nFinished!\n')
    finally:
        # Close output file even if reading or writing failed part-way
        output.close()

    # Re-open read-only to verify what was actually written
    import h5py
    with h5py.File(output_file,mode='r') as stored:
        print('Stored keys:',list(stored.keys()))
        for key in stored.keys():
            print('    %s ... shape %s' % (key,stored[key].shape))

In [4]:
# Convert the two-ring .npz archive into an HDF5 file; the output name is a
# relative path, so it is written to the current working directory.
make_h5(['/data/UVicWorkshopPlayground/A/data/e-_tworing.npz'],'e-_tworing_100k.h5')

Will process 1 files...
Progress: 1.000

Finished!

Stored keys: [u'directions', u'energies', u'event_data', u'labels', u'mring_labels', u'pids', u'positions']
    directions ... shape (50000, 2, 88, 168, 2)
    energies ... shape (50000, 2)
    event_data ... shape (50000, 88, 168, 2)
    labels ... shape (50000,)
    mring_labels ... shape (50000, 2)
    pids ... shape (50000, 2, 1, 3)
    positions ... shape (50000, 2, 1)
