In [9]:
import os
import math

import matplotlib.pyplot as plt 

import numpy as np
import pandas as pd
import random

In [10]:
# Global configuration shared by the cells below.
maxT = 37          # not referenced in the cells visible here -- TODO confirm it is still needed
maxLambda = 800    # not referenced in the cells visible here; presumably the color scale (create_images divides color by 800) -- verify
PAD_VALUE = -1     # fill value handed to pad_sequences when events are padded to a common length
NEvts = 1000       # create_images looks up event numbers 0 .. NEvts-1 in every input array

In [13]:
def create_images(data, take_color=1, n_events=None):
    """Turn a table of hits into one flat, pseudo-normalised array per event.

    Parameters
    ----------
    data : 2D array
        One row per hit. Column 0 holds the event number; the following
        columns are theta, phi, time and color, in that order.
    take_color : int, optional
        1 keeps the color column (4 values per hit), 0 drops it
        (3 values per hit).
    n_events : int, optional
        Number of events to extract; event numbers 0 .. n_events-1 are
        looked up in column 0. Defaults to the module-level ``NEvts``.

    Returns
    -------
    list of 1D float64 arrays
        One entry per event number, laid out as
        [hit1_theta, hit1_phi, hit1_time, (hit1_color), hit2_theta, ...]
        with hits ordered by time. Times are relative to the earliest hit of
        the event. An event number with no hits yields an empty array, so the
        list always has ``n_events`` entries and stays aligned with any
        per-event labels.
    """
    if n_events is None:
        n_events = NEvts

    # Divisors used for the pseudo-normalisation of time and color.
    time_scale = 8
    color_scale = 800

    data = np.asarray(data)
    event_numbers = data[:, 0]

    images = []
    for i_event in range(n_events):
        # Rows of the current event, without the event-number column.
        # Boolean-mask indexing returns a copy, so `data` is never modified.
        hits = data[event_numbers == i_event, 1:]

        if take_color == 0:
            hits = np.delete(hits, 3, axis=1)  # drop the color column

        # Work in float64 so the in-place arithmetic below is valid for any
        # input dtype (integer input would otherwise fail on `/=`).
        hits = hits.astype(np.float64)

        if hits.shape[0] == 0:
            # No hits for this event number: min() below would raise on an
            # empty column, so emit an empty event instead.
            images.append(hits.reshape(-1))
            continue

        # Express time relative to the first hit of the event.
        hits[:, 2] -= hits[:, 2].min()

        # Order hits by time within the event.
        hits = hits[np.argsort(hits[:, 2])]

        # Pseudo-normalisation.
        hits[:, 0] /= np.pi          # theta
        hits[:, 1] /= 2 * np.pi      # phi
        hits[:, 2] /= time_scale     # time (relative to the first hit)
        if take_color == 1:
            hits[:, 3] /= color_scale    # color

        images.append(hits.reshape(-1))  # flatten to a 1D event

    return images

In [12]:
from keras.utils import Sequence
from keras.preprocessing.sequence import pad_sequences

def process_a_folder(input_dir, take_color, out_dir):
    """Convert every file in ``input_dir`` into a padded, shuffled batch file.

    Each input file is loaded with ``np.load`` and must provide the arrays
    ``evt_theta_phi_time``, ``vtx_xyz`` and ``dir_xyz``. The hits are turned
    into flat per-event arrays by ``create_images``, padded/truncated to a
    fixed length with ``PAD_VALUE``, shuffled together with their labels and
    written to ``out_dir`` as ``f_batch_<n>.npz`` (``n`` starts at 1 and
    follows the sorted directory listing) with keys ``x`` and ``y``.

    Parameters
    ----------
    input_dir : str
        Folder whose entries are all treated as input files.
    take_color : int
        Forwarded to ``create_images`` (1 keeps the color column, 0 drops it).
    out_dir : str
        Destination folder; created if it does not exist yet.

    Returns
    -------
    int
        Always 0.
    """
    file_names = sorted(os.listdir(input_dir))
    print(file_names)
    print(len(file_names))

    # np.savez does not create missing folders, so make sure the target exists.
    os.makedirs(out_dir, exist_ok=True)

    # Fixed length of every flattened event. Longer events are silently cut at
    # the end (truncating='post'), shorter ones are padded with PAD_VALUE.
    Lmax = 3300

    for idx, file_name in enumerate(file_names):
        in_path = os.path.join(input_dir, file_name)
        print(idx)
        print(file_name)
        print(in_path)

        # Close the archive as soon as the arrays are read; an unclosed
        # NpzFile keeps its file handle open for every processed file.
        with np.load(in_path) as batch_file:
            hits = batch_file['evt_theta_phi_time']
            vtx1 = batch_file['vtx_xyz']
            dir1 = batch_file['dir_xyz']

        data1 = create_images(hits, take_color)

        batch_x1 = pad_sequences(data1, maxlen=Lmax, dtype='float64', padding='post', truncating='post', value=PAD_VALUE)
        batch_y1 = np.hstack((vtx1, dir1))

        # Glue features and labels together so one row shuffle keeps them
        # aligned (not clear if shuffling is needed for vertexing).
        batch = np.concatenate((batch_x1, batch_y1), axis=1)
        np.random.shuffle(batch)

        # The last six columns are the labels; of those only the first three
        # are kept (for the time being the directionality info is dropped).
        batch_y1 = batch[:, -6:-3]
        batch_x1 = batch[:, :-6]

        print('batch_x1.shape = ',batch_x1.shape, 'batch_y1.shape = ',batch_y1.shape)
        print('=====')
        print('')

        out_file = 'f_batch_' + str(idx + 1) + '.npz'
        np.savez(os.path.join(out_dir, out_file), x=batch_x1, y=batch_y1)

    return 0

In [None]:
# Process the training folder with take_color=0 (color column dropped) and write
# the resulting f_batch_<n>.npz files to the no-color output folder.
process_a_folder('/data/Elagin/vtx_dir_xyz_color_train/', 0, '/data/Elagin/vtx_xyz_nocolor_train_norm')
# NOTE(review): the commented-out call below passes four arguments, but
# process_a_folder takes (input_dir, take_color, out_dir) -- it would need updating before reuse.
#process_a_folder('/data/Elagin/sig_test_maxT52_che', '/data/Elagin/bkg_test_maxT52_che', 0, '/data/Elagin/test_maxT52_che')