In [76]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from matplotlib.colors import LogNorm
import uproot as ur
import awkward as ak
import time as t
import copy
import scipy.constants as spc
# Record the library versions this notebook was run with.
print(f"Awkward version: {ak.__version__}")
print(f"Uproot Version: {ur.__version__}")

import sys
# Extend the module search path with the project directory before importing `util`.
# NOTE(review): this is a hard-coded absolute path, so the cell only works on a
# machine where this directory exists — consider making it configurable.
path = '/Work/EPE/ML4pi/'
sys.path.append(path)
# Project-local helpers; presumably resolved through the sys.path entry added
# above, so these imports must stay after the append.
from util import resolution_util as ru
from util import plot_util as pu
from util import deep_set_util as dsu
from util.deep_set_util import to_xyz
from util.deep_set_util import cell_meta

Awkward version: 1.7.0
Uproot Version: 4.1.9


In [3]:
from tensorflow.keras import regularizers
from tensorflow.keras import Sequential
from tensorflow.keras import Model
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
import tensorflow as tf
from tensorflow.keras.models import load_model

In [4]:
# energyflow imports
import energyflow as ef
from energyflow.archs import PFN

In [5]:
def tvt_num(_data, _tvt=(75, 10, 15)):
    '''Compute training / validation / testing sample counts for a sized input.

    Parameters
    ----------
    _data : sized
        Any object supporting ``len()``; only its length is used.
    _tvt : tuple of three numbers, optional
        Relative weights for (train, validation, test). Only the ratios matter;
        they do not need to add up to 100.

    Returns
    -------
    tuple of int
        ``(n_train, n_val, n_test)``. The three counts always sum to
        ``len(_data)`` and, for non-negative weights, are all >= 0.
    '''
    _tot = len(_data)
    _train, _val, _test = _tvt
    _tvt_sum = _train + _val + _test

    _train_rtrn = round(_train*_tot/_tvt_sum)
    # Rounding train and val independently can overshoot the total (e.g. 3 samples
    # split 1:1:0 rounds to 2 + 2), which previously produced a negative test
    # count. Cap val at whatever is left after train.
    _val_rtrn = min(round(_val*_tot/_tvt_sum), _tot - _train_rtrn)
    # Test takes the remainder so the three counts always add up to the total.
    _test_rtrn = _tot - _train_rtrn - _val_rtrn

    return _train_rtrn, _val_rtrn, _test_rtrn

def normalize_input1d(arr, mask):
    '''Standardize the masked entries of a 1-D array (subtract mean, divide by std).

    The mean and standard deviation are computed only over entries where ``mask``
    is True, and only those entries are modified; everything else is left alone
    (e.g. zeros stay zero when ``mask = arr != 0``). When the standard deviation
    is zero the division is skipped, so the selected entries are only centred.

    The work is done in place: ``arr`` itself is overwritten and returned.
    '''
    center = np.mean(arr, where=mask)
    spread = np.std(arr, where=mask)
    # The scalar statistics broadcast against arr; out=arr makes this in place.
    np.subtract(arr, center, out=arr, where=mask)
    # Divide only where the entry is selected AND the spread is non-zero.
    scalable = np.logical_and(spread != 0, mask)
    np.divide(arr, spread, out=arr, where=scalable)
    return arr

def normalize_input2d(arr, mask):
    '''Standardize each row of a 2-D array using only its masked entries.

    For every row (statistics are taken along axis 1) the mean and standard
    deviation are computed over the entries where ``mask`` is True. Those entries
    are centred and, if the row's standard deviation is non-zero, scaled by it.
    Entries where ``mask`` is False are excluded and left unchanged.

    The work is done in place: ``arr`` itself is overwritten and returned, so
    the result has the shape of ``arr``.
    '''
    # keepdims gives one statistic per row that broadcasts across that row.
    row_mean = np.mean(arr, axis=1, where=mask, keepdims=True)
    row_std = np.std(arr, axis=1, where=mask, keepdims=True)
    np.subtract(arr, row_mean, out=arr, where=mask)
    # Divide only selected entries in rows whose spread is non-zero.
    scalable = np.logical_and(row_std != 0, mask)
    np.divide(arr, row_std, out=arr, where=scalable)
    return arr

In [6]:
# Open the .npz archive, list the arrays it contains, and time the operation.
# NOTE(review): np.load appears to honour mmap_mode only for .npy files, not for
# .npz archives — confirm; if so, nothing is memory mapped here and the timing
# only covers opening the archive.
t0 = t.time()
dat = np.load('X_tracks_multipleClusters_med.npz', mmap_mode='r+')
print(dat.files)
t1 = t.time()
print(f'Time to load memory mapped data: {t1 - t0} (s)')

['arr_0']
Time to load memory mapped data: 0.0030002593994140625 (s)


In [7]:
# Read 'arr_0' and keep at most the first 500000 entries along axis 0 and the
# first 5 entries along the last axis. No target array (Y) is read in this cell.
t0 = t.time()
X = dat['arr_0'][:500000, :, :5]
print(X.shape)
t1 = t.time()

print(f'Time to copy arrays: {t1 - t0} (s)')

(3108, 825, 5)
Time to copy arrays: 0.1300051212310791 (s)


In [64]:
# Cluster-only selection: keep the entries of X whose first row has a zero in
# column 4.
# The earlier version initialised the mask to all True and the loop only ever
# set entries to True, so the filter never removed anything. Building the mask
# directly from the condition makes the selection take effect.
# NOTE(review): column 4 == 0 is presumed to mark a cluster (as opposed to a
# track) — confirm against the code that produced the file.
mask = X[:, 0, 4] == 0

X = X[mask]
print(X.shape)

(3108, 825, 5)


In [69]:
# Tuple of (axis-0, axis-1) index arrays locating every zero in column 4 of X.
a = np.nonzero(X[:, :, 4] == 0)


In [73]:
# 80 evenly spaced values per variable, with both end points included.
eta_bins = np.linspace(-1.2, 1.2, num=80)
phi_bins = np.linspace(-np.pi, np.pi, num=80)
rPerp_bins = np.linspace(0, 4000, num=80)

In [77]:
# Keep the loaded array as Xraw and rebuild X as a zero-filled array of the same
# shape.
# NOTE(review): re-running this cell rebinds Xraw to the already converted X;
# run it only once per kernel session.
Xraw = X
X = np.zeros(Xraw.shape)
# Column 0 is carried over unchanged.
X[:, :, 0] = Xraw[:, :, 0].copy()

# True wherever column 3 of the raw data is non-zero.
rPerp_mask = Xraw[:, :, 3] != 0
print(rPerp_mask.shape)

# Convert columns 1..3 with to_xyz, one axis-0 entry at a time and only for the
# rows selected by the mask; unselected rows stay zero.
# NOTE(review): column 4 is never copied, so it remains all zeros in X — confirm
# this is intended.
for i, conv_mask in enumerate(rPerp_mask):
    selected = Xraw[i, conv_mask, 1:4].copy()
    X[i, conv_mask, 1:4] = to_xyz(selected)

(3108, 825)


In [78]:
# Show the current shape of X.
print(X.shape)

(3108, 825, 5)


In [90]:
# Setup check: display the first row of the first entry in the raw array.
Xraw[0, 0]

array([ 2.02238312e+02, -3.93232286e-01, -5.50719500e-01,  3.01999976e+03,
        0.00000000e+00])

In [80]:
# Six reference rPerp values, one per layer list created below.
# NOTE(review): units and the exact layer correspondence are not shown here — confirm.
layer_rPerp = np.array([1540., 1733., 1930., 2450., 3010., 3630.])
# One independent, initially empty list per layer.
emb1, emb2, emb3 = [], [], []
tilebar0, tilebar1, tilebar2 = [], [], []
for visual_event in range(len(X)):
    emb1_mask = np.logical_and(Xraw[visual_event,:,3] > 1400, Xraw[visual_event,:,3] < 1600)
    emb2_mask = np.logical_and(Xraw[visual_event,:,3] > 1600, Xraw[visual_event,:,3] < 1800)
    emb3_mask = np.logical_and(Xraw[visual_event,:,3] > 1800, Xraw[visual_event,:,3] < 2200)
    tilebar0_mask = np.logical_and(Xraw[visual_event,:,3] > 2200, Xraw[visual_event,:,3] < 2700)
    tilebar1_mask = np.logical_and(Xraw[visual_event,:,3] > 2700, Xraw[visual_event,:,3] < 3300)
    tilebar2_mask = np.logical_and(Xraw[visual_event,:,3] > 3300, Xraw[visual_event,:,3] < 3700)