### This notebook investigates the training data '1D_aLund_train.npz' and calculates the DCTR weight for each aLund value

In [6]:
import numpy as np

In [7]:
# Directory that holds the dataset; the empty string makes the path relative
# to the current working directory.
data_dir = ''
train_path = data_dir + '1D_aLund_train.npz'
dataset = np.load(train_path)

In [8]:
# Pull the array stored under the 'X' key of the loaded .npz archive.
X_train = dataset['X']

The shape is N x 51 x 7, where N is the number of data points, 51 is the maximum number of nonzero jets, and 7 comprises the 4-momentum plus the 3 parameters (TimeShower:alphaSvalue, StringZ:aLund, StringFlav:probStoUD).

In [22]:
# Inspect the array dimensions (N, 51, 7).
X_train.shape

(1800000, 51, 7)

Investigate the aLund_value

In [9]:
# For every data point, read feature column 5 of the first entry along axis 1.
# NOTE(review): column 5 is presumably StringZ:aLund (the second of the three
# parameters that follow the 4-momentum) — confirm against the dataset layout.
aLund_value_list = X_train[:,0,5]

In [10]:
# Convert the NumPy column into a plain Python list. The name is rebound, so
# this cell relies on the slicing cell having been run first.
aLund_value_list = list(aLund_value_list)

### Number of different alund values

In [13]:
# Collapse the list into a set to count how many distinct aLund values occur.
n_distinct = len(set(aLund_value_list))
print("There are ", n_distinct, " difference alund value in the dataset")

There are  88335  difference alund value in the dataset


### Construct a dictionary that maps each aLund value (key) to the list of indices of the data points with that aLund value (value)

In [14]:
# Group row indices by aLund value: every key is an aLund value read from
# X_train[:, 0, 5], and its entry lists (in ascending order) the indices of
# all rows of X_train that carry that value.
aLund_value_dict = {}
for row_idx, row_value in enumerate(X_train[:, 0, 5]):
    # setdefault creates the empty list the first time a value is seen.
    aLund_value_dict.setdefault(row_value, []).append(row_idx)

In [15]:
# Show only the first few entries: the full dictionary has one key per distinct
# aLund value and, in total, one index per row of X_train, so displaying it
# whole floods the notebook output.
dict(list(aLund_value_dict.items())[:3])

{0.8934119939804077: [0,
  46724,
  191894,
  322741,
  324209,
  366197,
  379613,
  468105,
  511725,
  512461,
  518416,
  524077,
  598986,
  636710,
  663426,
  673339,
  695455,
  731603,
  845004,
  910688,
  940980,
  958202,
  1030312,
  1037818,
  1111949,
  1136171,
  1197289,
  1230604,
  1336531,
  1357608,
  1526276,
  1592893,
  1743795,
  1764208,
  1767287,
  1789821],
 0.8586440086364746: [1,
  128838,
  135262,
  154774,
  183706,
  271149,
  313182,
  417770,
  467564,
  471467,
  529719,
  637131,
  641712,
  717459,
  789978,
  826897,
  827792,
  842940,
  847028,
  935476,
  944368,
  946893,
  951098,
  959394,
  1102792,
  1108436,
  1153879,
  1301331,
  1317315,
  1512255,
  1515358,
  1557035,
  1688888,
  1729665,
  1745826,
  1777040],
 0.505994975566864: [73592,
  198465,
  257474,
  319813,
  515303,
  515976,
  886713,
  908582,
  1009651,
  1088847,
  1096973,
  1097147,
  1199925,
  1327246,
  1437484,
  1509811,
  1621833,
  1775427],
 0.86788898706

In [16]:
import pickle

# Persist the aLund-value -> row-index dictionary to disk. The with-block
# guarantees the file is flushed and closed even if pickling raises, which the
# manual open()/close() pair did not.
with open('aLund_value_dict.pkl', 'wb') as outfile:
    pickle.dump(aLund_value_dict, outfile)

In [17]:
import pickle

import keras

# standard numerical library imports
import numpy as np
import scipy as sp

# energyflow imports
import energyflow as ef
from energyflow.archs import PFN
from energyflow.utils import data_split, remap_pids, to_categorical

import matplotlib.pyplot as plt

import keras.backend as K

# Reload the aLund-value -> row-index dictionary from disk. The with-block
# closes the file handle, which a bare pickle.load(open(...)) leaves open.
# NOTE: unpickling can execute arbitrary code; only load a pickle file that
# this notebook itself produced.
with open("aLund_value_dict.pkl", "rb") as infile:
    aLund_value_dict = pickle.load(infile)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Build the model, load the saved .h5 weights, and calculate the predicted weight for each aLund value

In [18]:
# Layer widths handed to the PFN constructor.
Phi_sizes = (100, 100, 128)
F_sizes = (100, 100, 100)

# input_dim=7 matches the last axis of X_train; summary=True requests the
# layer summary table when the model is built.
pfn_settings = dict(
    input_dim=7,
    Phi_sizes=Phi_sizes,
    F_sizes=F_sizes,
    summary=True,
)
dctr = PFN(**pfn_settings)

# Restore the parameters stored in the saved .h5 file into the new model.
dctr.model.load_weights('./saved_models/DCTR_ee_dijets_1D_aLund.h5')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, None, 7)      0                                            
__________________________________________________________________________________________________
tdist_0 (TimeDistributed)       (None, None, 100)    800         input[0][0]                      
__________________________________________________________________________________________________
activation_1 (Activation)       (None, None, 100)    0           tdist_0[0][0]                    
__________________________________________________________________________________________________
tdist_1 (TimeDistributed)       (None, None, 100)    10100       activation_1[0][0]               
__________________________________________________________________________________________________
activation

### Calculate the dctr weight and put it in a dictionary

In [19]:
# Run the network once over the whole training array instead of issuing one
# predict() call per aLund value. The per-key loop made tens of thousands of
# calls on very small batches and paid the per-call overhead each time.
all_preds = dctr.predict(X_train, batch_size=1000)

# Build a new dict (aLund value -> prediction rows for that value's indices)
# and rebind the name, rather than overwriting entries while iterating.
# NOTE: after this cell the dict holds predictions, not indices, so it must be
# rebuilt or reloaded from 'aLund_value_dict.pkl' before this cell is re-run.
aLund_value_dict = {
    key: all_preds[indices] for key, indices in aLund_value_dict.items()
}

In [20]:
# Show only the first few entries: the dictionary has one key per distinct
# aLund value, each holding an array of predictions, so displaying it whole
# floods the notebook output.
dict(list(aLund_value_dict.items())[:3])

{0.8934119939804077: array([[4.2050488e-02, 9.5794952e-01],
        [2.5577949e-02, 9.7442198e-01],
        [9.6652228e-01, 3.3477720e-02],
        [9.9004936e-01, 9.9506145e-03],
        [3.7406552e-01, 6.2593454e-01],
        [2.3400272e-01, 7.6599729e-01],
        [4.1747972e-01, 5.8252025e-01],
        [8.6175114e-01, 1.3824882e-01],
        [5.9383976e-01, 4.0616021e-01],
        [5.1889914e-01, 4.8110086e-01],
        [8.8929355e-01, 1.1070651e-01],
        [9.7093719e-01, 2.9062865e-02],
        [9.1865319e-01, 8.1346855e-02],
        [8.5675466e-01, 1.4324540e-01],
        [8.9326376e-01, 1.0673626e-01],
        [3.5289532e-01, 6.4710468e-01],
        [4.8395339e-01, 5.1604658e-01],
        [7.7956301e-01, 2.2043695e-01],
        [9.0566432e-01, 9.4335653e-02],
        [9.9985147e-01, 1.4845916e-04],
        [7.3473291e-03, 9.9265265e-01],
        [6.8958396e-01, 3.1041607e-01],
        [3.1935582e-01, 6.8064421e-01],
        [9.8508251e-01, 1.4917551e-02],
        [3.9328128e-

In [21]:
import pickle

# Persist the aLund-value -> prediction dictionary to disk. The with-block
# guarantees the file is flushed and closed even if pickling raises, which the
# manual open()/close() pair did not.
with open('aLund_value_weight.pkl', 'wb') as outfile:
    pickle.dump(aLund_value_dict, outfile)