# Training a hadronization net

In [9]:
import random as rnd
import numpy as np

import tensorflow as tf
import tensorflow.keras as keras

import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Layer

from sklearn.preprocessing import OneHotEncoder

In [10]:
# Arrays written by HadronizationPrep.ipynb.  Later cells use columns 0-7 of x_raw and
# columns 0-3 of y_raw as numeric inputs/targets; the remaining columns are cast to int
# and one-hot encoded for the flavor model.
x_raw, y_raw = (np.loadtxt(path) for path in ('x_raw.dat', 'y_raw.dat'))

# Momentum model definition

In [11]:
# Dropout that is applied on every call (training *and* inference), designed to add a
# lot of randomness to the network output.
class PermaDropout(Layer):
    """Dropout layer that is never switched off.

    ``call`` forwards to ``tf.nn.dropout`` unconditionally, so a fresh dropout mask is
    drawn on every forward pass, including ``predict``.

    Args:
        rate: Fraction of the inputs to drop; passed straight to ``tf.nn.dropout``.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``, ``dtype``).
    """

    def __init__(self, rate, **kwargs):
        # Forward the base-class keyword arguments so the layer can be named and
        # re-created from a saved config.
        super(PermaDropout, self).__init__(**kwargs)
        self.rate = rate

    def call(self, inputs):
        # Deliberately no `training` switch: dropout stays active at inference time.
        return tf.nn.dropout(inputs, rate=self.rate)

    def get_config(self):
        """Return the constructor arguments so models using this layer can be serialized."""
        config = super(PermaDropout, self).get_config()
        config.update({"rate": self.rate})
        return config

# Scalars shared by the custom loss functions: the batch size and the regularisation
# scale (REG_LQCD) together with its square.
BATCH_SIZE = 128
REG_LQCD = 1.0
REG_LQCD_SQ = REG_LQCD * REG_LQCD

# Constant tensors filled with REG_LQCD_SQ, built with shape arguments 3, 4 and
# [BATCH_SIZE, 4] respectively.
REG_TENS, REG_TENS4, REG_TENSB4 = (
    K.constant(REG_LQCD_SQ, shape=dims) for dims in (3, 4, [BATCH_SIZE, 4])
)

# Stand-alone version of the custom loss.
# note: definition is asymmetric in penalizing large momenta
def lossx(y_true,y_pred):
    """Custom loss over 4-component targets/predictions laid out along the last axis.

    With ``A = y_true[..., 0]`` the value is

        1 - A * tanh(y_pred[..., 0])
        + 0.5 * (1 + A) * mean(mask * (y_pred - y_true)**2 / (y_true**2 + REG_LQCD_SQ))

    where ``mask = [0, 1, 1, 1]`` removes component 0 from the squared-error term and the
    mean runs over the last axis (all four entries, so the sum is divided by 4).

    Component 0 is always selected on the *last* axis, so the function works both for a
    single 4-vector and for a ``(batch, 4)`` tensor.  Indexing with a plain ``[0]`` would
    pick the first sample of a batch instead of the first component.

    Args:
        y_true: Target tensor whose last axis has length 4.
        y_pred: Prediction tensor with the same shape as ``y_true``.

    Returns:
        Tensor with the last axis reduced away (one loss value per sample).
    """
    flag = y_true[..., 0]
    # Relative squared error; REG_TENS4 keeps the denominator away from zero.
    rel_sq_err = [0,1,1,1]*K.square(y_pred - y_true)/(K.square(y_true)+REG_TENS4)
    return 1.-flag*K.tanh(y_pred[..., 0]) + 0.5*(1.+flag)*K.mean(rel_sq_err, axis=-1)
        
        
def cust_lossx():
    """Build the batched custom loss that is passed to ``model.compile``.

    The returned function expects ``(batch, 4)`` tensors.  With ``A = y_true[:, 0]`` it
    evaluates, per sample,

        1 - A * tanh(y_pred[:, 0])
        + 0.5 * (1 + A) * mean(mask * (y_pred - y_true)**2 / (y_true**2 + REG_LQCD_SQ))

    where ``mask = [0, 1, 1, 1]`` and the mean runs over the last axis.

    The regulariser is added as the scalar ``REG_LQCD_SQ`` and broadcast, which gives the
    same values as a pre-built ``[BATCH_SIZE, 4]`` constant but does not tie the loss to
    one particular batch size (partial final batches work too).

    Returns:
        A ``lossx(y_true, y_pred)`` callable usable as a Keras loss.
    """
    def lossx(y_true,y_pred):
        A = y_true[:,0]
        # Scalar regulariser broadcasts against any batch size.
        rel_sq_err = [0,1,1,1]*K.square(y_pred - y_true)/(K.square(y_true)+REG_LQCD_SQ)
        return 1.-A*K.tanh(y_pred[:,0]) + 0.5*(1.+A)*K.mean(rel_sq_err, axis=-1)

    return lossx

In [12]:
def define_momentum_model():
    """Build and compile the momentum network.

    Architecture: 8 inputs -> Dense(64, relu) -> PermaDropout(0.5) -> Dense(64, relu)
    -> PermaDropout(0.5) -> Dense(4, linear).  The model is compiled with the Adam
    optimizer and the loss returned by ``cust_lossx()``.  The layer summary is printed
    as a side effect.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        Dense(64, activation='relu',input_shape=[8]),
        PermaDropout(0.5),
        Dense(64, activation='relu'),
        PermaDropout(0.5),
        Dense(4, activation='linear')
        ])
    # summary() prints the table itself and returns None, so wrapping it in print()
    # would only add a stray "None" line to the output.
    model.summary()
    # Compile the model
    model.compile(optimizer='adam',loss=cust_lossx())
    return model

In [13]:
# Build and compile the momentum network; the call also prints the layer summary.
model = define_momentum_model()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                576       
_________________________________________________________________
perma_dropout_2 (PermaDropou (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
perma_dropout_3 (PermaDropou (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 4)                 260       
Total params: 4,996
Trainable params: 4,996
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
# Override the optimizer's learning rate after compilation.  `learning_rate` is the
# canonical hyper-parameter name; `lr` is only a legacy alias that newer Keras
# optimizers may not honour.
model.optimizer.learning_rate = 0.01

## Training momentum model

In [15]:
# Split sizes: 85% / 10% / 5% of the rows, each rounded down to a multiple of BATCH_SIZE
# so that every batch is full.
TRAINCUT = (int(0.85*len(x_raw))//BATCH_SIZE) * BATCH_SIZE
VALCUT = (int(0.1*len(x_raw))//BATCH_SIZE) * BATCH_SIZE
TESTCUT = (int(0.05*len(x_raw))//BATCH_SIZE) * BATCH_SIZE
print("{} {} {}".format(TRAINCUT,VALCUT,TESTCUT))

# Note: the rows are assumed not to need shuffling; this may be revisited later.

# Inputs are columns 0-7 of x_raw, targets are columns 0-3 of y_raw.
x_train = x_raw[:TRAINCUT,:8]
y_train = y_raw[:TRAINCUT,:4]

x_val = x_raw[TRAINCUT:TRAINCUT+VALCUT,:8]
y_val = y_raw[TRAINCUT:TRAINCUT+VALCUT,:4]

# The test rows are the last TESTCUT rows.  The start index is computed explicitly
# because the slice `-TESTCUT:` would select *every* row when TESTCUT is 0.
x_test = x_raw[len(x_raw)-TESTCUT:,:8]
y_test = y_raw[len(y_raw)-TESTCUT:,:4]

513920 60416 30208


In [16]:
# Train the momentum model.  validation_data is passed as a tuple (x_val, y_val), the
# form documented by Keras; a list is not unpacked as (inputs, targets) by every version.
history = model.fit(x_train, y_train, epochs=30, batch_size = BATCH_SIZE, validation_data=(x_val,y_val))

Train on 513920 samples, validate on 60416 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [28]:
# Spot check: show one training input (row 100) and the model output for it.
print(x_train[100])
# The slice 100:101 keeps the leading batch axis that predict() expects.
print(model.predict(x_train[100:101]))

[ 0.749102   -0.21227592  0.17818509  0.69593173  0.14085039 -0.10930122
  0.06399198  0.06162063]
[[ 9.0027885e+00  1.8762186e-02  7.3483270e-03 -5.5486590e-02]]


# Flavor model definition

In [37]:
# Split sizes: 85% / 10% / 5% of the rows, each rounded down to a multiple of BATCH_SIZE.
TRAINCUT = (int(0.85*len(x_raw))//BATCH_SIZE) * BATCH_SIZE
VALCUT = (int(0.1*len(x_raw))//BATCH_SIZE) * BATCH_SIZE
TESTCUT = (int(0.05*len(x_raw))//BATCH_SIZE) * BATCH_SIZE
print("{} {} {}".format(TRAINCUT,VALCUT,TESTCUT))

# Note: the rows are assumed not to need shuffling; this may be revisited later.

def _dense_one_hot_encoder():
    """Return a OneHotEncoder that yields dense arrays on old and new scikit-learn."""
    try:
        # scikit-learn >= 1.2 names the option `sparse_output`.
        return OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older releases only accept the original `sparse` keyword.
        return OneHotEncoder(sparse=False)

# One-hot encode the flavor columns: everything from column 8 on in x_raw (inputs) and
# from column 4 on in y_raw (targets), cast to int first.
encin = _dense_one_hot_encoder()
xf_ohe = encin.fit_transform(x_raw[:,8:].astype(int))

encout = _dense_one_hot_encoder()
yf_ohe = encout.fit_transform(y_raw[:,4:].astype(int))

xf_train = xf_ohe[:TRAINCUT]
yf_train = yf_ohe[:TRAINCUT]

xf_val = xf_ohe[TRAINCUT:TRAINCUT+VALCUT]
yf_val = yf_ohe[TRAINCUT:TRAINCUT+VALCUT]

# The test rows are the last TESTCUT rows.  The start index is computed explicitly
# because the slice `-TESTCUT:` would select *every* row when TESTCUT is 0.
xf_test = xf_ohe[len(xf_ohe)-TESTCUT:]
yf_test = yf_ohe[len(yf_ohe)-TESTCUT:]

513920 60416 30208


In [46]:
# Number of one-hot input columns; used as the input width of the flavor model.
nIn = xf_ohe.shape[1]

In [38]:
# Sanity check: (number of training rows, number of one-hot target columns).
yf_train.shape

(513920, 89)

In [39]:
def _first_seen_unique(values):
    """Return the distinct entries of `values` (flattened row by row) in first-seen order."""
    flat = np.asarray(values).ravel()
    distinct, first_pos = np.unique(flat, return_index=True)
    # np.unique sorts by value; re-sort by first position to recover appearance order.
    return list(distinct[np.argsort(first_pos)])

# Input flavors: a fixed seed list followed by every other code found in the flavor
# columns of x_raw (column 8 onwards), in order of first appearance.
inflavorset = [ 21, 1,-1,2,-2,3,-3 ]
extra_inflavors = [code for code in _first_seen_unique(x_raw[:,8:]) if code not in inflavorset]
inflavorset.extend(extra_inflavors)

# Output flavors: the distinct codes in column 4 of y_raw, in order of first appearance.
outflavorset = _first_seen_unique(y_raw[:,4])

nInStates = len(inflavorset)
nOutStates = len(outflavorset)

In [47]:
def define_flavor_model(n_in=None, n_out=None):
    """Build and compile the flavor classifier.

    Architecture: Input(n_in) -> Dense(64, relu) -> Dense(64, relu) -> Dense(n_out, softmax),
    compiled with the Adam optimizer and categorical cross-entropy.  The layer summary is
    printed as a side effect.

    Args:
        n_in: Width of the input layer.  Defaults to the notebook-level ``nIn``.
        n_out: Number of softmax outputs.  Defaults to the notebook-level ``nOutStates``.

    Returns:
        The compiled functional ``Model``.
    """
    # Fall back to the notebook globals so existing no-argument calls behave as before.
    if n_in is None:
        n_in = nIn
    if n_out is None:
        n_out = nOutStates

    inputs = Input(shape=(n_in,))
    l1 = Dense(64, activation='relu')(inputs)
    l2 = Dense(64, activation='relu')(l1)
    outputs = Dense(n_out, activation='softmax')(l2)
    model = Model(inputs=inputs, outputs=outputs)
    # summary() prints the table itself and returns None, so wrapping it in print()
    # would only add a stray "None" line to the output.
    model.summary()
    # Compile the model
    model.compile(optimizer='adam',loss='categorical_crossentropy')
    return model

In [48]:
# Build and compile the flavor classifier; the call also prints its layer summary.
flavormodel=define_flavor_model()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 39)]              0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                2560      
_________________________________________________________________
dense_8 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_9 (Dense)              (None, 89)                5785      
Total params: 12,505
Trainable params: 12,505
Non-trainable params: 0
_________________________________________________________________
None


In [50]:
# Train the flavor classifier.  validation_data is passed as a tuple (x_val, y_val), the
# form documented by Keras; a list is not unpacked as (inputs, targets) by every version.
# Note: this rebinds `history`, replacing the momentum model's training history.
history = flavormodel.fit(xf_train, yf_train, epochs=30, batch_size = BATCH_SIZE, validation_data=(xf_val,yf_val))

Train on 513920 samples, validate on 60416 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [51]:
# Re-display the one-hot training target shape after training (same expression as the
# earlier shape check).
yf_train.shape

(513920, 89)