# Exploring the Decay Position Network

This repeats a little of the work that Rachel did, but goes straight to a training. This notebook uses Keras.

## Config

In [49]:
# func_adl endpoint URL and the CSV file read below to get the list of training datasets.
func_adl_endpoint = 'http://localhost:8000'
datasets_for_training_datafile = "../data/datasets.csv"

# Cut values derived in a previous notebook.
# TODO: move these into a python file of shared config constants.
# |JetEta| below this value -> Lxy cuts are applied; at or above it -> Lz cuts.
lxyz_eta_division = 1.3
# Lxy window: below the "short" value a jet is flagged as an inlier, above the "far" value as an outlier.
too_short_dist_lxy = 1300
too_far_dist_lxy = 4400
# Lz window, same convention as for Lxy.
too_short_dist_lz = 3500
too_far_dist_lz = 7500

# Columns to train on. Partly chosen by looking at the `Input Variables` worksheet to drop blanks.
# Names are spelled exactly as the columns appear in the fetched data (including 'EH_CVL2').
what_to_train_on = [
    'EMM_BL0', 'EMM_BL1', 'EMM_BL2', 'EMM_BL3',
    'EMM_EL0', 'EMM_EL1', 'EMM_EL2', 'EMM_EL3',
    'EH_EL0', 'EH_EL1', 'EH_EL2', 'EH_EL3',
    'EH_CBL0', 'EH_CBL1', 'EH_CVL2',
    'EH_TGL0', 'EH_TGL1', 'EH_TGL2',
    'EH_EBL0', 'EH_EBL1', 'EH_EBL2',
]
# Left out: 'FC_L0', 'FC_L1', 'FC_L2' - these seem to be all zeros as seen before.

## Python setup

In [11]:
# Designed not to be modified
import sys
sys.path.append("../")
from adl_func_client.event_dataset import EventDataset
from adl_func_client.use_exe_func_adl_server import use_exe_func_adl_server
from calratio_perjet_training.fetch_training_data import fetch_perjet_data
import glob
import numpy as np
import asyncio

import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from matplotlib import rcParams
plt.rc('font', size=14)

import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Dropout
import sklearn.metrics

Using TensorFlow backend.


## Load datasets

In [3]:
# Read the table of training datasets; its rows are iterated in the next cell to fetch per-jet data.
datasets = pd.read_csv(datasets_for_training_datafile)

In [4]:
async def fetch_data_async(info):
    """Fetch the per-jet data for one row of the datasets table.

    `info` is a single row; its `mH`, `mS`, `Lifetime`, `MCCampaign` and
    `RucioDSName` fields are read.

    Returns a three-element list `[info, tag, data]`, where `tag` is the
    string built from mH, mS, Lifetime and MCCampaign joined by underscores,
    and `data` is the awaited result of `fetch_perjet_data` for the
    `localds://` dataset, called with that same tag.
    """
    tag = f'{info.mH}_{info.mS}_{info.Lifetime}_{info.MCCampaign}'
    source = EventDataset(f'localds://{info.RucioDSName}')
    per_jet_data = await fetch_perjet_data(source, tag)
    return [info, tag, per_jet_data]
all_datasets_future = [fetch_data_async(info) for index, info in datasets.iterrows()]
datasets_for_training = await asyncio.gather(*all_datasets_future)

In [8]:
# Only a fixed slice of the fetched datasets (entries 20 through 34) is combined here.
what_to_combine = datasets_for_training[20:35]

# Each fetched entry is [info, tag, per-jet data]; label every per-jet frame with
# the parameters of the sample it came from.
sample_keys = [
    (d[0].mH, d[0].mS, d[0].Lifetime, f'{d[0].mH}/{d[0].mS}', d[0].MCCampaign)
    for d in what_to_combine
]
stacked_jets = pd.concat(
    [d[2] for d in what_to_combine],
    keys=sample_keys,
    names=['mH', 'mS', 'Lifetime', 'mH_mS', 'MC'],
)

# Round-trip through records so that every index level becomes an ordinary column.
all_jets = pd.DataFrame(stacked_jets.to_records())


Next we need to add a bunch of columns to the data to make it easy to use below. Eventually this code should be in a python file (along with the above code, I suppose).

In [20]:
# Transverse decay distance, computed from the x and y decay coordinates.
all_jets['Lxy'] = np.sqrt(all_jets.Lx*all_jets.Lx + all_jets.Ly*all_jets.Ly)

# Full-length boolean masks. Every mask below spans all rows of `all_jets`, so the
# flag columns are built without relying on pandas re-aligning (and NaN-filling)
# a filtered subset against the full column.
abs_jet_eta = np.abs(all_jets.JetEta)
llp_central = all_jets.IsLLP & (abs_jet_eta < lxyz_eta_division)
llp_forward = all_jets.IsLLP & (abs_jet_eta >= lxyz_eta_division)

# NOTE(review): Lz is compared as-is (signed). If Lz can be negative, the Lz cuts
# below probably want np.abs(all_jets.Lz) - confirm against the fetch code.

# Outlier: an LLP jet whose decay is beyond the "too far" cut
# (Lxy for central jets, Lz for forward jets).
all_jets['IsOutlier'] = (
    (llp_central & (all_jets.Lxy > too_far_dist_lxy))
    | (llp_forward & (all_jets.Lz > too_far_dist_lz))
)

# Inlier: an LLP jet whose decay is inside the "too short" cut
# (Lxy for central jets, Lz for forward jets).
all_jets['IsInlier'] = (
    (llp_central & (all_jets.Lxy < too_short_dist_lxy))
    | (llp_forward & (all_jets.Lz < too_short_dist_lz))
)

all_jets['JetIsCentral'] = abs_jet_eta < lxyz_eta_division

# Signal: an LLP jet with JetPt > 40 and |JetEta| < 2.4 that is neither an outlier nor an inlier.
all_jets["Signal"] = (
    all_jets.IsLLP
    & (all_jets.JetPt > 40)
    & (abs_jet_eta < 2.4)
    & ~all_jets.IsOutlier
    & ~all_jets.IsInlier
)

In [21]:
# Display the column names of the combined frame, including the derived columns added above.
all_jets.columns

Index(['mH', 'mS', 'Lifetime', 'mH_mS', 'MC', 'entry', 'RunNumber',
       'EventNumber', 'JetPt', 'JetEta', 'JetPhi', 'IsLLP', 'nLLPs_Near_Jets',
       'Lx', 'Ly', 'Lz', 'Leta', 'Lphi', 'EMM_BL0', 'EMM_BL1', 'EMM_BL2',
       'EMM_BL3', 'EMM_EL0', 'EMM_EL1', 'EMM_EL2', 'EMM_EL3', 'EH_EL0',
       'EH_EL1', 'EH_EL2', 'EH_EL3', 'EH_CBL0', 'EH_CBL1', 'EH_CVL2',
       'EH_TGL0', 'EH_TGL1', 'EH_TGL2', 'EH_EBL0', 'EH_EBL1', 'EH_EBL2',
       'FC_L0', 'FC_L1', 'FC_L2', 'Lxy', 'IsOutlier', 'IsInlier',
       'JetIsCentral', 'Signal'],
      dtype='object')

## Training and testing samples

We can't use `IsLLP` as the direct truth - some jets decay at the IP and are due to an LLP. We did a bunch of work in the notebook `Input Variable Plots` looking at how the inputs behaved and where to place cuts. We've put that in the `Signal` column.

In [25]:
# Split the jets on the `Signal` flag: rows where it is True vs rows where it is False.
signal_rows = all_jets.Signal.eq(True)
background_rows = all_jets.Signal.eq(False)

train_signal = all_jets.loc[signal_rows]
train_back = all_jets.loc[background_rows]

In [30]:
training_events = 20000

# `train_signal` comes from a frame concatenated dataset by dataset, so consecutive
# slices would draw the training and test jets from different samples. Shuffle the
# rows first (fixed seed, so a re-run gives the same split) and then slice.
shuffled_signal = train_signal.sample(frac=1, random_state=42)

# (training slice, test slice): the first `training_events` rows and the next `training_events` rows.
train_signal_c = (
    shuffled_signal[:training_events],
    shuffled_signal[training_events:2*training_events],
)

In [31]:
# Report how many signal jets ended up in the training and test slices.
print (f'Signal (training: {len(train_signal_c[0])}, test:{len(train_signal_c[1])})')

Signal (training: 20000, test:20000)


Next, put them into the conventional `x` (inputs) and `y` (target) training and test sets.

In [52]:
def norm_inputs(p, mean=None, std=None):
    """Standardize `p`: subtract the mean and divide by the standard deviation.

    Note this is a zero-mean / unit-std standardization, not a 0-to-1 scaling.

    Parameters
    ----------
    p : pandas Series or DataFrame to standardize (a DataFrame is handled per column).
    mean, std : optional statistics to use instead of those of `p` itself. Pass
        the training-set values when transforming a test set so that both sets
        are put on the same scale. When omitted, they are computed from `p`.

    Returns
    -------
    `(p - mean) / std`. A standard deviation of exactly zero is replaced by 1,
    so a constant column comes out as zeros (when `mean` is that constant)
    rather than as NaN/inf.
    """
    if mean is None:
        mean = p.mean()
    if std is None:
        std = p.std()

    # Guard against division by zero for constant inputs.
    if np.ndim(std) == 0:
        if std == 0:
            std = 1.0
    else:
        std = std.where(std != 0, 1.0)

    return (p - mean) / std


train_part, test_part = train_signal_c

# Target: Lxy. The test target is scaled with the *training* mean/std so that
# predictions and truth are compared on one common scale.
y_mean = train_part.Lxy.mean()
y_std = train_part.Lxy.std()
y_train = norm_inputs(train_part.Lxy, y_mean, y_std)
y_test = norm_inputs(test_part.Lxy, y_mean, y_std)

# Inputs: the selected training columns, again scaled with the training statistics only.
x_train_raw = train_part.filter(items=what_to_train_on)
x_mean = x_train_raw.mean()
x_std = x_train_raw.std()
x_train = norm_inputs(x_train_raw, x_mean, x_std)
x_test = norm_inputs(test_part.filter(items=what_to_train_on), x_mean, x_std)

## Build the Model

In [60]:
#%%capture --no-stdout --no-display

n_inputs = len(x_train.columns)

# Two 64-unit ReLU hidden layers feeding one output node. Dropout layers between
# the dense layers are currently left out.
# The last layer's size is the number of outputs - it could be 2 if we wanted to
# train both lxy and lz. It has no activation: softmax would make sense if several
# outputs had to sum to a fixed number, but this is a regression.
model = Sequential([
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(64, activation='relu'),
    Dense(1),
])

# Mean squared error as the loss (rather than categorical_crossentropy).
model.compile(loss='mean_squared_error', optimizer='adam')

# Hold out 25% of the training rows for validation.
model.fit(x_train, y_train, epochs=200, batch_size=32, shuffle=True, validation_split=0.25);

Train on 15000 samples, validate on 5000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/20