In [None]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import uproot as ur
import uproot_methods as urm
import numpy as np
import awkward
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.sequence import pad_sequences

import tools
from four_jet_network import FourJetNetwork

# Input ntuple. The default is the original scratch location; set the
# BBBB_NTUPLE environment variable to point the notebook at a different file
# without editing this cell.
filename = os.environ.get(
    "BBBB_NTUPLE",
    '/fast_scratch/atlas_bbbb/MAR20p0/user.jagrundy.20736236._000001.MiniNTuple.root',
)

print("opening file")
# Load the event table through the project helper, requesting the "tag" sort.
s_table = tools.open_file(filename, sort_by="tag")

Using TensorFlow backend.


opening file
sorting data by tag


# Stage 1, pt classifier

- if 4th jet found, keep selection based on this method
- if not, proceed to stage 2

In [None]:
print("filtering from", len(s_table), "total events")

# Event-level requirements, each expressed as a boolean mask over s_table.
nb34 = (s_table.nbjets == 3) | (s_table.nbjets == 4)  # 3 or 4 b-jets
nj4 = s_table.njets >= 4                               # at least 4 jets
nt3 = s_table.nbtags==3                                # exactly 3 b-tags
events = s_table[nb34 & nt3 & nj4]

# Require truth == 1 for each of the first three jet slots, i.e. make sure
# the 3 tags are actually correct. Applied one slot at a time, in order;
# this results in very little event loss.
for jet_idx in range(3):
    events = events[events.truth[:, jet_idx] == 1]

n_events = len(events)
print(n_events)

In [None]:
# Number of jet slots kept per event after padding.
cutoff = 10  # np.max(events.njets)
padding_val = 0

# Pad every event out to 'cutoff' jets, then boost/rotate the padded events.
events = tools.boost_and_rotate(tools.pad(events, cutoff))

In [None]:
import pt_selector

# Stage 1: let the pt-based selector propose a 4th jet for every event.
selections = pt_selector.select(events)

# Events for which stage 1 proposed no 4th jet; these are the ones handed to
# the stage-2 network below. This mask was referenced by the later cells but
# never defined, so the notebook failed on a fresh kernel.
# NOTE(review): assumes `selections` is an (n_events, cutoff) 0/1 array in
# which an all-zero row means "no jet picked" (the same layout stage 2 writes
# back into it) -- confirm against pt_selector.select.
picked_no_4th = ~np.asarray(selections).astype(bool).any(axis=1)

# compare to tag_u, truth_u
tools.evaluate_model(events.truth, events.tag, selections, savename='after_stage_1')

# Stage 2: The 4-jet NN Gauntlet

- make 4-jet nn
- split events into 4-jet groups
- for each event, take the candidate jet whose 4-jet group gets the highest nn score (no jet is picked if the best score is below 0.5)

## To make the 4-jet nn, just copy the 4b notebook

In [None]:
import importlib

# Training sample for the 4-jet network: events with exactly 4 jets,
# 3 or 4 b-jets, and 3 b-tags.
j4 = (s_table.njets == 4) # 4 jets
b34 = (s_table.nbjets == 3) | (s_table.nbjets == 4) # 3 or 4 b-jets
nt3 = s_table.nbtags==3  # 3 b-tags
events_4j = s_table[j4 & b34 & nt3]

# Require truth == 1 in each of the first three jet slots (tags are correct).
for jet_idx in range(3):
    events_4j = events_4j[events_4j.truth[:, jet_idx] == 1]

# pad, boost, rotate
events_4j = tools.pad(events_4j, length=4)
events_4j = tools.boost_and_rotate(events_4j)

# create network
nn = FourJetNetwork(events_4j)

# Replace the wrapper's model with a custom stack. Sequential takes a single
# *list* of layers; the original passed the layers as separate positional
# arguments and misspelled the class name, so this cell could not run.
nn.model = Sequential([
    Dense(700, activation='relu'),
    Dropout(0.1),
    Dense(500, activation='relu'),
    Dropout(0.1),
    Dense(300, activation='relu'),
    Dropout(0.1),
    Dense(100, activation='relu'),
    Dropout(0.1),
    Dense( 50, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])
optimizer = Adam(lr=5e-5)
nn.model.compile(loss='binary_crossentropy', optimizer=optimizer,
                 metrics=['accuracy'])
# NOTE(review): no input shape is given to the first layer, so the model is
# not built until it first sees data; summary() may raise here -- confirm,
# and if so move it after nn.learn() or pass input_shape to the first Dense.
nn.model.summary()
nn.learn(epochs=400)
#nn.evaluate()

In [None]:
# the network score for some events is given by
def score(nn, evs):
    """Return the network's output for `evs` as a flat 1-D array.

    Note: this expects evs to be events of 4 jets.

    `evs` is first passed through tools.scale_nn_input, the result is fed
    to nn.model.predict, and the prediction is flattened.
    """
    scaled = tools.scale_nn_input(evs)
    return nn.model.predict(scaled).flatten()

In [None]:
scores = np.zeros((len(events), cutoff), dtype=float)

# each group has this size
group_len = len(events.truth[picked_no_4th])


def _four_jet_groups(per_jet):
    """Stack the 4-jet groups [0, 1, 2, i] for i = 3 .. cutoff-1.

    Only the events selected by `picked_no_4th` are used. The result is the
    concatenation of one block per candidate jet i, in increasing i, each
    block holding every selected event in its original order.
    """
    chosen = per_jet[picked_no_4th]
    return np.concatenate([chosen[:, [0, 1, 2, i]] for i in range(3, cutoff)])


group_events = awkward.Table()
group_events.truth = _four_jet_groups(events.truth)
group_events.tag = _four_jet_groups(events.tag)

# Rebuild the four-vectors of the grouped jets from their components.
pt = _four_jet_groups(events.resolved_lv.pt)
eta = _four_jet_groups(events.resolved_lv.eta)
phi = _four_jet_groups(events.resolved_lv.phi)
E = _four_jet_groups(events.resolved_lv.E)
group_events.resolved_lv = urm.TLorentzVectorArray.from_ptetaphie(
    pt, eta, phi, E)
print(len(group_events.truth))

# Train a fresh 4-jet network on the grouped events.
group_nn = FourJetNetwork(group_events)
group_nn.learn(epochs=300)
#group_nn.evaluate()

In [None]:
group_scores = score(group_nn, group_events)
print(group_scores.shape)
# group_events was built by concatenating one block of `group_len` events per
# candidate jet (i = 3 .. cutoff-1), so the flat scores are candidate-major:
# flat index = (i - 3) * group_len + event. Reshape to (candidate, event) and
# transpose to get one row per event and one column per candidate jet.
# Reshaping straight to (group_len, cutoff-3) would mix scores from different
# events into the same row.
group_scores = group_scores.reshape((cutoff - 3, group_len)).T

In [None]:
# Best candidate score for every event.
maxes = np.max(group_scores, axis=1)
# Column c of group_scores belongs to candidate jet c + 3 (jets 0-2 are the
# fixed members of every group), so shift argmax back to a jet index.
# Without the shift the pick landed on jets 0 .. cutoff-4 instead.
select = np.argmax(group_scores, axis=1) + 3
# Below-threshold events point at an extra "no selection" slot.
select[maxes < 0.5] = cutoff

# One-hot encode the pick over cutoff + 1 slots (the last = "no selection").
nn_selections = np.zeros((len(select), cutoff+1), dtype=int)
nn_selections[np.arange(len(select)), select] = 1

# chop off the last "no selection" jet
nn_selections = nn_selections[:,:-1]

# and actually evaluate
#tools.evaluate_model(events.truth[picked_no_4th], events.tag[picked_no_4th], nn_selections)

# and actually evaluate
#tools.evaluate_model(events.truth[picked_no_4th], events.tag[picked_no_4th], nn_selections)

In [None]:
# Overwrite the rows of the earlier selections for the events indexed by
# picked_no_4th with the network's picks, then re-evaluate the full sample.
selections[picked_no_4th] = nn_selections
tools.evaluate_model(
    events.truth,
    events.tag,
    selections,
    savename="after_stage_2",
)