This file has been a sandbox for when I want to test out something NN-related without using the whole analyzer. **Don't assume anything good will come out of copying code from here!**

In [1]:
import awkward as ak
import numpy as np

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import PackedSelection

from coffea.ml_tools.torch_wrapper import torch_wrapper

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import pickle

### Need to run these before anything else

In [None]:
class jetAssignmentNN(torch_wrapper):
    """coffea ``torch_wrapper`` that builds the per-jet inputs of the jet-assignment network.

    The feature scaling reads a module-level ``scaler`` object exposing
    ``mean_`` and ``scale_``; that global must be defined before the wrapper
    is evaluated.
    """

    def prepare_awkward(self, events):
        """Convert ``events.good_jets`` into the arguments passed to the model.

        Every jet becomes one row of 13 features: its index within the event,
        its own pt / eta / phi / DeepFlavB score, and the mass / pt / eta / phi
        of two event-level jet sums (jets in slots 1-3 and jets in slots 0-3),
        repeated for each jet of the event.

        Args:
            events: event array carrying a ``good_jets`` collection.

        Returns:
            A ``(args, kwargs)`` pair: ``args`` is a one-element tuple holding
            the scaled features cast to float32, ``kwargs`` is empty.
        """
        # ak = self.get_awkward_lib(events)
        jets = events.good_jets
        flat_jets = ak.flatten(jets)

        # Four-vector sums over fixed jet slots, one per event.
        m3 = jets[:, 1:4].sum()
        m4 = jets[:, 0:4].sum()

        # Jagged array of ones: multiplying an event-level quantity by it
        # repeats that quantity once per jet before flattening.
        ones = ak.ones_like(jets.pt)

        imap = {
            "features": {
                "jetOrdinality": ak.flatten(ak.local_index(jets, axis=1)),
                # Raw jet pt. The previous "- 2" offset is removed so these
                # inputs match the hand-built feature table in the notebook,
                # which feeds the same scaler with the unshifted pt.
                "jetPT": flat_jets.pt,
                "jetEta": flat_jets.eta,
                "jetPhi": flat_jets.phi,
                "jetBScore": flat_jets.btagDeepFlavB,
                "m3M": ak.flatten(ones * m3.mass),
                "m3PT": ak.flatten(ones * m3.pt),
                "m3Eta": ak.flatten(ones * m3.eta),
                "m3Phi": ak.flatten(ones * m3.phi),
                "m4M": ak.flatten(ones * m4.mass),
                "m4PT": ak.flatten(ones * m4.pt),
                "m4Eta": ak.flatten(ones * m4.eta),
                "m4Phi": ak.flatten(ones * m4.phi),
            }
        }

        # Stack the feature columns side by side; the column order is the
        # insertion order of the dict above and must match the scaler's order.
        imap_concat = ak.concatenate(
            [x[:, np.newaxis] for x in imap["features"].values()], axis=1
        )
        imap_scaled = (imap_concat - scaler.mean_) / scaler.scale_
        return (ak.values_astype(imap_scaled, "float32"),), {}

In [None]:
class Net(nn.Module):
    """Small fully connected classifier: 13 inputs -> 13 hidden units -> 3 class scores."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(13, 13)
        self.fc2 = nn.Linear(13, 3)

    def forward(self, features=None):
        """Return the softmax over dim 1 of the network output for ``features``.

        ``features`` is expected to have 13 values along its last axis; the
        ``None`` default only exists in the signature and is not usable.
        """
        hidden = F.relu(self.fc1(features))
        logits = self.fc2(hidden)
        return F.softmax(logits, dim=1)

### Converting model to TorchScript (if it isn't already)
All the models I've made have equivalent TorchScript versions, ending in "_traced".

In [None]:
 # An instance of your model.
# The file is loaded as a whole module object (eval() is called on the result),
# so the model's class definition has to be importable when this runs.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled modules — confirm against the installed version.
model = torch.load("jetMatcherNN_highly_uncomp.pt")
model.eval()

# An example input you would normally provide to your model's forward() method.
# One row of 13 random values; tracing records the operations run on it.
example = torch.rand(1, 13)
traced_script_module = torch.jit.trace(model, example)
# Written next to the original file, with the "_traced" suffix.
traced_script_module.save("jetMatcherNN_highly_uncomp_traced.pt")

### Events

In [None]:
#TODO: fix path issue
# Relative path: it is resolved against the directory the notebook runs from.
fname = "data/NANOAOD-1200_400.root"
# delayed=False is passed so the events are read eagerly rather than lazily.
# NOTE(review): the dataset label is "DYJets" while the file name is
# NANOAOD-1200_400 — confirm the label is what downstream code expects.
events = NanoEventsFactory.from_root(
    {fname: "Events"},
    schemaclass=NanoAODSchema,
    metadata={"dataset": "DYJets"},
    delayed=False
).events()

In [3]:
def makeCutSet(x, s, *args):
    """Select from ``x`` once per threshold, keeping entries whose score ``s`` exceeds it.

    Returns a list with one selection per value in ``args``, in the same
    order; the list is empty when no thresholds are given.
    """
    selections = []
    for threshold in args:
        passes = s > threshold
        selections.append(x[passes])
    return selections

# btagDeepFlavB thresholds in increasing order; createObjects uses them as the
# loose, medium and tight b-jet cuts respectively.
b_tag_wps = [0.0490, 0.2783, 0.7100]

def createObjects(events):
    """Attach the selected object collections to ``events`` and return it.

    Fields added, in this order: ``good_jets``, ``good_electrons``,
    ``good_muons``, ``loose_bs``, ``med_bs``, ``tight_bs``, ``tight_tops``.
    The input ``events`` object is modified in place.
    """
    # Kinematic acceptance shared by jets and fat jets: pt > 30, |eta| < 2.4.
    jet = events.Jet
    good_jets = jet[(jet.pt > 30) & (abs(jet.eta) < 2.4)]
    fat = events.FatJet
    fat_jets = fat[(fat.pt > 30) & (abs(fat.eta) < 2.4)]

    # Three ParticleNet top-vs-QCD cuts are evaluated; only the tightest one
    # (last threshold) is stored on the events.
    top_cuts = makeCutSet(fat_jets, fat_jets.particleNet_TvsQCD, 0.58, 0.80, 0.97)
    tight_top = top_cuts[2]

    # Disabled taggers, kept for reference (score field: thresholds):
    #   particleNet_WvsQCD: 0.7, 0.94, 0.98             -> loose/med/tight_Ws
    #   deepTag_TvsQCD:     0.436, 0.802, 0.922, 0.989  -> deep_top_wp1..4
    #   deepTag_WvsQCD:     0.458, 0.762, 0.918, 0.961  -> deep_W_wp1..4
    # The med_tops / loose_tops outputs are likewise not stored.

    loose_b, med_b, tight_b = makeCutSet(
        good_jets,
        good_jets.btagDeepFlavB,
        b_tag_wps[0],
        b_tag_wps[1],
        b_tag_wps[2],
    )

    el = events.Electron
    electron_id = el.cutBased == 4
    electron_iso = el.miniPFRelIso_all < 0.1
    electron_kin = (el.pt > 30) & (abs(el.eta) < 2.4)
    good_electrons = el[electron_id & electron_iso & electron_kin]

    mu = events.Muon
    muon_id_iso = (mu.mediumId) & (mu.miniPFRelIso_all < 0.2)
    muon_kin = (mu.pt > 30) & (abs(mu.eta) < 2.4)
    good_muons = mu[muon_id_iso & muon_kin]

    # Dict insertion order fixes the order in which the fields are added.
    collections = {
        "good_jets": good_jets,
        "good_electrons": good_electrons,
        "good_muons": good_muons,
        "loose_bs": loose_b,
        "med_bs": med_b,
        "tight_bs": tight_b,
        "tight_tops": tight_top,
    }
    for field_name, collection in collections.items():
        events[field_name] = collection

    return events

def createSelection(events):
    """Build the baseline ``PackedSelection`` for the analysis.

    Expects ``events`` to already carry the ``good_jets``, ``good_muons``,
    ``good_electrons``, ``med_bs`` and ``tight_bs`` collections.

    Named cuts registered, in this order (the caller decides which to combine):
    - "hlt": HLT.PFHT1050 or HLT.AK8PFJet360_TrimMass30 fired; only added
      when ``events`` has an ``HLT`` field
    - "highptjet": first good jet has pt > 300
    - "jets": 4 <= number of good jets <= 6
    - "0Lep": no good electrons and no good muons
    - "2bjet": at least 2 medium b jets
    - "1tightbjet": at least 1 tight b jet
    - "b_dr": delta_R between the first two medium b jets > 1

    Returns:
        The populated ``PackedSelection``.
    """
    selection = PackedSelection()
    good_jets = events.good_jets
    good_muons = events.good_muons
    good_electrons = events.good_electrons
    med_b = events.med_bs
    tight_b = events.tight_bs

    n_jets = ak.num(good_jets)

    # Pad so that indexing the first jet yields None (not an error) for
    # events without any good jet; those events fail the pt cut below.
    filled_jets = ak.pad_none(good_jets, 1, axis=1)

    # Same idea for the b-jet pair: events with fewer than two medium b jets
    # get a missing delta_R, replaced by 0.0 so that they fail "b_dr".
    filled_med = ak.pad_none(med_b, 2, axis=1)
    med_dr = ak.fill_none(filled_med[:, 0].delta_r(filled_med[:, 1]), 0.0)

    # A disabled "jet_dr" cut (2 < delta_R of the first two jets < 4) used to
    # be computed here; its unused computation has been removed.
    if "HLT" in events.fields:
        selection.add("hlt", (events.HLT.PFHT1050 | events.HLT.AK8PFJet360_TrimMass30))
    selection.add("highptjet", ak.fill_none(filled_jets[:, 0].pt > 300, False))
    selection.add("jets", (n_jets >= 4) & (n_jets <= 6))
    selection.add("0Lep", (ak.num(good_electrons) == 0) & (ak.num(good_muons) == 0))
    selection.add("2bjet", ak.num(med_b) >= 2)
    selection.add("1tightbjet", ak.num(tight_b) >= 1)
    selection.add("b_dr", med_dr > 1)
    return selection

In [4]:
# Attach the derived collections, then keep only events passing every cut named below.
# NOTE(review): the "hlt" cut that createSelection can register is not in this
# list — confirm that leaving the trigger out is intended.
events = createObjects(events)
mask = createSelection(events).all("highptjet", "jets", "0Lep", "2bjet", "1tightbjet", "b_dr")
events = events[mask]

NameError: name 'events' is not defined

### don't care + having fun + playing

In [5]:
# Load the fitted feature scaler used to standardise the network inputs.
# NOTE: unpickling executes code stored in the file, so only load scaler
# files that you produced yourself or otherwise trust.
# The with-block closes the file handle once the scaler has been read.
with open('scaler.pkl', 'rb') as scl:
    scaler = pickle.load(scl)

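Note: unpickling a scikit-learn object is only safe for files you trust, and it may break across library versions; see https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations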


In [6]:
# Hand-built version of the network inputs, evaluated directly on the selected
# events: one row per jet, 13 feature columns, standardised with the scaler.
jets = events.good_jets
flat_jets = ak.flatten(jets)

# Four-vector sums over fixed jet slots (1-3 and 0-3), one per event.
m3 = jets[:,1:4].sum()
m4 = jets[:,0:4].sum()

# Jagged ones: multiplying an event-level value by it repeats the value once
# per jet, so it can be flattened alongside the per-jet quantities.
ones = ak.ones_like(jets.pt)

imap = {
    "features": {
        "jetOrdinality":    ak.flatten(ak.local_index(jets, axis=1)),
        "jetPT": 		    flat_jets.pt,
        "jetEta": 		    flat_jets.eta,
        "jetPhi": 		    flat_jets.phi,
        "jetBScore":    	flat_jets.btagDeepFlavB,
        "m3M": 			    ak.flatten(ones * m3.mass),
        "m3PT": 		    ak.flatten(ones * m3.pt),
        "m3Eta": 		    ak.flatten(ones * m3.eta),
        "m3Phi": 	        ak.flatten(ones * m3.phi),
        "m4M": 			    ak.flatten(ones * m4.mass),
        "m4PT":		        ak.flatten(ones * m4.pt),
        "m4Eta":		    ak.flatten(ones * m4.eta),
        "m4Phi":	        ak.flatten(ones * m4.phi)
    }
}

# Turn each feature into a single column and join the columns side by side;
# the column order is the insertion order of the dict above.
imap_concat = ak.concatenate([x[:, np.newaxis] for x in imap['features'].values()], axis=1)
# Standardise with the fitted scaler's per-column mean and scale.
imap_scaled = (imap_concat - scaler.mean_) / scaler.scale_
retmap = {
    "features": [imap_scaled]
}

NameError: name 'events' is not defined

In [10]:
retmap["features"]

[<Array [[-1.36, 1.22, ..., 0.168, 1.09], ...] type='20510 * 13 * float64'>]

In [11]:
imap_scaled

In [None]:
imap_concat - ak.Array(scaler.mean_)

In [12]:
# Wrap the TorchScript file in the jetAssignmentNN wrapper; this rebinds the
# name `model`, which the tracing cell used for the raw torch module.
model = jetAssignmentNN("traced_model.pt")

In [13]:
# Evaluate the wrapped network on the selected events. Later cells read
# scores[:, 0] and scores[:, 1] and unflatten them with ak.num(jets), i.e.
# they treat the result as one row per jet.
scores = model(events)



In [14]:
ak.unflatten(scores[:,0], ak.num(jets))

In [15]:
ak.unflatten(scores[:,1], ak.num(jets))

In [32]:
# Score thresholds to scan: threshold_count evenly spaced values in [0.9, 1],
# shaped as a column so that they broadcast against flat per-event and
# per-jet arrays. The count is now taken from threshold_count instead of
# being repeated as a literal.
threshold_count = 50
threshold_vals = np.linspace(0.9, 1, threshold_count).reshape((threshold_count, 1))
# Arrays of ones with one entry per event / per jet; only their lengths matter.
events_shape = ak.ones_like(jets[:, 0].pt)
jets_shape = ak.ones_like(ak.flatten(jets.pt))
# Multiplying by the threshold column gives one row per threshold, each row
# filled with that threshold.
threshold_arr_events_shape = np.multiply(events_shape, threshold_vals)
threshold_arr_jets_shape = np.multiply(jets_shape, threshold_vals)

In [33]:
threshold_arr_jets_shape

In [18]:
# Column 1 of the network output, one value per jet.
# NOTE(review): presumably the "chargino" class score, going by the name — confirm.
charg_scores = scores[:,1]

In [45]:
flat_jets = ak.flatten(jets)

In [102]:
# Repeat the flat jet list once per threshold row by broadcasting it against
# the (threshold x jet) array; only the broadcast jets are kept.
flatjets_reshaped = ak.broadcast_arrays(flat_jets, threshold_arr_jets_shape)[0]

In [103]:
flatjets_reshaped

In [80]:
[charg_scores > threshold_vals]

[<Array [[True, False, True, ..., False, True], ...] type='50 * 20510 * bool'>]

In [87]:
ak.broadcast_arrays(flat_jets, jets_shape)[0]

In [96]:
# Per-event jet counts repeated once per threshold row.
nJets_reshaped = ak.broadcast_arrays(ak.num(jets), threshold_arr_events_shape)[0]

In [118]:
flatjets_reshaped

In [104]:
# Row i keeps the jets whose score exceeds threshold i; every other jet becomes None.
masked_jets = ak.mask(flatjets_reshaped, charg_scores > threshold_vals)

In [203]:
ak.broadcast_arrays(ak.num(jets), threshold_arr_events_shape)[0]

In [208]:
ak.Array(threshold_vals)

In [153]:
ak.flatten(masked_jets)

In [92]:
ak.num(jets)

In [136]:
# First threshold row only: regroup the masked jets into events, drop the
# masked-out jets, and take the mass of the summed surviving jets per event.
ak.drop_none(ak.unflatten(masked_jets[0], nJets_reshaped[0])).sum(axis=1).mass

In [108]:
nJets_reshaped

In [115]:
ak.sum(ak.flatten(nJets_reshaped))

1025500

In [110]:
ak.sum(nJets_reshaped)

1025500

In [111]:
50*20510

1025500

In [211]:
# Number of thresholds to scan; the linspace below now uses it instead of
# repeating the literal.
threshold_count = 50
threshold_vals = np.linspace(0.9, 1, threshold_count).reshape((threshold_count, 1))

# for M events with J total jets and N threshold values {n1, n2, ... nN},
# the following code gives an N by M array
# [ [n1, n1, ... , n1]
#   [n2, n2, ... , n2]
#   ...
#   [nN, nN, ... , nN] ]
# and an N by J array of the same format
events_shape = ak.ones_like(jets[:, 0].pt)
jets_shape = ak.ones_like(ak.flatten(jets.pt))
threshold_arr_events_shape = np.multiply(events_shape, threshold_vals)
threshold_arr_jets_shape = np.multiply(jets_shape, threshold_vals)

flat_jets = ak.flatten(jets)
flat_jets_reshaped = ak.broadcast_arrays(flat_jets, threshold_arr_jets_shape)[0]
nJets_reshaped = ak.broadcast_arrays(ak.num(jets), threshold_arr_events_shape)[0]
# nJets_reshaped gives [ak.num(jets)] * threshold_count

# turns all noncompliant jets to NoneType
# row 1 is the jets that pass the first threshold, etc.
# until row N is a bunch of Nones (since the final threshold is 1)
masked_jets = ak.mask(flat_jets_reshaped, charg_scores > threshold_vals)

# Why the regrouping works: every row of masked_jets holds all J jets in event
# order, and ak.flatten(nJets_reshaped) is the list of per-event jet counts
# repeated once per threshold row. Unflattening along axis=1 with those counts
# therefore cuts each row back into per-event jet lists, giving a
# threshold x event x jet layout; drop_none then removes the jets that failed
# the threshold.
masked_jets_reshaped = ak.drop_none(ak.unflatten(masked_jets, ak.flatten(nJets_reshaped), axis=1))
# Surviving jets per (threshold, event), and the mass of their four-vector sum.
nChargJets_arr = ak.num(masked_jets_reshaped, axis=2)
recoChargMass_arr = masked_jets_reshaped.sum().mass

# Flat views with one entry per (threshold, event) pair, aligned with each other.
flat_thresholds_arr = ak.flatten(threshold_arr_events_shape)
flat_nChargJets_arr = ak.flatten(nChargJets_arr)
flat_recomass_arr = ak.flatten(recoChargMass_arr)

In [212]:
flat_thresholds_arr

In [214]:
flat_nChargJets_arr

In [159]:
masked_jets

In [164]:
ak.sum(ak.num(jets))

20510

In [175]:
masked_jets

In [170]:
ak.num(jets)

In [171]:
ak.num(flatjets_reshaped, axis=1), ak.sum(ak.num(jets))

(<Array [20510, 20510, 20510, 20510, ..., 20510, 20510, 20510] type='50 * int64'>,
 20510)

In [205]:
ak.flatten(nJets_reshaped)

In [206]:
# Earlier copy of the threshold scan; it relies on threshold_vals from a previous cell.
# Arrays of ones with one entry per event / per jet; only their lengths matter.
events_shape = ak.ones_like(jets[:, 0].pt)
jets_shape = ak.ones_like(ak.flatten(jets.pt))
# One row per threshold, each row filled with that threshold.
threshold_arr_events_shape = np.multiply(events_shape, threshold_vals)
threshold_arr_jets_shape = np.multiply(jets_shape, threshold_vals)

flat_jets = ak.flatten(jets)
flat_jets_reshaped = ak.broadcast_arrays(flat_jets, threshold_arr_jets_shape)[0]
nJets_reshaped = ak.broadcast_arrays(ak.num(jets), threshold_arr_events_shape)[0]
# nJets_reshaped gives [ak.num(jets)] * threshold_count

# turns all noncompliant jets to NoneType
# row 1 is the jets that pass the first threshold, etc.
# until row N is a bunch of Nones (since the final threshold is 1)
masked_jets = ak.mask(flat_jets_reshaped, charg_scores > threshold_vals)

# Each row of masked_jets holds every jet in event order, and the flattened
# nJets_reshaped is the per-event jet count repeated once per threshold row,
# so unflattening along axis=1 cuts each row back into per-event jet lists.
# drop_none then removes the jets that failed the threshold.
masked_jets_reshaped = ak.drop_none(ak.unflatten(masked_jets, ak.flatten(nJets_reshaped), axis=1))
# Surviving jets per (threshold, event), and the mass of their four-vector sum.
nChargJets_arr = ak.num(masked_jets_reshaped, axis=2)
recoChargMass_arr = masked_jets_reshaped.sum().mass

# Flat views with one entry per (threshold, event) pair.
flat_thresholds_arr = ak.flatten(threshold_arr_events_shape)
flat_nChargJets_arr = ak.flatten(nChargJets_arr)
flat_recomass_arr = ak.flatten(recoChargMass_arr)

In [197]:
threshold_arr_events_shape

In [217]:
jets[:, 0:4].sum().mass

In [16]:
# Regroup the per-jet scores by event and rank them within each event.
# ak.argsort sorts ascending by default, so the last three positions are the
# jets with the three largest column-1 scores, and the last position is the
# jet with the largest column-0 score.
highest_3_charg_score_idx = ak.argsort(ak.unflatten(scores[:,1], ak.num(jets)), axis=1)[:, -3:]
highest_stop_score_idx = ak.argsort(ak.unflatten(scores[:,0], ak.num(jets)), axis=1)[:, -1]

In [22]:
# Sum of the three jets with the largest column-1 scores in each event, and its mass.
top_3_charg_score_sum = jets[highest_3_charg_score_idx].sum()
m3_top_3_nn_charg_score = top_3_charg_score_sum.mass

In [23]:
# Add the jet with the largest column-0 score to the three-jet sum; ak.singletons
# wraps each index in a one-element list so it can index the jagged jets.
# NOTE(review): the two rankings are independent, so the added jet may already
# be one of the three summed jets — confirm whether that overlap is acceptable.
nn4 = top_3_charg_score_sum + jets[ak.singletons(highest_stop_score_idx)]

In [26]:
# The single jet with the largest column-0 score per event, kept as a one-element list.
stop_jets = jets[ak.singletons(highest_stop_score_idx)]

In [10]:
# Mass of the summed first four jets of each event, without any network input.
m14 = jets[:, 0:4].sum().mass

In [11]:
m14

In [35]:
ak.unflatten(scores[:,0], ak.num(jets))

In [32]:
highest_3_charg_score_idx

In [31]:
highest_stop_score_idx

In [18]:
# Per-event mask of jets whose column-1 score exceeds 0.95; it is not used
# again within this cell.
high_charg_score_mask = ak.unflatten(scores[:,1] > 0.95, ak.num(jets))
# ak.argsort sorts ascending by default, so the trailing positions hold the
# highest-scoring jets of each event.
highest_3_charg_score_idx = ak.argsort(ak.unflatten(scores[:,1], ak.num(jets)), axis=1)[:, -3:]
highest_stop_score_idx = ak.argsort(ak.unflatten(scores[:,0], ak.num(jets)), axis=1)[:, -1]

# Sum of the three jets with the largest column-1 scores, and its mass.
top_3_charg_score_sum = jets[highest_3_charg_score_idx].sum()
m3_top_3_nn_charg_score = top_3_charg_score_sum.mass

In [19]:
m3_top_3_nn_charg_score

In [27]:

ak.flatten((top_3_charg_score_sum + stop_jets).mass)

In [25]:
ak.flatten(nn4.mass)

In [47]:
# Same selection as `stop_jets` in an earlier cell, bound to a singular name.
stop_jet = jets[ak.singletons(highest_stop_score_idx)]