In [41]:
import pandas as pd
import numpy as np
import seaborn as sns
from pylorentz import Momentum4
import itertools

from matplotlib import pyplot as plt

# Top-quark mass constant. NOTE(review): 173000 presumably means ~173 GeV
# expressed in MeV — confirm the unit against the input ntuple.
top_mass = 173_000

In [42]:
# Column names for the input CSV: the eight y1/y2 columns first, then five
# columns (pt, eta, phi, E, DLR1) for each of the jet slots 0..10.
y_vars = [
    'y1_pt', 'y1_eta', 'y1_phi', 'y1_E',
    'y2_pt', 'y2_eta', 'y2_phi', 'y2_E',
]
# Templates are filled with the jet slot number via %-formatting.
jet_vars = ['jet%d_pt', 'jet%d_eta', 'jet%d_phi', 'jet%d_E', 'jet%d_DLR1']
labels = y_vars + [template % slot for slot in range(11) for template in jet_vars]

In [43]:
# Every column label comes from `labels`, so the file is read as header-less
# (this is what pandas already does when `names` is passed without `header`).
signal = pd.read_csv('yy_jj.csv', header=None, names=labels)

In [44]:
# Preview the first five events of the raw frame.
signal.head(n=5)

Unnamed: 0,y1_pt,y1_eta,y1_phi,y1_E,y2_pt,y2_eta,y2_phi,y2_E,jet0_pt,jet0_eta,...,jet9_pt,jet9_eta,jet9_phi,jet9_E,jet9_DLR1,jet10_pt,jet10_eta,jet10_phi,jet10_E,jet10_DLR1
0,45689.4,-2.22235,2.8735,213308.0,34327.6,-0.185603,-0.518296,34920.5,76765.1,1.60168,...,,,,,,,,,,
1,79195.1,-1.36626,-0.786426,165349.0,42374.6,-0.5145,2.60608,48107.9,126523.0,-1.33948,...,,,,,,,,,,
2,138646.0,-1.77453,3.10436,420587.0,41397.8,-1.73549,0.729734,121048.0,520327.0,-1.19417,...,,,,,,,,,,
3,103315.0,-0.640509,1.36108,125242.0,48145.5,-1.71822,-2.89602,138515.0,83354.4,-0.663305,...,,,,,,,,,,
4,85188.3,1.76442,-0.659135,255968.0,51800.2,1.06268,2.61873,83907.2,46918.6,0.99661,...,,,,,,,,,,


In [45]:
# Independent (deep) copy of the raw frame; derived per-event columns are added
# to this copy so `signal` itself stays as loaded.
signal_extended = signal.copy(deep=True)

In [46]:
# Jet multiplicity per event, inferred from how many cells of the row are
# filled: 8 cells belong to the y1/y2 columns and every jet contributes 5
# cells, with unused jet slots left as NaN.
# Counting non-null cells with a vectorised notna().sum(axis=1) gives the same
# numbers as looping over the rows with iterrows(), without a Python-level
# pass over every event.
# NOTE(review): assumes the y1/y2 columns are never NaN and that every present
# jet has all 5 fields filled — confirm against the ntuple writer.
filled_fields = signal.notna().sum(axis=1)
# Dividing and then casting truncates toward zero, exactly like int(x / 5).
signal_extended["num_of_jets"] = ((filled_fields - 8) / 5).astype("int64")

In [47]:
# Show the derived jet-multiplicity column.
signal_extended.loc[:, "num_of_jets"]

0          6
1          4
2          5
3          4
4          4
          ..
1780125    5
1780126    5
1780127    4
1780128    4
1780129    5
Name: num_of_jets, Length: 1780130, dtype: int64

In [48]:
# Number of jets per event that have pt < 30 GeV (threshold written as 30000,
# i.e. presumably MeV — TODO confirm units).
# Only the first `num_of_jets` jet slots of an event are inspected, matching a
# per-event loop over range(num_of_jets). Comparisons against NaN are False,
# so empty slots never count. Working column-by-column keeps this vectorised
# instead of iterating over every row in Python.
jet_count = signal_extended["num_of_jets"]
# An empty frame has no maximum; treat it as "no slots to inspect".
max_jets = int(jet_count.max()) if len(jet_count) else 0

low_pt_count = pd.Series(0, index=signal_extended.index, dtype="int64")
for slot in range(max_jets):
    slot_pt = signal_extended["jet%d_pt" % slot]
    # Count the slot only for events that actually have more than `slot` jets.
    low_pt_count += ((slot_pt < 30000) & (jet_count > slot)).astype("int64")

# Plain list of ints, one entry per event in frame order.
num_jets_low = low_pt_count.tolist()


In [49]:
# Attach the per-event low-pt jet count as a column of the working frame.
signal_extended.loc[:, "num_jets_low"] = num_jets_low

In [50]:
# Accumulator for the selected events; one flat list of values per event.
signal_output = []


def sort_jets(jet):
    """Sort key for a jet record: return the element at position 1.

    The selection loop builds jet records as
    [slot, pt, eta, phi, E, DLR1], so position 1 is the jet pt.
    """
    pt = jet[1]
    return pt

In [51]:
# Event selection. An event is written out when
#   * at least four of its jets pass the pt cut, and
#   * at least one of its four leading-pt jets has DLR1 at or above the cut.
# Each kept event becomes one flat record: the eight y1/y2 values followed by
# (pt, eta, phi, E, DLR1) of the four leading jets in descending pt order.
MIN_JET_PT = 30000     # jets below this pt are ignored (30 GeV if pt is in MeV — TODO confirm units)
N_LEADING_JETS = 4     # number of jets required and written out per event
BTAG_DLR1_CUT = 2.2    # a jet with DLR1 >= this value counts as b-tagged
JET_FIELDS = ("pt", "eta", "phi", "E", "DLR1")

# Reset the accumulator here so that re-running this cell neither appends the
# same events a second time nor fails because `signal_output` has since been
# rebound to a DataFrame further down the notebook.
signal_output = []

# Apply the jet-multiplicity cut in one vectorised step so the slow row-wise
# loop only visits events that can still be selected.
enough_jets = (
    signal_extended["num_of_jets"] - signal_extended["num_jets_low"]
) >= N_LEADING_JETS

for _, row in signal_extended[enough_jets].iterrows():
    # One record per jet passing the pt cut: [slot, pt, eta, phi, E, DLR1].
    jets = [
        [slot] + [row["jet%d_%s" % (slot, field)] for field in JET_FIELDS]
        for slot in range(int(row["num_of_jets"]))
        if row["jet%d_pt" % slot] >= MIN_JET_PT
    ]

    # Guard against a NaN pt inside the counted slots: such a jet is in neither
    # the low-pt count nor this list, which would otherwise leave fewer than
    # four jets to index below.
    if len(jets) < N_LEADING_JETS:
        continue

    # Leading jets = highest pt first (sort_jets returns the pt entry).
    jets.sort(key=sort_jets, reverse=True)
    leading_jets = jets[:N_LEADING_JETS]

    # condition: keep events with at least one b-tagged jet among the leading jets
    if max(jet[-1] for jet in leading_jets) < BTAG_DLR1_CUT:
        continue

    y_values = [row[var] for var in y_vars]
    # Drop the slot index (position 0) and flatten the jet records.
    jet_values = [value for jet in leading_jets for value in jet[1:]]
    signal_output.append(y_values + jet_values)









In [52]:
# Column names of the filtered output: the y1/y2 columns followed by the five
# jet columns for each of the four kept jets (slots 0..3).
labels2 = y_vars + [template % slot for slot in range(4) for template in jet_vars]

In [53]:
# Turn the list of per-event records into a frame with the output column names.
signal_output = pd.DataFrame(data=signal_output, columns=labels2)

In [54]:
# Preview the first ten selected events.
signal_output.head(n=10)

Unnamed: 0,y1_pt,y1_eta,y1_phi,y1_E,y2_pt,y2_eta,y2_phi,y2_E,jet0_pt,jet0_eta,...,jet2_pt,jet2_eta,jet2_phi,jet2_E,jet2_DLR1,jet3_pt,jet3_eta,jet3_phi,jet3_E,jet3_DLR1
0,79746.2,-0.392429,2.44633,85965.9,31514.5,-1.71872,-0.717049,90709.7,51651.4,-0.630143,...,36272.9,-0.719251,0.309633,46557.2,-0.539798,31591.8,2.36606,0.071863,169877.0,6.40223
1,136525.0,0.944705,-3.01346,202115.0,49557.9,1.33116,-1.641,100345.0,151514.0,1.214,...,103772.0,0.237008,1.37664,107000.0,-4.49389,77253.6,-1.55752,0.76125,191669.0,-3.71187
2,86707.8,0.60025,3.04702,102803.0,79075.2,0.253099,-1.82555,81621.4,136599.0,0.350951,...,50878.6,-2.51378,1.09445,316374.0,-2.5309,35322.4,-0.861525,0.189441,49641.8,2.27487
3,107118.0,-0.571035,-0.569958,125062.0,51644.8,-0.544625,-2.42581,59495.3,221134.0,1.27661,...,41746.0,0.047322,-0.992804,42148.2,3.11697,34905.4,0.5788,0.475945,41239.4,-0.999947
4,103949.0,0.474513,1.97727,115873.0,27256.7,1.21577,-0.377037,50007.5,91406.3,1.68013,...,40432.0,3.42233,0.754473,620117.0,-2.19579,39644.0,0.179441,-2.11497,40719.0,-4.59308
5,111473.0,0.826362,-0.899193,151750.0,41619.4,0.199046,2.08242,42446.6,71275.7,0.253533,...,32254.0,4.063,2.34506,938051.0,-1.60866,31605.7,2.18165,3.05087,141996.0,2.58391
6,140345.0,0.35695,-1.1021,149381.0,81599.4,1.64727,-1.54197,219722.0,149956.0,-0.539918,...,53031.2,-1.26544,1.95829,101809.0,-1.5822,36452.6,2.14718,2.12205,158309.0,-3.15265
7,113041.0,1.58441,-1.87122,287210.0,41177.5,1.84472,1.68154,133506.0,65155.5,1.32877,...,47011.1,2.05137,0.059,185969.0,9.57517,30806.4,0.350397,2.30833,32886.8,0.090978
8,64019.9,-1.26932,0.483424,122901.0,33291.2,0.616757,-1.51699,39826.3,50593.9,1.84633,...,37012.8,1.95569,-3.0525,133612.0,7.18914,33146.8,1.58751,0.211848,84646.2,4.14786
9,67642.8,-0.838547,0.065199,92851.3,46169.2,0.257522,-3.12146,47708.5,77853.1,1.31568,...,39844.6,1.95946,1.60832,144298.0,-1.32511,31944.6,2.506,0.229785,197151.0,2.57466


In [55]:
# Write the selected events without the row index.
# NOTE(review): the frame is named `signal_*` but the file is called
# "background_filtered.csv" — confirm which sample 'yy_jj.csv' actually is.
signal_output.to_csv(path_or_buf="background_filtered.csv", index=False)