In [1]:
from variables_calculator import variables_calculator
import uproot

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# First we work with the signal data
file = "/tf/Higgs-Boson-LHC-Collision-Detector/sigfcc_350.root"

# Open the ROOT file in a context manager so the file handle is released as
# soon as the branch has been read, instead of staying open for the lifetime
# of the kernel. `data` and `tree` are closed once the block exits; reopen the
# file if more branches are needed.
with uproot.open(file) as data:
    tree = data["Delphes"]
    # Despite its name, `particles` holds the "Jet/Jet.PT" branch.
    # NOTE(review): presumably one array element per event — confirm.
    particles = tree["Jet/Jet.PT"].array(library="np")

# Number of entries read from the branch (sanity check before building features).
len(particles)

500000

In [3]:
# Per-event jet and missing-energy tables for the signal sample.
# NOTE(review): each helper is assumed to return a DataFrame carrying an
# `event_1` key column — confirm in variables_calculator.
lead_sublead_jets = variables_calculator.leading_n_subleading_jets(file)
PRI_jet_num = variables_calculator.PRI_jet_num(file)
PRI_jet_all = variables_calculator.PRI_jet_all_pt(file)
met = variables_calculator.met(file)

# Join every table on the event id. The key is spelled out for `met` as well:
# without `on=`, pandas joins on *all* shared column names, which would change
# silently if a helper ever gained another common column.
jets_data = (
    lead_sublead_jets
    .merge(PRI_jet_num, on="event_1")
    .merge(PRI_jet_all, on="event_1")
    .merge(met, on="event_1")
)

jets_data = variables_calculator.jets_inv_mass(jets_data)

# Tag the sample as signal (label = 1) and replace missing values with 0.
# NOTE(review): the NaNs presumably come from events with fewer than two jets;
# zero-filling makes them indistinguishable from a genuine 0 — confirm this is
# the intended encoding.
jets_data = jets_data.assign(label=1).fillna(0)

jets_data.head()

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,PRI_jet_leading_pt,PRI_jet_subleading_pt,PRI_jet_leading_eta,PRI_jet_subleading_eta,PRI_jet_leading_phi,PRI_jet_subleading_phi,event_1,PRI_jet_num,PRI_jet_all_pt,PRI_met,PRI_met_phi,DER_mass_jet_jet,label
0,97.789627,88.065651,0.87881,-0.73657,2.420791,-0.375482,0,3,205.819443,29.468515,-1.191945,247.457836,1
1,69.906853,64.256523,1.287713,-0.113773,0.435103,2.964018,1,3,170.760605,4.302101,-1.388696,163.397858,1
2,89.023056,83.168976,-0.94632,-0.037001,0.552948,-3.103319,2,3,213.831802,3.446326,-2.115567,185.076162,1
3,137.439438,127.212677,-0.173504,0.348251,-0.170474,-3.009968,3,3,304.227875,4.129518,2.948901,270.594198,1
4,49.592335,46.715462,-1.677774,1.727845,-1.408155,1.710724,4,2,96.3078,3.077592,2.085407,272.981686,1


In [4]:
# Same-flavour lepton pair tables (muons first, then electrons), stacked
# row-wise into a single frame. The original row index of each table is kept,
# so index labels in `lepton_data` are not unique.
# NOTE(review): an event present in both tables would appear twice after the
# later merge on `event_1` — confirm this cannot happen.
muon_data, electron_data = (
    variables_calculator.inv_m(file, flavour, flavour)
    for flavour in ("muon", "electron")
)
lepton_data = pd.concat((muon_data, electron_data))

In [5]:
# Keep only the event id and the lepton-pair mass, under its final feature
# name. Renaming *before* selecting keeps this cell safe to re-run: on a second
# execution `inv_m` is already gone, `rename` is then a no-op, and the
# selection still succeeds (selecting `inv_m` directly would raise KeyError).
# NOTE(review): `inv_m` is presumably the invariant mass of the pair — confirm.
lepton_data = (
    lepton_data
    .rename(columns={"inv_m": "DER_mass_lep"})
    .loc[:, ["event_1", "DER_mass_lep"]]
    .copy()
)
lepton_data.head()

Unnamed: 0,event_1,DER_mass_lep
0,0,92.925507
1,3,82.369003
2,4,92.662231
3,7,89.298271
4,8,88.912941


In [6]:
# Inner-join the jet features with the lepton mass on the event id, then drop
# the id so that only feature columns and the label remain.
signal = (
    jets_data
    .merge(lepton_data, on="event_1")
    .drop(columns="event_1")
)
signal.head()

Unnamed: 0,PRI_jet_leading_pt,PRI_jet_subleading_pt,PRI_jet_leading_eta,PRI_jet_subleading_eta,PRI_jet_leading_phi,PRI_jet_subleading_phi,PRI_jet_num,PRI_jet_all_pt,PRI_met,PRI_met_phi,DER_mass_jet_jet,label,DER_mass_lep
0,97.789627,88.065651,0.87881,-0.73657,2.420791,-0.375482,3,205.819443,29.468515,-1.191945,247.457836,1,92.925507
1,69.906853,64.256523,1.287713,-0.113773,0.435103,2.964018,3,170.760605,4.302101,-1.388696,163.397858,1,85.018837
2,89.023056,83.168976,-0.94632,-0.037001,0.552948,-3.103319,3,213.831802,3.446326,-2.115567,185.076162,1,102.905479
3,137.439438,127.212677,-0.173504,0.348251,-0.170474,-3.009968,3,304.227875,4.129518,2.948901,270.594198,1,82.369003
4,49.592335,46.715462,-1.677774,1.727845,-1.408155,1.710724,2,96.3078,3.077592,2.085407,272.981686,1,92.662231


In [7]:
# Persist the signal table. `index=True` is the pandas default, written out
# explicitly here: the row index goes into the CSV as an unnamed first column,
# so read the file back with `index_col=0` to avoid a stray "Unnamed: 0" column.
signal.to_csv("Signal.csv", index=True)

# Column dtypes, non-null counts and memory footprint of the saved table.
signal.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 412387 entries, 0 to 412386
Data columns (total 13 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   PRI_jet_leading_pt      412387 non-null  float32
 1   PRI_jet_subleading_pt   412387 non-null  float64
 2   PRI_jet_leading_eta     412387 non-null  float32
 3   PRI_jet_subleading_eta  412387 non-null  float64
 4   PRI_jet_leading_phi     412387 non-null  float32
 5   PRI_jet_subleading_phi  412387 non-null  float64
 6   PRI_jet_num             412387 non-null  int64  
 7   PRI_jet_all_pt          412387 non-null  float32
 8   PRI_met                 412387 non-null  float32
 9   PRI_met_phi             412387 non-null  float32
 10  DER_mass_jet_jet        412387 non-null  float64
 11  label                   412387 non-null  int64  
 12  DER_mass_lep            412387 non-null  float32
dtypes: float32(7), float64(4), int64(2)
memory usage: 29.9 MB


In [8]:
# Switch to the background sample; `file` is reused by the cells below.
file = "/tf/Higgs-Boson-LHC-Collision-Detector/bgd240zh.root"

# Open the ROOT file in a context manager so the file handle is released as
# soon as the branch has been read. `data` and `tree` are closed once the
# block exits; reopen the file if more branches are needed.
with uproot.open(file) as data:
    tree = data["Delphes"]
    # Despite its name, `particles` holds the "Jet/Jet.PT" branch.
    # NOTE(review): presumably one array element per event — confirm.
    particles = tree["Jet/Jet.PT"].array(library="np")

# Number of entries read from the branch (sanity check before building features).
len(particles)

200000

In [9]:
# Build the background table with the same steps used for the signal sample,
# but with label = 0.
# NOTE(review): each helper is assumed to return a DataFrame carrying an
# `event_1` key column — confirm in variables_calculator.
lead_sublead_jets = variables_calculator.leading_n_subleading_jets(file)
PRI_jet_num = variables_calculator.PRI_jet_num(file)
PRI_jet_all = variables_calculator.PRI_jet_all_pt(file)
met = variables_calculator.met(file)

# Join every table on the event id. The key is spelled out for `met` as well:
# without `on=`, pandas joins on *all* shared column names, which would change
# silently if a helper ever gained another common column.
jets_data = (
    lead_sublead_jets
    .merge(PRI_jet_num, on="event_1")
    .merge(PRI_jet_all, on="event_1")
    .merge(met, on="event_1")
)

jets_data = variables_calculator.jets_inv_mass(jets_data)

# Tag the sample as background (label = 0) and replace missing values with 0.
# NOTE(review): zero-filling makes missing values indistinguishable from a
# genuine 0 — confirm this is the intended encoding.
jets_data = jets_data.assign(label=0).fillna(0)

# Same-flavour lepton pair tables, stacked row-wise (muons first).
muon_data = variables_calculator.inv_m(file, "muon", "muon")
electron_data = variables_calculator.inv_m(file, "electron", "electron")
lepton_data = pd.concat([muon_data, electron_data])

# Keep only the event id and the lepton-pair mass under its final feature name.
lepton_data = (
    lepton_data
    .loc[:, ["event_1", "inv_m"]]
    .rename(columns={"inv_m": "DER_mass_lep"})
)

# Inner-join jets and leptons on the event id, then drop the id so that only
# feature columns and the label remain.
background = (
    jets_data
    .merge(lepton_data, on="event_1")
    .drop(columns=["event_1"])
)

# The row index is written as an unnamed first column (pandas default); read
# the file back with `index_col=0`.
background.to_csv("Background.csv")
background.info()

  result = getattr(ufunc, method)(*inputs, **kwargs)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 141774 entries, 0 to 141773
Data columns (total 13 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   PRI_jet_leading_pt      141774 non-null  float64
 1   PRI_jet_subleading_pt   141774 non-null  float64
 2   PRI_jet_leading_eta     141774 non-null  float64
 3   PRI_jet_subleading_eta  141774 non-null  float64
 4   PRI_jet_leading_phi     141774 non-null  float64
 5   PRI_jet_subleading_phi  141774 non-null  float64
 6   PRI_jet_num             141774 non-null  int64  
 7   PRI_jet_all_pt          141774 non-null  float32
 8   PRI_met                 141774 non-null  float32
 9   PRI_met_phi             141774 non-null  float32
 10  DER_mass_jet_jet        141774 non-null  float64
 11  label                   141774 non-null  int64  
 12  DER_mass_lep            141774 non-null  float32
dtypes: float32(4), float64(7), int64(2)
memory usage: 11.9 MB


In [10]:
# Preview the first five rows of the signal table (label = 1).
signal.head(n=5)

Unnamed: 0,PRI_jet_leading_pt,PRI_jet_subleading_pt,PRI_jet_leading_eta,PRI_jet_subleading_eta,PRI_jet_leading_phi,PRI_jet_subleading_phi,PRI_jet_num,PRI_jet_all_pt,PRI_met,PRI_met_phi,DER_mass_jet_jet,label,DER_mass_lep
0,97.789627,88.065651,0.87881,-0.73657,2.420791,-0.375482,3,205.819443,29.468515,-1.191945,247.457836,1,92.925507
1,69.906853,64.256523,1.287713,-0.113773,0.435103,2.964018,3,170.760605,4.302101,-1.388696,163.397858,1,85.018837
2,89.023056,83.168976,-0.94632,-0.037001,0.552948,-3.103319,3,213.831802,3.446326,-2.115567,185.076162,1,102.905479
3,137.439438,127.212677,-0.173504,0.348251,-0.170474,-3.009968,3,304.227875,4.129518,2.948901,270.594198,1,82.369003
4,49.592335,46.715462,-1.677774,1.727845,-1.408155,1.710724,2,96.3078,3.077592,2.085407,272.981686,1,92.662231


In [11]:
# Preview the first five rows of the background table (label = 0).
background.head(n=5)

Unnamed: 0,PRI_jet_leading_pt,PRI_jet_subleading_pt,PRI_jet_leading_eta,PRI_jet_subleading_eta,PRI_jet_leading_phi,PRI_jet_subleading_phi,PRI_jet_num,PRI_jet_all_pt,PRI_met,PRI_met_phi,DER_mass_jet_jet,label,DER_mass_lep
0,61.858978,50.390411,0.001605,-1.163691,1.761646,-1.186303,3,128.668365,0.903545,0.342974,130.712745,0,92.865944
1,59.200146,57.255505,0.018362,-0.708189,-1.146716,2.447471,4,199.598618,1.933875,-0.978839,121.428789,0,91.787567
2,64.045975,57.252205,-0.505538,0.144685,-2.849891,1.336503,3,153.353973,29.304701,-0.519342,112.344104,0,90.618088
3,75.073952,68.324547,-0.348803,0.397656,-1.431973,1.396415,3,144.677231,22.149502,2.813292,151.696408,0,90.586159
4,63.593616,41.097408,0.61812,0.906702,2.578294,-1.001227,3,136.130844,0.212966,0.644359,100.896154,0,89.680992
