In [1]:
import h5py
import numpy as np
import vector
import awkward as ak
import uproot
# Open the reconstructed-event HDF5 file read-only. The handle stays bound to
# `file` because a later cell closes it explicitly.
file = h5py.File('callum_train.h5', 'r')

# Every dataset used by this notebook lives under the "atlas" group; each one
# is copied fully into memory as a NumPy array.
atlas_group = file['atlas']
number_jets = np.array(atlas_group['njets'])
met = np.array(atlas_group['MET'])
eventnumber = np.array(atlas_group['eventNumber'])
jets = np.array(atlas_group['jets'])
leptons = np.array(atlas_group['leptons'])
neutrinos = np.array(atlas_group['neutrinos'])

In [2]:
# Truth-level data: open the object named "truth" inside the ROOT file
# (uproot's "file:object" path syntax). Branches are read from `tree` in a
# later cell via tree["<branch>"].array().
tree = uproot.open("ttbar_skimmed_july9_v2.root:truth")

In [3]:
# Split the structured `leptons` array into one plain array per named field.
(lepton_pt, lepton_eta, lepton_phi,
 lepton_energy, lepton_charge, lepton_type) = (
    leptons[field_name]
    for field_name in ('pt', 'eta', 'phi', 'energy', 'charge', 'type')
)

# Same for `jets`; "is_tagged" is the per-jet b-tag flag used by the masks below.
(jets_pt, jets_eta, jets_phi,
 jets_energy, jets_isbtagged) = (
    jets[field_name]
    for field_name in ('pt', 'eta', 'phi', 'energy', 'is_tagged')
)

In [4]:
# Event-selection masks (one boolean per event).

# Exactly two b-tagged jets in the event.
btag_count_per_event = np.sum(jets_isbtagged, axis=1)
two_b_jets_mask = btag_count_per_event == 2

# The charges of the first two lepton slots cancel.
# NOTE(review): a (0, 0) charge pair would also pass this test — confirm that
# the input never contains zero-charge (e.g. padded) lepton slots.
lepton_charge_sum = lepton_charge[:, 0] + lepton_charge[:, 1]
two_opposite_leptons_mask = lepton_charge_sum == 0

# Events passing both requirements, and the per-jet tag flags of those events.
clear_up_mask = two_opposite_leptons_mask & two_b_jets_mask
jets_which_are_btagged = jets_isbtagged[clear_up_mask]

In [5]:
# Kinematics of the b-tagged jets in the selected events. Indexing the
# (event, jet) arrays with the per-jet tag mask flattens them, and because
# every selected event has exactly two tagged jets the flat arrays hold two
# consecutive entries per event.
# NOTE(review): assumes `is_tagged` has boolean dtype so that it acts as a
# mask rather than as integer indices — confirm.
tagged_pt = jets_pt[clear_up_mask][jets_which_are_btagged]
tagged_eta = jets_eta[clear_up_mask][jets_which_are_btagged]
tagged_phi = jets_phi[clear_up_mask][jets_which_are_btagged]
tagged_energy = jets_energy[clear_up_mask][jets_which_are_btagged]

# Even positions -> first tagged jet of each event.
recon_j1 = vector.zip({
    'pt': tagged_pt[::2],
    'eta': tagged_eta[::2],
    'phi': tagged_phi[::2],
    'e': tagged_energy[::2],
})
# Odd positions -> second tagged jet of each event.
recon_j2 = vector.zip({
    'pt': tagged_pt[1::2],
    'eta': tagged_eta[1::2],
    'phi': tagged_phi[1::2],
    'e': tagged_energy[1::2],
})

# Lepton four-vectors are built for all events, then cut to the selected ones.
all_leptons = vector.zip({
    'pt': lepton_pt,
    'eta': lepton_eta,
    'phi': lepton_phi,
    'e': lepton_energy,
})
recon_leptons = all_leptons[clear_up_mask]

In [6]:
# Truth-level b and bbar four-vectors for the selected events.

# Keep the truth entries whose event number occurs among the selected
# reconstructed events.
# NOTE(review): np.isin preserves the truth tree's own ordering; the matching
# cell pairs these entries position-by-position with the reconstructed jets,
# which assumes both sources list the same events in the same order — confirm.
match_correct_events_mask = np.isin(tree["event_Number"].array(), eventnumber[clear_up_mask])

b_pt, b_eta, b_phi, b_mass = (
    tree[branch_name].array()[match_correct_events_mask]
    for branch_name in ("b_pt", "b_eta", "b_phi", "b_mass")
)
bbar_pt, bbar_eta, bbar_phi, bbar_mass = (
    tree[branch_name].array()[match_correct_events_mask]
    for branch_name in ("bbar_pt", "bbar_eta", "bbar_phi", "bbar_mass")
)

# Truth quarks are specified with a mass ('m') instead of an energy.
truth_b = vector.zip({
    'pt': b_pt,
    'eta': b_eta,
    'phi': b_phi,
    'm': b_mass,
})
truth_bbar = vector.zip({
    'pt': bbar_pt,
    'eta': bbar_eta,
    'phi': bbar_phi,
    'm': bbar_mass,
})

In [7]:
# Truth matching: decide which reconstructed tagged jet is the b and which
# is the bbar.

# Delta-R from each reconstructed jet to each truth quark.
b_1 = recon_j1.deltaR(truth_b)
bbar_1 = recon_j1.deltaR(truth_bbar)
b_2 = recon_j2.deltaR(truth_b)
bbar_2 = recon_j2.deltaR(truth_bbar)

# An event is kept only when both jets sit within the cone of *different*
# truth quarks, in either assignment; otherwise it is not a usable truth event.
max_match_dr = 0.4
j1_b_and_j2_bbar = (b_1 < max_match_dr) & (bbar_2 < max_match_dr)
j2_b_and_j1_bbar = (b_2 < max_match_dr) & (bbar_1 < max_match_dr)
basic_dr_mask = j1_b_and_j2_bbar | j2_b_and_j1_bbar

# Pick the positively / negatively charged lepton of each selected event,
# then keep only the truth-matched events.
# NOTE(review): relies on every selected event holding exactly one +1 and one
# -1 lepton so the picked leptons stay aligned with the jets — confirm.
selected_lepton_charge = lepton_charge[clear_up_mask]
lepton_positive_charge_mask = selected_lepton_charge == 1
lepton_negative_charge_mask = selected_lepton_charge == -1
recon_lep_positive = recon_leptons[lepton_positive_charge_mask][basic_dr_mask]
recon_lep_negative = recon_leptons[lepton_negative_charge_mask][basic_dr_mask]

recon_j1_basic_clear = recon_j1[basic_dr_mask]
recon_j2_basic_clear = recon_j2[basic_dr_mask]

# Jet 1 is labelled the b when it lies closer to the truth b than to the
# truth bbar; jet 2 then takes the other label.
j1_is_b_mask = (b_1 < bbar_1)[basic_dr_mask]

recon_b = np.where(j1_is_b_mask, recon_j1_basic_clear, recon_j2_basic_clear)
recon_bbar = np.where(j1_is_b_mask, recon_j2_basic_clear, recon_j1_basic_clear)

In [8]:
# Per-pairing features. "signal" names the (b, l+) and (bbar, l-) pairings,
# "background" names the swapped (bbar, l+) and (b, l-) pairings.

# Four-vector sum of each jet-lepton pairing, built once and reused below.
pair_b_lep_pos = recon_b + recon_lep_positive
pair_bbar_lep_neg = recon_bbar + recon_lep_negative
pair_bbar_lep_pos = recon_bbar + recon_lep_positive
pair_b_lep_neg = recon_b + recon_lep_negative

# --- signal: b with positive lepton ---
signal_delta_r_blep_positive = recon_lep_positive.deltaR(recon_b)
signal_delta_eta_blep_positive = recon_lep_positive.deltaeta(recon_b)
signal_delta_angle_blep_positive = recon_lep_positive.deltaangle(recon_b)
signal_invmass_blep_positive = pair_b_lep_pos.m
signal_combined_pt_blep_positive = pair_b_lep_pos.pt
signal_energy_blep_positive = pair_b_lep_pos.e
# magnitude of (delta eta + delta R)
signal_mag_de_dr_blep_positive = np.abs(
    signal_delta_eta_blep_positive + signal_delta_r_blep_positive
)

# --- signal: bbar with negative lepton ---
signal_delta_r_bbarlep_negative = recon_lep_negative.deltaR(recon_bbar)
signal_delta_eta_bbarlep_negative = recon_lep_negative.deltaeta(recon_bbar)
signal_delta_angle_bbarlep_negative = recon_lep_negative.deltaangle(recon_bbar)
signal_invmass_bbarlep_negative = pair_bbar_lep_neg.m
signal_combined_pt_bbarlep_negative = pair_bbar_lep_neg.pt
signal_energy_bbarlep_negative = pair_bbar_lep_neg.e
signal_mag_de_dr_bbarlep_negative = np.abs(
    signal_delta_eta_bbarlep_negative + signal_delta_r_bbarlep_negative
)

# --- background: bbar with positive lepton ---
background_delta_r_bbarlep_positive = recon_lep_positive.deltaR(recon_bbar)
background_delta_eta_bbarlep_positive = recon_lep_positive.deltaeta(recon_bbar)
background_delta_angle_bbarlep_positive = recon_lep_positive.deltaangle(recon_bbar)
background_invmass_bbarlep_positive = pair_bbar_lep_pos.m
background_combined_pt_bbarlep_positive = pair_bbar_lep_pos.pt
background_energy_bbarlep_positive = pair_bbar_lep_pos.e
background_mag_de_dr_bbarlep_positive = np.abs(
    background_delta_eta_bbarlep_positive + background_delta_r_bbarlep_positive
)

# --- background: b with negative lepton ---
background_delta_r_blep_negative = recon_lep_negative.deltaR(recon_b)
background_delta_eta_blep_negative = recon_lep_negative.deltaeta(recon_b)
background_delta_angle_blep_negative = recon_lep_negative.deltaangle(recon_b)
background_invmass_blep_negative = pair_b_lep_neg.m
background_combined_pt_blep_negative = pair_b_lep_neg.pt
background_energy_blep_negative = pair_b_lep_neg.e
background_mag_de_dr_blep_negative = np.abs(
    background_delta_eta_blep_negative + background_delta_r_blep_negative
)

# --- separation between the two jet-lepton clusters of one hypothesis ---
# This is a single per-event quantity for each hypothesis, so the "_b" and
# "_bbar" flavoured names of the same hypothesis carry identical values.
signal_cluster_delta_r_blep_positive = pair_b_lep_pos.deltaR(pair_bbar_lep_neg)
signal_cluster_delta_r_bbarlep_negative = signal_cluster_delta_r_blep_positive
background_cluster_delta_r_bbarlep_positive = pair_bbar_lep_pos.deltaR(pair_b_lep_neg)
background_cluster_delta_r_blep_negative = background_cluster_delta_r_bbarlep_positive

signal_cluster_delta_eta_blep_positive = pair_b_lep_pos.deltaeta(pair_bbar_lep_neg)
signal_cluster_delta_eta_bbarlep_negative = signal_cluster_delta_eta_blep_positive
background_cluster_delta_eta_bbarlep_positive = pair_bbar_lep_pos.deltaeta(pair_b_lep_neg)
background_cluster_delta_eta_blep_negative = background_cluster_delta_eta_bbarlep_positive

In [16]:
# NOTE(review): this import ideally belongs in the notebook's top import cell;
# it is kept here so the cell remains runnable on its own.
import pandas as pd

# The input file is no longer needed: every dataset was copied into NumPy
# arrays when it was read.
file.close()


def _write_feature_file(path, features):
    """Write ``features`` (dataset name -> array) into group 'Objects' of a new HDF5 file.

    The file at ``path`` is created or truncated, and is closed even if a
    write fails.
    """
    with h5py.File(path, 'w') as out_file:
        out_group = out_file.create_group('Objects')
        for dataset_name, values in features.items():
            out_group.create_dataset(dataset_name, data=values)


def _read_feature_frame(path, names):
    """Load the datasets ``names`` from group 'Objects' of ``path`` into a DataFrame.

    Columns follow the order of ``names``; an 'entry' column holding the row
    number is inserted in front.
    """
    with h5py.File(path, 'r') as in_file:
        in_group = in_file['Objects']
        frame = pd.DataFrame({name: in_group[name][:] for name in names})
    frame.insert(0, 'entry', range(len(frame)))
    return frame


# Dataset name -> feature array for the signal pairings (b, l+) / (bbar, l-).
# NOTE(review): "delta_eta_bar" breaks the "..._bbar" naming pattern of its
# siblings; the key is kept unchanged because readers of these files may
# depend on it.
signal_features = {
    "delta_r_b": signal_delta_r_blep_positive,
    "bl_invmass_b": signal_invmass_blep_positive,
    "delta_eta_b": signal_delta_eta_blep_positive,
    "bl_pt_b": signal_combined_pt_blep_positive,
    "cluster_delta_r_b": signal_cluster_delta_r_blep_positive,
    "cluster_delta_eta_b": signal_cluster_delta_eta_blep_positive,
    "mag_de_dr_b": signal_mag_de_dr_blep_positive,
    "delta_r_bbar": signal_delta_r_bbarlep_negative,
    "bl_invmass_bbar": signal_invmass_bbarlep_negative,
    "delta_eta_bar": signal_delta_eta_bbarlep_negative,
    "bl_pt_bbar": signal_combined_pt_bbarlep_negative,
    "cluster_delta_r_bbar": signal_cluster_delta_r_bbarlep_negative,
    "cluster_delta_eta_bbar": signal_cluster_delta_eta_bbarlep_negative,
    "mag_de_dr_bbar": signal_mag_de_dr_bbarlep_negative,
    "delta_angle_b": signal_delta_angle_blep_positive,
    "delta_angle_bbar": signal_delta_angle_bbarlep_negative,
    "bl_energy_b": signal_energy_blep_positive,
    "bl_energy_bbar": signal_energy_bbarlep_negative,
}

# Same dataset names for the swapped (background) pairings: the "_b" entries
# use (b, l-) and the "_bbar" entries use (bbar, l+).
background_features = {
    "delta_r_b": background_delta_r_blep_negative,
    "bl_invmass_b": background_invmass_blep_negative,
    "delta_eta_b": background_delta_eta_blep_negative,
    "bl_pt_b": background_combined_pt_blep_negative,
    "cluster_delta_r_b": background_cluster_delta_r_blep_negative,
    "cluster_delta_eta_b": background_cluster_delta_eta_blep_negative,
    "mag_de_dr_b": background_mag_de_dr_blep_negative,
    "delta_r_bbar": background_delta_r_bbarlep_positive,
    "bl_invmass_bbar": background_invmass_bbarlep_positive,
    "delta_eta_bar": background_delta_eta_bbarlep_positive,
    "bl_pt_bbar": background_combined_pt_bbarlep_positive,
    "cluster_delta_r_bbar": background_cluster_delta_r_bbarlep_positive,
    "cluster_delta_eta_bbar": background_cluster_delta_eta_bbarlep_positive,
    "mag_de_dr_bbar": background_mag_de_dr_bbarlep_positive,
    "delta_angle_b": background_delta_angle_blep_negative,
    "delta_angle_bbar": background_delta_angle_bbarlep_positive,
    "bl_energy_b": background_energy_blep_negative,
    "bl_energy_bbar": background_energy_bbarlep_positive,
}

_write_feature_file("DNN_signal.h5", signal_features)
_write_feature_file("DNN_background.h5", background_features)

# Read both files back into pandas; `variables` fixes the column order.
samples=["DNN_signal","DNN_background"]
variables=["delta_r_b","bl_invmass_b", "delta_eta_b", "bl_pt_b", "cluster_delta_r_b", "cluster_delta_eta_b", "mag_de_dr_b", "delta_r_bbar", "bl_invmass_bbar", "delta_eta_bar", "bl_pt_bbar", "cluster_delta_r_bbar", "cluster_delta_eta_bbar", "mag_de_dr_bbar","delta_angle_bbar", "delta_angle_b","bl_energy_b","bl_energy_bbar"]
DataFrames = {s: _read_feature_frame(s + ".h5", variables) for s in samples}

In [15]:
# Show the signal feature table built in the export cell; pandas abbreviates
# the middle rows of a long frame in the rendered output.
# NOTE(review): this cell's execution count (15) precedes the export cell's
# (16), so the output shown may come from an earlier run — re-run top to bottom.
DataFrames["DNN_signal"]

Unnamed: 0,entry,delta_r_b,bl_invmass_b,delta_eta_b,bl_pt_b,cluster_delta_r_b,cluster_delta_eta_b,mag_de_dr_b,delta_r_bbar,bl_invmass_bbar,delta_eta_bar,bl_pt_bbar,cluster_delta_r_bbar,cluster_delta_eta_bbar,mag_de_dr_bbar,delta_angle_bbar,delta_angle_b,bl_energy_b,bl_energy_bbar
0,0,3.009694,109.694724,-0.656952,71.622040,3.184023,1.913775,2.352742,1.046753,93.935549,0.785908,167.504340,3.184023,1.913775,1.832661,0.812234,2.244538,160.032238,249.404648
1,1,2.963093,78.751473,-0.363876,17.030089,5.332934,4.616188,2.599217,1.841729,89.550348,0.325131,102.003412,5.332934,4.616188,2.166860,1.188511,0.632528,269.014114,206.533855
2,2,2.118486,102.160950,1.340437,69.063472,2.792119,-1.169477,3.458923,1.383178,97.259136,-1.331596,129.100070,2.792119,-1.169477,0.051583,1.128707,1.884091,126.226124,197.034267
3,3,2.935242,144.870343,1.114129,27.057110,3.543299,-3.448432,4.049371,3.223134,148.624577,1.543153,76.712689,3.543299,-3.448432,4.766287,2.666542,0.897083,372.483650,168.090893
4,4,1.369069,86.817204,0.352896,148.436006,3.457043,1.478277,1.721965,1.516627,132.712026,-0.562766,208.339135,3.457043,1.478277,0.953861,1.397287,0.682103,298.869347,250.724854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1045885,1045885,1.532843,51.278416,0.484111,80.070300,3.109683,2.277995,2.016955,1.801786,48.879228,0.772747,42.838741,3.109683,2.277995,2.574533,0.986677,0.993583,125.070810,103.319721
1045886,1045886,1.729619,93.513148,1.564443,118.591299,2.343813,-0.727823,3.294062,1.492592,53.286920,-0.357298,65.699063,2.343813,-0.727823,1.135294,1.360852,0.866401,222.299965,88.703068
1045887,1045887,1.490190,61.566924,-0.778191,73.964614,2.972123,2.959591,0.711999,1.404191,57.149017,-0.203657,74.204282,2.972123,2.959591,1.200534,1.079622,0.467339,326.995659,115.194489
1045888,1045888,1.843868,110.506598,-1.826227,132.210999,3.216977,-0.919613,0.017641,0.589980,33.506270,-0.403515,139.915894,3.216977,-0.919613,0.186464,0.483892,1.390083,312.625525,162.149563


In [None]:
# Scratch arithmetic: halves the literal 2091780 (evaluates to 1045890.0).
# NOTE(review): presumably a row-count sanity check — replace the magic number
# with an expression on the actual arrays, or delete this cell; confirm intent.
2091780/2

1045890.0