In [16]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot

In [17]:
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [18]:
path_to_data = '../data/'

folder15 = 'breynold/user.breynold.data15_13TeV.00284484.physics_Main.DAOD_NTUP_JTRIG_JETM1.r9264_p3083_p3601_j042_tree.root/'
file15 = 'user.breynold.18753218._000001.tree.root'
folder16 = 'breynold/user.breynold.data16_13TeV.00307656.physics_Main.DAOD_NTUP_JTRIG_JETM1.r9264_p3083_p3601_j042_tree.root/'
file16 = 'user.breynold.18797259._000001.tree.root'

folder = 'doglioni/'
file = 'data18_13TeV.00355331.calibration_DataScouting_05_Jets.merge.AOD.r10657_p3592.root'
file = 'dijetISRphoton.root'
file = 'dijetISRphoton36k.root'

# Load a ROOT file
#filePath = path_to_data + folder16 + file16
filePath = path_to_data + folder + file
#ttree = uproot.open(filePath)['outTree']['nominal']
ttree = uproot.open(filePath)['CollectionTree']

In [19]:
ttree.keys()

[b'n_jet',
 b'jet_pt',
 b'jet_eta',
 b'jet_phi',
 b'jet_e',
 b'jet_Jvt',
 b'jet_btagged',
 b'n_photons',
 b'photons_pt',
 b'photons_eta',
 b'photons_phi',
 b'photons_e',
 b'photons_etcone20',
 b'photons_ptcone30',
 b'mc_event_weight',
 b'weight',
 b'av_int_per_xing',
 b'num_pv',
 b'run_number',
 b'lumi_block',
 b'mc_channel_number',
 b'event_number']

In [20]:
branchnames = ['nAntiKt4EMTopoJets_Calib2018',
               'AntiKt4EMTopoJets_Calib2018_E',
               'AntiKt4EMTopoJets_Calib2018_pt',
               'AntiKt4EMTopoJets_Calib2018_phi',
               'AntiKt4EMTopoJets_Calib2018_eta']


#branchnames = ['nDSTrigJet',
#               'DSTrigJet_E',
#              'DSTrigJet_pt',
#              'DSTrigJet_phi',
#              'DSTrigJet_eta']

branchnames = ['n_jet',
               'jet_e',
               'jet_pt',
               'jet_phi',
               'jet_eta',
               'n_photons',
               'photons_e',
               'photons_pt',
               'photons_phi',
               'photons_eta']

jaggedE = ttree.array(branchnames[1])
jaggedpT = ttree.array(branchnames[2])
jaggedphi = ttree.array(branchnames[3])
jaggedeta = ttree.array(branchnames[4])

In [21]:
jaggedE_ph = ttree.array(branchnames[6])
jaggedpT_ph = ttree.array(branchnames[7])
jaggedphi_ph = ttree.array(branchnames[8])
jaggedeta_ph = ttree.array(branchnames[9])

In [22]:
def get_leading(jaggedX):
    return jaggedX[jaggedX.counts > 0, 0]

In [23]:
leading_E = get_leading(jaggedE)
leading_pT = get_leading(jaggedpT)
leading_phi = get_leading(jaggedphi)
leading_eta = get_leading(jaggedeta)

In [24]:
leading_ph_E = get_leading(jaggedE_ph)
leading_ph_pT = get_leading(jaggedpT_ph)
leading_ph_phi = get_leading(jaggedphi_ph)
leading_ph_eta = get_leading(jaggedeta_ph)

In [25]:
print(leading_E.shape, leading_eta.shape, leading_phi.shape, leading_pT.shape)

(36961,) (36961,) (36961,) (36961,)


In [26]:
leading_ph_E.shape

(37100,)

In [27]:
df1 = pd.DataFrame(data = {'jet pT': leading_pT, 'jet eta': leading_eta, 'jet phi': leading_phi, 'jet E': leading_E})

In [28]:
df2 = pd.DataFrame(data = {'photon pT': leading_ph_pT, 'photon eta': leading_ph_eta, 'photon phi': leading_ph_phi, 'photon E': leading_ph_E})

In [29]:
df1.head()

Unnamed: 0,jet pT,jet eta,jet phi,jet E
0,72432.320312,0.425758,1.136822,79456.0
1,129755.570312,0.71789,-1.579304,165034.421875
2,95613.0,-0.07841,-1.341217,96232.96875
3,108035.71875,-2.594386,0.473218,727261.9375
4,148432.25,1.78791,-1.924926,456764.90625


In [30]:
df2.head()

Unnamed: 0,photon pT,photon eta,photon phi,photon E
0,48776.523438,-0.66213,0.473933,59865.101562
1,73370.265625,0.850158,2.940618,101521.148438
2,12412.353516,0.866865,1.964844,17375.46875
3,28775.3125,-0.707102,-2.67377,36273.816406
4,75852.6875,0.247798,1.205002,78193.460938


In [13]:
train, test = train_test_split(df, test_size=0.2, random_state=41)

In [14]:
print(train.shape, test.shape)

(29568, 8) (7393, 8)


In [32]:
# Save train and test sets
#df1.to_pickle('processed_data/dijetISRphoton36k_jets.pkl')
#df2.to_pickle('processed_data/dijetISRphoton36k_photons.pkl')
# train = pd.read_pickle('processed_data/train.pkl')
# test = pd.read_pickle('processed_data/test.pkl')

In [15]:
df.head()

Unnamed: 0,jet pT,jet eta,jet phi,jet E,photon pT,photon eta,photon phi,photon E
0,72432.320312,0.425758,1.136822,79456.0,72432.320312,0.425758,1.136822,79456.0
1,129755.570312,0.71789,-1.579304,165034.421875,129755.570312,0.71789,-1.579304,165034.421875
2,95613.0,-0.07841,-1.341217,96232.96875,95613.0,-0.07841,-1.341217,96232.96875
3,108035.71875,-2.594386,0.473218,727261.9375,108035.71875,-2.594386,0.473218,727261.9375
4,148432.25,1.78791,-1.924926,456764.90625,148432.25,1.78791,-1.924926,456764.90625
