# Test notebook for Sx operations.

In [17]:
import servicex
from servicex import ServiceXDataset
from servicex.minio_adaptor import MinioAdaptor
from servicex.servicex_adaptor import ServiceXAdaptor
from func_adl_xAOD import ServiceXDatasetSource
import uproot_methods
from numpy import genfromtxt
import qastle
import csv
import time
import math

### Datasets:

localds://mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00

user.emmat:user.emmat.mc16_13TeV.311311.MadGraphPythia8EvtGen_A14NNPDF31LO_HSS_LLP_mH125_mS15.mc16d.200131.forsX_trees.root

In [7]:
def test_retrieve_simple_jet_pts():
    query = "(call ResultTTree (call Select (call SelectMany (call EventDataset (list 'localds:bogus')) (lambda (list e) (call (attr e 'Jets') 'AntiKt4EMTopoJets'))) (lambda (list j) (/ (call (attr j 'pt')) 1000.0))) (list 'JetPt') 'analysis' 'junk.root')"
    dataset = ServiceXDataset("mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00")
    r = dataset.get_data_pandas_df(query)
    
    return r

In [9]:
data_xaod = test_retrieve_simple_jet_pts()

In [10]:
print(data_xaod)

            JetPt
entry            
0       52.898621
1       28.133312
2       15.308400
3        5.986582
4        5.813203
...           ...
857133   6.211655
857134  47.653145
857135  32.738951
857136   6.260789
857137   5.394783

[11355980 rows x 1 columns]


In [32]:
def test_old_simple_jet_pts():
    dataset = ServiceXDataset("mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00")
    query = ServiceXDatasetSource(dataset) \
        .SelectMany('lambda e: (e.Jets("AntiKt4EMTopoJets"))') \
        .Where('lambda j: (j.pt()/1000)>30') \
        .Select('lambda j: (j.pt())') \
        .AsPandasDF("JetPt") \
        .value()
    return query

def test_retrieve_lepton_data():
    dataset = ServiceXDataset("mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00")
    query = ServiceXDatasetSource(dataset)	\
        .Select('lambda e: (e.Electrons("Electrons"))') \
        .Select('lambda e: (e.pt(), \
                            e.eta(), \
							e.phi(), \
							e.e())') \
        .AsAwkwardArray(('ElePt', 'EleEta', 'ElePhi', 'EleE')) \
        .value()
    return query

In [33]:
lepton_test = test_retrieve_lepton_data()

HBox(children=(FloatProgress(value=0.0, description='mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTE…

HBox(children=(FloatProgress(value=0.0, description='        Downloaded', layout=Layout(flex='2'), max=9000000…

ServiceXException: (ServiceXException(...), 'Failed to transform all files')

In [29]:
print(lepton_test)

{b'ElePt': <JaggedArray [[56933.7265625 30103.556640625 7957.18994140625] [8510.5205078125 4297.3701171875 3224.6416015625] [31541.15234375] ... [4459.81787109375] [52486.60546875 23942.8203125 19794.88671875] []] at 0x0221309b15c8>, b'EleEta': <JaggedArray [[-0.12912268936634064 0.5888139605522156 1.2514770030975342] [0.675038754940033 0.2761063873767853 0.12661254405975342] [-2.275493621826172] ... [0.18539121747016907] [-0.1793559193611145 1.324408769607544 -0.062219955027103424] []] at 0x0221309b1e48>, b'ElePhi': <JaggedArray [[2.520068883895874 -0.45167723298072815 -0.7712351083755493] [0.8112514615058899 -2.0128047466278076 -1.8641725778579712] [2.1214284896850586] ... [2.7884395122528076] [1.3793280124664307 -2.747081756591797 -1.044033169746399] []] at 0x022130a3f088>, b'EleE': <JaggedArray [[57409.0049476159 35474.56113861621 15045.390911737933] [10524.304487067426 4462.2178674942725 3250.5228782492427] [155111.00184215332] ... [4536.679420621884] [53333.07986649214 48195.9617

In [13]:
def test_func_adl_simple_jet_pts():
    dataset = ServiceXDataset("mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00", image="sslhep/servicex_func_adl_xaod_transformer:pr_atlas_rpm_bug_fix")
    query = ServiceXDatasetSource(dataset) \
        .Where('lambda e: e.Jets("AntiKt4EMTopoJets").Where(lambda j: j.pt() / 1000.0 > 30.0).Count() > 1') \
        .Select('lambda e: e.Electrons("Electrons")') \
        .Select('lambda e: e.Select(lambda ele: ele.eta())') \
        .AsAwkwardArray('EleEta') \
        .value()
    return query

#        .Select('lambda ls: (ls[1].Select(lambda e: e.eta()).Where(ls[0].Select(lambda j: j.pt()).Count() > 2))') \



In [14]:
t0 = time.process_time()
data = test_func_adl_simple_jet_pts()
t1 = time.process_time()

HBox(children=(FloatProgress(value=0.0, description='mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTE…

HBox(children=(FloatProgress(value=0.0, description='        Downloaded', layout=Layout(flex='2'), max=9000000…

In [15]:
print(data)
print(t1 - t0)

{b'EleEta': <JaggedArray [[] [0.675038754940033 0.2761063873767853 0.12661254405975342] [] ... [0.18539121747016907] [-0.1793559193611145 1.324408769607544 -0.062219955027103424] []] at 0x0197297ed188>}
1.765625


In [18]:
def test_lambda_capture():
    dataset = ServiceXDataset("mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00")
    jets = ServiceXDatasetSource(dataset) \
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets")') \
        .Select('lambda e: (e.Select(lambda jet: jet.pt()), \
                            e.Select(lambda jet: jet.eta()), \
                            e.Select(lambda jet: jet.phi()))') \
        .AsPandasDF(("JetPt", "JetEta", "JetPhi")) \
        .value()

    electrons = ServiceXDatasetSource(dataset) \
        .Select('lambda e: e.Electrons("Electrons")') \
        .Select('lambda e: (e.Select(lambda ele: ele.pt()), \
                            e.Select(lambda ele: ele.eta()), \
                            e.Select(lambda ele: ele.phi()))') \
        .AsPandasDF(("ElePt", "EleEta", "ElePhi")) \
        .value()

#    jet_four_vector = uproot_methods.TLorentzVectorArray.from_ptetaphi(jets[b'JetPt'], jets[b'JetEta'], jets[b'JetPhi'], jets[b'JetE'])
#    ele_four_vector = uproot_methods.TLorentzVectorArray.from_ptetaphi(electrons[b'ElePt'], electrons[b'EleEta'], electrons[b'ElePhi'], jets[b'EleE'])
    jetr = abs(math.sqrt(jets.JetEta**2 + jets.JetPhi**2))
    eler = abs(math.sqrt(electrons.EleEta**2 + electrons.ElePhi**2))

    for electron in eler:
        event_counter = 0
        electrons_within_tolerance = []
        for jet in jetr:
            if abs(jet - electron) <= 1.0:
                electrons_within_tolerance.append(event_counter)
        event_counter += 1
    
    final_list = []

    for i in range(len(electrons_within_tolerance)):
        final_list.append(electrons.ElePt[electrons_within_tolerance[i]])
        
    return final_list

In [20]:
    dataset = ServiceXDataset("mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00")
    jets = ServiceXDatasetSource(dataset) \
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets")') \
        .Select('lambda e: (e.Select(lambda jet: jet.pt()), \
                            e.Select(lambda jet: jet.eta()), \
                            e.Select(lambda jet: jet.phi()))') \
        .AsPandasDF(("JetPt", "JetEta", "JetPhi")) \
        .value()

    electrons = ServiceXDatasetSource(dataset) \
        .Select('lambda e: e.Electrons("Electrons")') \
        .Select('lambda e: (e.Select(lambda ele: ele.pt()), \
                            e.Select(lambda ele: ele.eta()), \
                            e.Select(lambda ele: ele.phi()))') \
        .AsPandasDF(("ElePt", "EleEta", "ElePhi")) \
        .value()

In [22]:
print(jets.JetPt)
print(jets.JetEta)

entry   subentry
0       0           52898.621094
        1           28133.312500
        2           15308.400391
        3            5986.582031
        4            5813.203125
                        ...     
149998  7            6211.655273
149999  0           47653.144531
        1           32738.951172
        2            6260.788574
        3            5394.782715
Name: JetPt, Length: 11355980, dtype: float64
entry   subentry
0       0          -0.127285
        1           0.588417
        2           1.202751
        3          -1.154236
        4          -0.962194
                      ...   
149998  7          -1.823650
149999  0          -4.098131
        1          -3.804388
        2           0.380551
        3          -1.682883
Name: JetEta, Length: 11355980, dtype: float64


In [2]:
large_dataset = ServiceXDataset('data17_13TeV:data17_13TeV.periodK.physics_Main.PhysCont.DAOD_STDM7.grp23_v01_p4030', max_workers = 400)
t0 = time.process_time()
query = ServiceXDatasetSource(large_dataset) \
        .SelectMany('lambda e: (e.Jets("AntiKt4EMTopoJets"))') \
        .Where('lambda j: (j.pt()/1000)>30') \
        .Select('lambda j: (j.pt())') \
        .AsPandasDF("JetPt") \
        .value()
t1 = time.process_time()

HBox(children=(FloatProgress(value=0.0, description='data17_13TeV:data17_13TeV.periodK.physics_Main.PhysCont.D…

HBox(children=(FloatProgress(value=0.0, description='        Downloaded', layout=Layout(flex='2'), max=9000000…

ServiceXException: (ServiceXException(...), 'Failed to transform all files')

In [3]:
print(t1 - t0)

NameError: name 't1' is not defined