In [1]:
import awkward as ak
import numpy as np
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
# Disable the schema's missing-cross-reference warnings for this notebook.
# NOTE(review): presumably these fire because NanoGen files lack collections the
# full NanoAOD schema links between — confirm against the coffea documentation.
NanoAODSchema.warn_missing_crossrefs = False

These samples were produced using the MG process: 
```
p p > t l- vl~, (t > l+ vl b) 
p p > t~ l+ vl, (t~ > l- vl~ b~)
```

Looking at my NanoGen output, each event in `events.LHEPart` has this ordering: 

`[incoming1, incoming2, lepton1, neutrino1, b, lepton2, neutrino2]`

where incoming1/incoming2 are usually a combination of a gluon and b quark as the initial state from the pp collision. 
The two incoming particles have LHEPart.status = -1, so it is straightforward to distinguish these from the b quark produced by the top quark decay. 
Using the sign of the pdgId of the b quark from the top decay, I can then pick the correct lepton/neutrino pair via their pdgId signs (+/-).
Since we are interested in looking at the lepton and neutrino pair produced at the same time as the top quark (not from the decay): 
- if the b quark pdgId = +5, I need to choose the lepton and anti-neutrino. 
- if the b quark pdgId = -5, I need to choose the antilepton, neutrino pair. 

After some manual checking, it looks like indices 5 and 6 within each event (i.e. `events.LHEPart[:, 5]` and `events.LHEPart[:, 6]`) are always the correct ones for this.
The loop below confirms this.

In [3]:
# Input NanoGen ROOT files, keyed as "stpt<N>_files"; group N reads from the
# matching NanoGen_tWLO_rwgt<N> directory. Job numbers are listed in the order
# the files are processed.
files = {
    f"stpt{point}_files": [
        "/cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/"
        f"NanoGen_tWLO_rwgt{point}/nanoGen_{job}.root"
        for job in jobs
    ]
    for point, jobs in (
        (1, (376, 387, 378, 373, 379, 382, 384)),
        (2, (383, 385, 374, 380, 375, 386, 377)),
    )
}

In [19]:
# Validate, event by event, that every input file follows the assumed LHEPart layout
#   [incoming1, incoming2, lepton1, neutrino1, b, lepton2, neutrino2]
# and that the particles at indices 5 and 6 form the lepton/neutrino pair whose
# signs match the b quark at index 4. Any violation raises AssertionError.

# (lepton pdgId, neutrino pdgId) combinations accepted at indices 5 and 6 when the
# b quark has pdgId +5 (lepton + anti-neutrino). For pdgId -5 every sign flips,
# which is handled below by multiplying the observed ids by the b-quark sign.
ASSOCIATED_PAIRS = ((11, -12), (13, -14), (15, -16))

for file_group in files:
    print(f"{file_group}")
    for fname in files[file_group]:
        print(f"checking {fname} ... ")
        events = NanoEventsFactory.from_root(
            fname,
            schemaclass=NanoAODSchema.v6,
            metadata={"dataset": f"{fname[16:]}"},
        ).events()

        lhepart = events.LHEPart

        for i in range(len(events)):
            # Index the event once instead of re-slicing lhepart[i] for every field.
            parts = lhepart[i]

            # Guard the fixed indices used below so a short event fails with a
            # readable message rather than a bare IndexError.
            assert len(parts) >= 7, f"event {i}: expected at least 7 LHE particles, found {len(parts)}"

            # check that the 0th and 1st particles are incoming particles
            status0 = parts[0].status
            status1 = parts[1].status
            assert (status0 == -1 and status1 == -1), f"1st or 2nd particle in LHE not incoming! status[0]={status0}, status[1]={status1}"

            # check that the particle at index 4 is a b quark and keep its pdgId
            bquark_id = int(parts[4].pdgId)
            # The original message lacked the f-prefix and referenced an undefined
            # name, so a failure never showed the offending pdgId.
            assert abs(bquark_id) == 5, f"Not a b quark in index4! pdgId = {bquark_id}"

            lep_id = int(parts[5].pdgId)
            nu_id = int(parts[6].pdgId)

            # b (+5)     -> expect lepton + anti-neutrino of the same flavour
            # bbar (-5)  -> expect antilepton + neutrino of the same flavour
            # Multiplying by the b-quark sign maps both cases onto ASSOCIATED_PAIRS.
            sign = 1 if bquark_id == 5 else -1
            lep_nu_truth = (sign * lep_id, sign * nu_id) in ASSOCIATED_PAIRS
            assert lep_nu_truth, f"lepton or neutrino mismatched: lep_id = {lep_id}, nu_id={nu_id}"
        print("Success! No mismatches found. \n")
    print("--------------------------------------------- \n")

stpt1_files
checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_376.root ... 
Success! No mismatches found. 

checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_387.root ... 
Success! No mismatches found. 

checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_378.root ... 
Success! No mismatches found. 

checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_373.root ... 
Success! No mismatches found. 

checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_379.root ... 
Success! No mismatches found. 

checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_382.root ... 
Success! No mismatches found. 

checking /cms/cephfs/data/store/user/hnelson2/mc/NanoGen/tWLO_SMEFTsimtop/NanoGen_tWLO_rwgt1/nanoGen_384.r