In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
import git

import uproot as ut
import awkward as ak
import numpy as np
import math
import vector
import sympy as sp

import re
from tqdm import tqdm
import timeit

# Append the root of the enclosing git working tree to sys.path (the repository is
# located by searching parent directories of the current directory).
# Presumably this is what makes the `utils` package below importable -- confirm.
sys.path.append( git.Repo('.', search_parent_directories=True).working_tree_dir )
# NOTE(review): names used later in this notebook without an explicit import
# (plt, fc, Tree, TreeIter, ObjIter, FilterSequence, CollectionFilter, EventFilter)
# presumably come from this star import -- confirm.
from utils import *

import utils.torchUtils as gnn

# NOTE(review): 'science' is not one of matplotlib's built-in styles; presumably it is
# registered by an extra package pulled in through `utils` -- confirm it is available.
plt.style.use('science')


In [2]:
# Name of the output sub-directory under data/ used by the cells below.
subset = 'top6btag_signal'

# Jet-level mask: the inner argsort orders jets by descending jet_btag, and the outer
# argsort turns that ordering into each jet's rank, so `rank < 6` is True for the six
# highest-btag jets of every event.
# NOTE(review): presumably CollectionFilter keeps the 'jet' entries where the mask is
# True -- confirm against its implementation.
event_filter = FilterSequence(
    CollectionFilter(
        'jet',
        filter=lambda t: ak.argsort(
            ak.argsort(t.jet_btag, axis=-1, ascending=False)
        ) < 6,
    )
)

def has_higgs(t):
    """Flag events that contain at least one matched jet pair.

    Every unordered pair of jets within an event is formed from
    ``t.jet_signalId``. A pair counts as matched when its two ids differ by
    exactly one and sum to 1, 5, 9 or 13, i.e. the ids are (0, 1), (2, 3),
    (4, 5) or (6, 7).

    NOTE(review): presumably ids 2k and 2k+1 label the two b quarks of the
    k-th Higgs and -1 labels jets without a match -- confirm against the
    ntuple definition.

    Args:
        t: object exposing ``jet_signalId``; assumed to be a per-event jagged
            awkward array of integer ids -- TODO confirm.

    Returns:
        One boolean per event, True when any jet pair in that event is matched.
    """
    # Combine the ids directly; going through local indices and indexing back
    # into the same array yields the same pairs.
    j1_id, j2_id = ak.unzip(ak.combinations(t.jet_signalId, 2))
    pair_sum = j1_id + j2_id

    # Ids that differ by one always have an odd sum, so no separate parity
    # check is needed; the allowed sums restrict the lower id to 0, 2, 4 or 6.
    paired = (np.abs(j1_id - j2_id) == 1) & (
        (pair_sum == 1) | (pair_sum == 5) | (pair_sum == 9) | (pair_sum == 13)
    )

    # A matched pair always maps to a non-negative pair id (lower id // 2), so
    # testing the mask itself is equivalent to building that id and comparing
    # it against -1.
    return ak.any(paired, axis=-1)

# Signal selection: combines event_filter with an event-level cut requiring exactly
# eight jets whose jet_signalId is greater than -1.
# NOTE(review): event_filter's mask selects at most six jets per event. If this cut
# sees the already-reduced jet collection, a count of 8 can never be reached --
# confirm the order in which FilterSequence applies its members.
signal_filter = FilterSequence(
    event_filter,
    # Disabled alternative: select events through has_higgs instead.
    # EventFilter('at least one pair', filter=has_higgs)
    EventFilter('only signal bs',filter=lambda t:ak.sum(t.jet_signalId>-1,axis=-1)==8)
)

In [3]:
# Single Tree built from the whole training signal file list; it is the input for the
# template dataset created in a later cell.
temptree = Tree(fc.eightb.training.signal_list)

In [4]:
import glob
import os
import shutil

# Remove everything previously written under data/<subset>/ so the datasets below are
# rebuilt from scratch. This is done in Python rather than by handing an interpolated
# string to a shell: nothing in `subset` can be interpreted as shell syntax, and a
# failed removal raises instead of being reduced to an ignored exit status.
# Like the shell glob `*`, glob.glob skips dot-files and returns nothing when the
# directory does not exist, and the directory itself is kept.
for stale_path in glob.glob(f'data/{subset}/*'):
    if os.path.isdir(stale_path) and not os.path.islink(stale_path):
        shutil.rmtree(stale_path)
    else:
        # Regular files and symlinks (including symlinks to directories).
        os.remove(stale_path)

0

In [5]:
# Build the template dataset from the training signal tree; every later
# gnn.Dataset(...) call that builds from a tree receives this object.
template = gnn.Dataset('data/template', temptree, make_template=True)

Processing...


Building Template Dataset...
Building Features...
Saving Dataset...


Done!


In [6]:
# Open one Tree per training signal file and run the signal selection over all of them.
trainiter = TreeIter(
    [Tree(fn) for fn in fc.eightb.training.signal_list]
).apply(signal_filter.filter)

In [7]:
# One gnn.Dataset per filtered training tree, rooted at data/<subset>/<sample>-training
# and built against the shared template (the log below reports each one being built and saved).
dataset = [ gnn.Dataset(f'data/{subset}/{tree.sample}-training',tree,template) for tree in trainiter ]

Processing...


Building Features...
Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...


Done!
Processing...


Building Features...
Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...


Done!
Processing...


Building Features...
Building Dataset...
Saving Dataset...


Done!


In [8]:
# Open one Tree per file of the preselection signal list; these serve as the testing sample.
testiter = TreeIter([Tree(fn) for fn in fc.eightb.preselection.signal_list])

In [9]:
# Apply the same signal selection as for the training trees.
# Note: this rebinds `testiter`, so re-running only this cell filters the already-filtered trees again.
testiter = testiter.apply(signal_filter.filter)

In [10]:
# One gnn.Dataset per filtered testing tree, rooted at data/<subset>/<sample>-testing.
# Note: this rebinds `dataset`, so the list of training datasets created earlier is no
# longer reachable under that name.
dataset = [ gnn.Dataset(f'data/{subset}/{tree.sample}-testing',tree,template) for tree in testiter ]

Processing...


Building Features...
Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...


Done!


In [11]:
# One Tree per entry of the QCD b-enriched / BGenFilter preselection list.
# allow_empty=True presumably lets samples without events be opened instead of
# raising -- confirm; the dataset loop below skips trees with raw_events == 0.
qcdtrees = ObjIter([ Tree(qcd,allow_empty=True) for qcd in fc.eightb.preselection.QCD_B_List ])

In [12]:
# Background only gets the jet-level selection (no signal-matching requirement).
# NOTE(review): the signal cells pass `signal_filter.filter` to apply(), whereas this
# passes the FilterSequence object itself -- confirm both forms are supported.
# Note: this rebinds `qcdtrees`, so re-running only this cell filters twice.
qcdtrees = qcdtrees.apply(event_filter)

In [13]:
for qcdtree in qcdtrees:
    # The dataset is named after the parent directory of the tree's first input file.
    fn = qcdtree.filelist[0].fname.split('/')[-2]
    print(fn)
    # Skip trees whose raw_events count is not positive.
    if not (qcdtree.raw_events > 0):
        continue
    qcdgraph = gnn.Dataset(f'data/{subset}/{fn}', qcdtree, template)

QCD_bEnriched_HT100to200_TuneCP5_13TeV-madgraph-pythia8
Building Features...


Processing...


Building Dataset...
Saving Dataset...
QCD_bEnriched_HT200to300_TuneCP5_13TeV-madgraph-pythia8
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
QCD_bEnriched_HT300to500_TuneCP5_13TeV-madgraph-pythia8
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_bEnriched_HT500to700_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_bEnriched_HT700to1000_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_bEnriched_HT1000to1500_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_bEnriched_HT1500to2000_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_bEnriched_HT2000toInf_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_HT100to200_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
QCD_HT200to300_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...
QCD_HT300to500_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...
QCD_HT500to700_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_HT700to1000_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_HT1000to1500_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...


Done!
Processing...


QCD_HT1500to2000_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...
Building Dataset...
Saving Dataset...
QCD_HT2000toInf_BGenFilter_TuneCP5_13TeV-madgraph-pythia8
Building Features...


Done!
Processing...


Building Dataset...
Saving Dataset...


Done!


In [14]:
# Single Tree spanning the whole QCD list; the next cell uses only its `filelist`.
# Note: this rebinds `qcdtree`, which was the loop variable of the dataset-building cell.
qcdtree = Tree(fc.eightb.preselection.QCD_B_List)

In [15]:
from torch.utils.data import ConcatDataset


def get_graph_path(fn):
    """Return the name of the directory that directly contains the file ``fn.fname``."""
    return fn.fname.split('/')[-2]


# Open the dataset stored for each QCD sample and expose them as one concatenated dataset.
qcdgraphs = ConcatDataset([
    gnn.Dataset(
        f"data/{subset}/{fn}",
        transform=gnn.to_uptri_graph(),
        scale='standardize',
    )
    for fn in map(get_graph_path, qcdtree.filelist)
])

[<utils.classUtils.Tree.CopyTree at 0x7f0c79992610>]