## Import modules

In [1]:
# import math
# import operator
import sys
import time
# from collections import OrderedDict
# from functools import reduce
import json
import pickle

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
# from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# from skhep.visual import MplPlotter as skh_plt

# import multiprocessing
import concurrent.futures

from XRootD import client
from XRootD.client.flags import DirListFlags, StatInfoFlags, OpenFlags, MkDirFlags, QueryCode
# import xrdfs_find

Import local classes from utils

In [2]:
# Enable IPython's autoreload extension and register the local utils modules
# with %aimport.
# NOTE(review): a later cell's recorded output is a PicklingError stating that
# HistogramContainer is "not the same object" as
# utils.HistogramContainer.HistogramContainer; presumably a reload of these
# modules after instances were created is the cause -- TODO confirm.
%load_ext autoreload
%autoreload 1
%aimport utils.ObjectExtractor
%aimport utils.PlotMaker
%aimport utils.HistogramContainer
%aimport utils.HistogramCalculator
# Short module aliases used by the rest of the notebook.
OE = utils.ObjectExtractor
PM = utils.PlotMaker
HCont = utils.HistogramContainer
HCalc = utils.HistogramCalculator

## Global initialization

In [138]:
# Record the interpreter version used for this run.
print(sys.version_info)

# Shared thread pool with 48 workers.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=48)

# Matplotlib look: default style, dotted grid lines, 10 pt font.
plt.style.use('default')
plt.rcParams.update({'grid.linestyle': ':', 'font.size': 10})

# Cut indices 0..5.
numCuts = np.arange(6)

# Directory inside each ROOT file from which the cuts tree is read.
branch_path = 'SREffi_gbm'

sys.version_info(major=3, minor=6, micro=4, releaselevel='final', serial=0)


In [4]:
# Names of the histogrammed quantities; the first four are MET/jet
# variables, the last four are muon variables.
all_plot_vars = ['metpt', 'jetpt','metjetphi', 'metmuphi', 'leadingmupt', 'subleadingmupt', 'recodr', 'recovertex']
plot_vars_metjet = all_plot_vars[0:4] #['metpt', 'jetpt', 'metjetphi', 'metmuphi']
plot_vars_muons = all_plot_vars[4:8] #['leadingmupt', 'subleadingmupt', 'recodr', 'recovertex']
cutflow_vars = ['cutflow_incl', 'cutflow_excl']
# Axis labels, in the same order as all_plot_vars.  The LaTeX labels are raw
# strings so that "\D" and "\P" are not parsed as (invalid) escape sequences.
all_plot_xlabels = [
    'MET [GeV]', 'Leading jet pT [GeV]', r'$\Delta\Phi$(MET, jet)', r'$\Delta\Phi$(MET, di-muon)',
    'Leading muon pT [GeV]', 'Subleading muon pT [GeV]', 'dR(muons)', 'Di-muon vertex [cm]']

In [5]:
# One empty per-sample dict for every plotted variable, followed by the two
# cutflows and the running sum of generator weights.
histos = {
    name: {}
    for name in (*all_plot_vars, 'cutflow_incl', 'cutflow_excl', 'sumgenwgt')
}
# Every plotted variable is histogrammed with 60 bins.
all_bins = dict.fromkeys(all_plot_vars, 60)

## Process signal

In [84]:
# (Mchi, dMchi) tags of the signal samples; 'p' stands for the decimal point.
masses = [('60p0', '20p0'), ('6p0', '2p0'), ('52p5', '5p0'), ('5p25', '0p5')]


def print_masses(mass):
    """Format a (Mchi, dMchi) tag pair as a mass-range label.

    Parameters
    ----------
    mass : sequence of str
        The first two entries are numbers written with 'p' in place of the
        decimal point (e.g. ``('60p0', '20p0')``).

    Returns
    -------
    str
        ``'(lo, hi) GeV'`` with ``lo = Mchi - dMchi/2`` and
        ``hi = Mchi + dMchi/2``.  Nothing is printed.
    """
    mchi = float(mass[0].replace('p', '.'))
    dmchi = float(mass[1].replace('p', '.'))
    lower = mchi - dmchi / 2
    upper = mchi + dmchi / 2
    return f'({lower}, {upper}) GeV'
    
# Map each Mchi tag to its formatted mass-range label.
mchis = {mass[0]: print_masses(mass) for mass in masses}
# Lifetime values to process (the tail of the original list, "10, 100, 1000",
# is commented out in the source).
ctaus = [10]

# One short label per cut index.
labels = [f'cut{index}' for index in numCuts]
# Human-readable description of cuts 1 to 5.
cut_descriptions = [
    'cut1: MET/MHT trigger fired (120 GeV)',
    'cut2: j1 pT > 120 GeV, <= 2j w/ pT > 30 GeV',
    'cut3: mu1 pT > 5 GeV, 0.1 < |dxy| < 700 cm',
    'cut4: mu2 pT > 5 GeV, 0.1 < |dxy| < 700 cm',
    r'cut5: $|\Delta\Phi$(MET, mu pair)| < 0.4',
]


# Directory holding the signal ROOT files.
base_dir = '../Firefighter/washAOD/SROptimization/'


def filename(Mchi, dMchi, ctau):
    """Return the path of the signal ROOT file for one parameter point.

    The file name is built from the three arguments as
    ``Mchi-<Mchi>_dMchi-<dMchi>_ctau-<ctau>.root`` and appended to
    ``base_dir``.
    """
    sample_name = f'Mchi-{Mchi}_dMchi-{dMchi}_ctau-{ctau}.root'
    return base_dir + sample_name

In [221]:
# Empty containers, filled per Mchi and ctau by the loading loop.
trees_gbm, gen_info_gbm = {}, {}

In [222]:
# Load the GEN tree and the cuts tree of every (Mchi, dMchi, ctau) signal point.
num_params = len(masses)*len(ctaus)
count_param = 1

for (Mchi, dMchi) in masses:
    trees_gbm[Mchi] = dict()
    gen_info_gbm[Mchi] = dict()
    for ctau in ctaus:
        # Build the path and open the ROOT file only once per parameter point;
        # both objects are then read from the same handle.
        sample_path = filename(Mchi, dMchi, ctau)
        root_file = uproot.open(sample_path)
        gen_info_gbm[Mchi][ctau] = root_file['GEN/gen']#.pandas.df(flatten=False)
        trees_gbm[Mchi][ctau] = root_file[branch_path + '/cutsTree']#.pandas.df(flatten=False)
        print(f'{count_param} of {num_params}: ' + sample_path)
        count_param += 1

1 of 4: ../Firefighter/washAOD/SROptimization/Mchi-60p0_dMchi-20p0_ctau-10.root
2 of 4: ../Firefighter/washAOD/SROptimization/Mchi-6p0_dMchi-2p0_ctau-10.root
3 of 4: ../Firefighter/washAOD/SROptimization/Mchi-52p5_dMchi-5p0_ctau-10.root
4 of 4: ../Firefighter/washAOD/SROptimization/Mchi-5p25_dMchi-0p5_ctau-10.root


In [230]:
# Lifetime whose trees are histogrammed.  It is chosen explicitly (last entry
# of `ctaus`) instead of relying on a loop variable left over from another
# cell; only this single lifetime is processed per mass point.
ctau = ctaus[-1]

# One per-sample dict for every plotted variable and every cutflow.
histos_signal = {name: {} for name in (*all_plot_vars, *cutflow_vars)}

for mchi in mchis:
    ### Start from empty histograms and zeroed cutflows for this mass point
    for plot_var in all_plot_vars:
        histos_signal[plot_var][mchi] = HCont.HistogramContainer(all_bins[plot_var])
    histos_signal['cutflow_incl'][mchi] = np.zeros(len(numCuts))
    histos_signal['cutflow_excl'][mchi] = np.zeros(len(numCuts))

    ### Make pandas dataframes and create all objects that will be passed to histo functions
    obj_extractor = OE.ObjectExtractor(trees_gbm[mchi][ctau], mchi)
    objects = obj_extractor.get_all()

    ### Calculate histograms and cutflows
    histo_computer = HCalc.HistogramCalculator(objects, mchi)

    ### Cutflows
    incl, excl = histo_computer.cutflows()
    histos_signal['cutflow_incl'][mchi] += incl
    histos_signal['cutflow_excl'][mchi] += excl

    ### Histograms
    for plot_var in all_plot_vars:
        # Call the calculator method named after the variable; getattr
        # replaces eval on a formatted string.
        new_hist = getattr(histo_computer, plot_var)()
        histos_signal[plot_var][mchi] += new_hist

Sample "60p0" does not have either pileup or weight information
Sample "6p0" does not have either pileup or weight information
Sample "52p5" does not have either pileup or weight information
Sample "5p25" does not have either pileup or weight information


In [232]:
# Persist the signal histograms and cutflows with pickle.
with open('histos_signal_objects_gbm.dat', 'wb') as signal_out:
    pickle.dump(histos_signal, signal_out)

## Process backgrounds

In [None]:
# Read the background configuration from its JSON file.
with open('config/bkgs.json') as config_stream:
    bkgs = json.loads(config_stream.read())

In [196]:
# File system used to list the background directories.
xrdfs = client.FileSystem("root://cmseos.fnal.gov/")

# Prefix put in front of every file path that is handed to uproot later.
redirector = 'root://cmsxrootd.fnal.gov'
bkg_base_dir = '/store/group/lpcmetx/iDM/Ntuples/2018/backgrounds'
# Maps each background name to the list of its ROOT file URLs.
files = {}

for bkg, properties in bkgs.items():
    sample_dir = f'{bkg_base_dir}/{properties["dir"]}'
    status, listing = xrdfs.dirlist(sample_dir, DirListFlags.STAT)
    # Fail with a clear message when the listing did not succeed, instead of
    # an opaque TypeError from iterating over a missing listing.
    if not status.ok:
        raise RuntimeError(
            f'Could not list {sample_dir} for background {bkg}: {status.message}')
    files[bkg] = [
        f'{redirector}/{sample_dir}/{entry.name}'
        for entry in listing
        if '.root' in entry.name
    ]

In [197]:
# Total number of background files, then the per-background counts.
num_files_total = np.sum([len(paths) for paths in files.values()])
print(num_files_total)
[(name, len(paths)) for name, paths in files.items()]

1364


[('DYJetsToLL', 490),
 ('QCD_bEnriched_HT100to200', 82),
 ('QCD_bEnriched_HT200to300', 63),
 ('QCD_bEnriched_HT300to500', 13),
 ('QCD_bEnriched_HT500to700', 24),
 ('QCD_bEnriched_HT700to1000', 11),
 ('QCD_bEnriched_HT1000to1500', 1),
 ('QCD_bEnriched_HT1500to2000', 1),
 ('QCD_bEnriched_HT2000toINF', 1),
 ('TTTo2L2Nu', 172),
 ('TTJets', 30),
 ('TT_diLept', 5),
 ('WJetsToLNu_HT-70To100', 168),
 ('WJetsToLNu_HT-100To200', 66),
 ('WJetsToLNu_HT-200To400', 37),
 ('WJetsToLNu_HT-400To600', 7),
 ('ZJetsToNuNu_HT-100To200', 54),
 ('ZJetsToNuNu_HT-200To400', 41),
 ('ZJetsToNuNu_HT-400To600', 36),
 ('ZJetsToNuNu_HT-600To800', 12),
 ('ZJetsToNuNu_HT-800To1200', 10),
 ('ZJetsToNuNu_HT-1200To2500', 1),
 ('ZJetsToNuNu_HT-2500ToInf', 1),
 ('WWJJToLNuLNu', 1),
 ('WWTo2L2Nu', 20),
 ('WZTo3LNu', 1),
 ('ZZTo2L2Nu', 16)]

In [198]:
%%time

MAX_FILES=None # To load all possible files
# MAX_FILES=1 # For testing

# `histos` and `all_bins` are not created here; the per-background entries of
# `histos` are (re)initialized below before any file is read.

# Running count of files over all backgrounds, used for the progress printout.
global_file_counter = 1

for bkg_index, bkg in enumerate(bkgs, start=1):

    print(f'Processing background {bkg} ({bkg_index}/{len(bkgs)})')

    ### Initialize histograms as empty HistogramContainers
    for plot_var in all_plot_vars:
        histos[plot_var][bkg] = HCont.HistogramContainer(all_bins[plot_var])
    histos['cutflow_incl'][bkg] = np.zeros(len(numCuts))
    histos['cutflow_excl'][bkg] = np.zeros(len(numCuts))
    histos['sumgenwgt'][bkg] = 0.0

    ### Load data (at most MAX_FILES files; all of them when MAX_FILES is None)
    for file_counter, file in enumerate(files[bkg][:MAX_FILES], start=1):

        # On files 1, 11, 21, ...: report progress and checkpoint the
        # histograms accumulated so far.
        if file_counter % 10 == 1:
            print(f'Reading file {file_counter} of {len(files[bkg])},'
                  f' global {global_file_counter} of {num_files_total}'
                  f' ({100*(global_file_counter-1)/num_files_total:.2f}%)')
            with open('histos_temp.dat', 'wb') as histos_file:
                pickle.dump(histos, histos_file)
        global_file_counter += 1

        ### Open ROOT file and get tree
        tree = uproot.open(file)[branch_path + '/cutsTree']

        ### Make pandas dataframes and create all objects that will be passed to histo functions
        obj_extractor = OE.ObjectExtractor(tree)
        objects = obj_extractor.get_all()

        ## Add to sum of genwgts
        histos['sumgenwgt'][bkg] += np.sum(objects['genwgt'])

        ### Calculate histograms and cutflows
        histo_maker = HCalc.HistogramCalculator(objects, bkg)

        ### Cutflows
        incl, excl = histo_maker.cutflows()
        histos['cutflow_incl'][bkg] += incl
        histos['cutflow_excl'][bkg] += excl

        ### Histograms
        for plot_var in all_plot_vars:
            # Call the calculator method named after the variable; getattr
            # replaces eval on a formatted string.
            new_hist = getattr(histo_maker, plot_var)()
            histos[plot_var][bkg] += new_hist

Processing background ZJetsToNuNu_HT-100To200 (17/27)
Reading file 1 of 54, global 1 of 1364 (0.00%)
Reading file 11 of 54, global 11 of 1364 (0.73%)
Reading file 21 of 54, global 21 of 1364 (1.47%)
Reading file 41 of 54, global 41 of 1364 (2.93%)
Reading file 51 of 54, global 51 of 1364 (3.67%)
Processing background ZJetsToNuNu_HT-200To400 (18/27)
Reading file 1 of 41, global 55 of 1364 (3.96%)
Reading file 11 of 41, global 65 of 1364 (4.69%)
Reading file 21 of 41, global 75 of 1364 (5.43%)
Reading file 31 of 41, global 85 of 1364 (6.16%)
Reading file 41 of 41, global 95 of 1364 (6.89%)
Processing background ZJetsToNuNu_HT-400To600 (19/27)
Reading file 1 of 36, global 96 of 1364 (6.96%)
Reading file 11 of 36, global 106 of 1364 (7.70%)
Reading file 21 of 36, global 116 of 1364 (8.43%)
Reading file 31 of 36, global 126 of 1364 (9.16%)
Processing background ZJetsToNuNu_HT-600To800 (20/27)
Reading file 1 of 12, global 132 of 1364 (9.60%)
Reading file 11 of 12, global 142 of 1364 (10.34%)

In [208]:
luminosity = 59.97 * 1000 # 1/pb
# Per-background weight: luminosity * cross section / sum of generator weights.
for bkg, properties in bkgs.items():
    sum_genwgt = histos['sumgenwgt'].get(bkg)
    # A background that was never processed (no entry) or that accumulated a
    # zero sum cannot be normalized.  Leave its 'weight' unset, which the
    # weighted cutflow cell already tolerates, rather than aborting the loop
    # with a KeyError / ZeroDivisionError.
    if not sum_genwgt:
        print(f'Skipping weight for {bkg}: no generator weights were accumulated')
        continue
    properties['weight'] = luminosity * properties['xsec'] / sum_genwgt

In [199]:
# Print name, weight and summed generator weight of every background;
# entries for which a lookup fails with KeyError are skipped.
for bkg, properties in bkgs.items():
    try:
        sum_genwgt = histos['sumgenwgt'][bkg]
        weight = luminosity * properties['xsec'] / sum_genwgt
    except KeyError:
        continue
    print(bkg, weight, sum_genwgt)

DYJetsToLL 3.1989342473853943 100108278.33105469
QCD_bEnriched_HT100to200 1710.3151354724303 39341486.609375
QCD_bEnriched_HT200to300 257.47818781801607 18672629.8671875
QCD_bEnriched_HT300to500 83.19688493156073 12030490.09375
QCD_bEnriched_HT500to700 8.701149003075141 10200445.9375
QCD_bEnriched_HT700to1000 10.09261989076103 1762981.1875
QCD_bEnriched_HT1000to1500 5.668163836746107 491976.78125
QCD_bEnriched_HT1500to2000 0.5451332217184982 409126.46875
QCD_bEnriched_HT2000toINF 0.0979757226515774 396573.375
TTTo2L2Nu 0.008885813924168618 4637210204.0
TTJets 2.879345680798001 10234706.515625
TT_diLept 0.0151966502648976 286025244.0
WJetsToLNu_HT-70To100 2.7458663433947224 28130050.8984375
WJetsToLNu_HT-100To200 8.740290350128095 9557830.078125
WJetsToLNu_HT-200To400 0.9559325101988377 25457682.1484375
WJetsToLNu_HT-400To600 0.5860504341753632 5913597.359375
ZJetsToNuNu_HT-100To200 0.7514695079011287 24260305.71875
ZJetsToNuNu_HT-200To400 0.23164843002810628 23716334.703125
ZJetsToNuNu

In [227]:
# Persist the background histograms, cutflows and generator-weight sums.
# NOTE(review): the recorded output of this cell is a PicklingError saying
# HistogramContainer is "not the same object" as
# utils.HistogramContainer.HistogramContainer; presumably the class was
# reloaded after the instances stored in `histos` were created -- TODO confirm
# and rebuild `histos` before saving.
with open('histos_bkgs_objects_gbm.dat', 'wb') as bkgs_out:
    pickle.dump(histos, bkgs_out)

PicklingError: Can't pickle <class 'utils.HistogramContainer.HistogramContainer'>: it's not the same object as utils.HistogramContainer.HistogramContainer

In [75]:
# Unweighted inclusive cutflow summed per group; groups whose name contains
# one of these signal mass tags are left out.
signal_tags = ('60p0', '5p25', '52p5', '6p0')

cutFlowInclGrp = {}
for grp, members in bkg_grps.items():
    if any(tag in grp for tag in signal_tags):
        continue
    for bkg in members:
        counts = histos['cutflow_incl'][bkg].astype(int)
        if grp not in cutFlowInclGrp:
            cutFlowInclGrp[grp] = counts
        else:
            cutFlowInclGrp[grp] += counts

pd.DataFrame.from_dict(cutFlowInclGrp)

Unnamed: 0,Di-boson,V+Jets,TTbar,QCD,DY
0,38576300,292182833,105324364,212168469,100194597
1,1784391,22787706,5905556,574661,42378
2,346020,6276581,142540,61747,8944
3,55453,1155522,70082,32028,2992
4,10672,59317,15914,4300,320
5,1011,7282,1229,2492,102


In [213]:
# Weighted inclusive cutflow summed per group.  Each sample's weighted counts
# are converted to int before being added; samples for which a lookup fails
# with KeyError are skipped.  (The signal-group filter used for the unweighted
# table is commented out in the source and not applied here.)
cutFlowInclGrp2 = {}
for grp, members in bkg_grps.items():
    for bkg in members:
        try:
            weighted_counts = (histos['cutflow_incl'][bkg]*bkgs[bkg]['weight']).astype(int)
        except KeyError:
            continue
        if grp not in cutFlowInclGrp2:
            cutFlowInclGrp2[grp] = weighted_counts
        else:
            cutFlowInclGrp2[grp] += weighted_counts


pd.DataFrame.from_dict(cutFlowInclGrp2)

Unnamed: 0,Di-boson,V+Jets,TTbar,QCD,DY
0,310108,213689064,30080860,73223324490,320515927
1,22300,7621102,1158423,1685736,135564
2,1918,2494019,17336,228249,28611
3,6,6653,201,13629,99
4,0,16,2,362,0
5,0,0,0,317,0
