## Import modules

In [1]:
import sys
import time
import json
import pickle

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
import concurrent.futures

from XRootD import client
from XRootD.client.flags import DirListFlags, StatInfoFlags, OpenFlags, MkDirFlags, QueryCode

Import the local modules from `utils` (registered with autoreload)

In [2]:
# Load IPython's autoreload extension.
%load_ext autoreload
# Mode 1: only modules registered through %aimport are reloaded before a cell runs.
%autoreload 1
# Register (and import) the four local utils modules with autoreload.
%aimport utils.ObjectExtractor
%aimport utils.PlotMaker
%aimport utils.HistogramContainer
%aimport utils.HistogramCalculator
# Short aliases for the local modules.
OE = utils.ObjectExtractor
PM = utils.PlotMaker
HCont = utils.HistogramContainer
HCalc = utils.HistogramCalculator

## Global initialization

In [3]:
# Record the interpreter version used for this run.
print(sys.version_info)

# Thread pool with 48 workers.
# NOTE(review): no cell visible in this section submits work to it -- confirm it is still needed.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=48)

# Matplotlib appearance: default style, dotted grid lines, font size 10.
plt.style.use('default')
plt.rcParams.update({'grid.linestyle': ':', 'font.size': 10})

# Prefix of the tree path inside each ROOT file (the loops open branch_path + '/cutsTree').
branch_path = 'SREffi_gbm'

# Cut indices 0..5 and the matching 'cut<N>' labels.
numCuts = np.arange(6)
labels = ['cut' + str(cut) for cut in numCuts]

# Human-readable description of each cut.
# NOTE(review): five descriptions (cut1..cut5) versus six labels (cut0..cut5) -- confirm
# how the two lists are paired wherever they are consumed.
cut_descriptions = [
    'cut1: MET/MHT trigger fired (120 GeV)',
    'cut2: j1 pT > 120 GeV, <= 2j w/ pT > 30 GeV',
    'cut3: mu1 pT > 5 GeV, 0.1 < |dxy| < 700 cm',
    'cut4: mu2 pT > 5 GeV, 0.1 < |dxy| < 700 cm',
    r'cut5: $|\Delta\Phi$(MET, mu pair)| < 0.4',
]

sys.version_info(major=3, minor=6, micro=4, releaselevel='final', serial=0)


In [4]:
# Names of the plotted variables; each name is also the HistogramCalculator method
# that the processing loops call to fill the corresponding histogram.
all_plot_vars = ['metpt', 'jetpt','metjetphi', 'metmuphi', 'leadingmupt', 'subleadingmupt', 'recodr', 'recovertex']
plot_vars_metjet = all_plot_vars[0:4] #['metpt', 'jetpt', 'metjetphi', 'metmuphi']
plot_vars_muons = all_plot_vars[4:8] #['leadingmupt', 'subleadingmupt', 'recodr', 'recovertex']
cutflow_vars = ['cutflow_incl', 'cutflow_excl']
# Axis labels, one per entry of all_plot_vars (same order).
# Raw strings keep the LaTeX backslashes literal: '\D' and '\P' are invalid escape
# sequences in a normal string literal and trigger a warning on newer Python versions.
all_plot_xlabels = [
    'MET [GeV]', 'Leading jet pT [GeV]', r'$\Delta\Phi$(MET, jet)', r'$\Delta\Phi$(MET, di-muon)',
    'Leading muon pT [GeV]', 'Subleading muon pT [GeV]', 'dR(muons)', 'Di-muon vertex [cm]']
# Guard against the two parallel lists drifting apart.
assert len(all_plot_xlabels) == len(all_plot_vars), 'every plot variable needs an x-axis label'

In [5]:
# One empty per-sample dict for every plotted variable.
histos = {plot_var: {} for plot_var in all_plot_vars}
# Value handed to HistogramContainer(...) for each variable (presumably the bin count -- confirm).
all_bins = dict.fromkeys(all_plot_vars, 60)
# Per-sample bookkeeping filled by the processing loops: both cutflows and the summed generator weights.
histos.update({'cutflow_incl': {}, 'cutflow_excl': {}, 'sumgenwgt': {}})

## Process signal

In [6]:
## new signal input
# Load the signal sample definitions; each entry provides at least a "dir" key here.
with open('config/sig.json') as sigs_json_file:
    sigs = json.load(sigs_json_file)

# Directory listings go through the EOS endpoint; the files themselves are later
# opened through the redirector URL.
xrdfs = client.FileSystem("root://cmseos.fnal.gov/")

redirector = 'root://cmsxrootd.fnal.gov'
sig_base_dir = '/store/group/lpcmetx/iDM/Ntuples/2018/signal/track_quality/iDM_2018_MC'
files = {}

for sig, properties in sigs.items():
    sample_dir = f'{sig_base_dir}/{properties["dir"]}'
    status, listing = xrdfs.dirlist(sample_dir, DirListFlags.STAT)
    # Fail with a readable message instead of a TypeError from iterating over None
    # when the directory cannot be listed.
    if not status.ok or listing is None:
        raise RuntimeError(f'Could not list {sample_dir} for signal {sig}: {status.message}')
    # Keep only ROOT files, as full URLs behind the redirector.
    files[sig] = [f'{redirector}/{sample_dir}/{entry.name}'
                  for entry in listing if '.root' in entry.name]
num_files_total = np.sum(np.array([len(files[i]) for i in files]))
print(num_files_total)
# Cell output: number of files found per signal sample.
[(i, len(files[i])) for i in files]

1208


[('Mchi-60p0_dMchi-20p0_ctau-1', 229),
 ('Mchi-60p0_dMchi-20p0_ctau-1000', 484),
 ('Mchi-5p25_dMchi-0p5_ctau-1000', 495)]

In [7]:
%%time

MAX_FILES=None # To load all possible files
# MAX_FILES=1 # For testing

# histos and all_bins are created once in the earlier "histos = {}" cell; this cell only
# adds the per-signal entries, so it does not reset what is already stored there.

global_file_counter = 1

for sig_index, sig in enumerate(sigs, start=1):

    print(f'Processing signal {sig} ({sig_index}/{len(sigs)})')

    ### Initialize histograms as empty HistogramContainers
    for plot_var in all_plot_vars:
        histos[plot_var][sig] = HCont.HistogramContainer(all_bins[plot_var])
    histos['cutflow_incl'][sig] = np.zeros(len(numCuts))
    histos['cutflow_excl'][sig] = np.zeros(len(numCuts))
    histos['sumgenwgt'][sig] = 0.0

    ### Load data (at most MAX_FILES files per sample; None means all of them)
    for file_counter, file in enumerate(files[sig][slice(0,MAX_FILES)], start=1):

        # Every 10th file: report progress and checkpoint the histograms filled so far
        if file_counter % 10 == 1:
            print(f'Reading file {file_counter} of {len(files[sig])},'
                  f' global {global_file_counter} of {num_files_total}'
                  f' ({100*(global_file_counter-1)/num_files_total:.2f}%)')
            with open('histos_temp.dat', 'wb') as histos_file:
                pickle.dump(histos, histos_file)
        global_file_counter += 1

        ### Open ROOT file and get tree
        tree = uproot.open(file)[branch_path + '/cutsTree']

        ### Make pandas dataframes and create all objects that will be passed to histo functions
        obj_extractor = OE.ObjectExtractor(tree)
        objects = obj_extractor.get_all()

        ## Add to sum of genwgts
        histos['sumgenwgt'][sig] += np.sum(objects['genwgt'])

        ### Calculate histograms and cutflows
        histo_maker = HCalc.HistogramCalculator(objects, sig)

        ### Cutflows
        incl, excl = histo_maker.cutflows()
        histos['cutflow_incl'][sig] += incl
        histos['cutflow_excl'][sig] += excl

        ### Histograms
        for plot_var in all_plot_vars:
            # Look the method up by name; same call as before without eval() on a built string
            new_hist = getattr(histo_maker, plot_var)()
            histos[plot_var][sig] += new_hist


Processing signal Mchi-60p0_dMchi-20p0_ctau-1 (1/3)
Reading file 1 of 229, global 1 of 1208 (0.00%)
Reading file 11 of 229, global 11 of 1208 (0.83%)
Reading file 21 of 229, global 21 of 1208 (1.66%)
Reading file 31 of 229, global 31 of 1208 (2.48%)
Reading file 41 of 229, global 41 of 1208 (3.31%)
Reading file 51 of 229, global 51 of 1208 (4.14%)
Reading file 61 of 229, global 61 of 1208 (4.97%)
Reading file 71 of 229, global 71 of 1208 (5.79%)
Reading file 81 of 229, global 81 of 1208 (6.62%)
Reading file 91 of 229, global 91 of 1208 (7.45%)
Reading file 101 of 229, global 101 of 1208 (8.28%)
Reading file 111 of 229, global 111 of 1208 (9.11%)
Reading file 121 of 229, global 121 of 1208 (9.93%)
Reading file 131 of 229, global 131 of 1208 (10.76%)
Reading file 141 of 229, global 141 of 1208 (11.59%)
Reading file 151 of 229, global 151 of 1208 (12.42%)
Reading file 161 of 229, global 161 of 1208 (13.25%)
Reading file 171 of 229, global 171 of 1208 (14.07%)
Reading file 181 of 229, glob

Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Reading file 111 of 484, global 340 of 1208 (28.06%)
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Reading file 121 of 484, global 350 of 1208 (28.89%)
Sample "" does not have either pileup or weight inform

Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Reading file 241 of 484, global 470 of 1208 (38.82%)
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight

Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Reading file 361 of 484, global 590 of 1208 (48.76%)
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Reading file 371 of 484, global 600 of 1208 (49.59%)
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight information
Sample "" does not have either pileup or weight inform

Processing signal Mchi-5p25_dMchi-0p5_ctau-1000 (3/3)
Reading file 1 of 495, global 714 of 1208 (59.02%)
Reading file 11 of 495, global 724 of 1208 (59.85%)
Reading file 21 of 495, global 734 of 1208 (60.68%)
Reading file 31 of 495, global 744 of 1208 (61.51%)
Reading file 41 of 495, global 754 of 1208 (62.33%)
Reading file 51 of 495, global 764 of 1208 (63.16%)
Reading file 61 of 495, global 774 of 1208 (63.99%)
Reading file 71 of 495, global 784 of 1208 (64.82%)
Reading file 81 of 495, global 794 of 1208 (65.65%)
Reading file 91 of 495, global 804 of 1208 (66.47%)
Reading file 101 of 495, global 814 of 1208 (67.30%)
Reading file 111 of 495, global 824 of 1208 (68.13%)
Reading file 121 of 495, global 834 of 1208 (68.96%)
Reading file 131 of 495, global 844 of 1208 (69.78%)
Reading file 141 of 495, global 854 of 1208 (70.61%)
Reading file 151 of 495, global 864 of 1208 (71.44%)
Reading file 161 of 495, global 874 of 1208 (72.27%)
Reading file 171 of 495, global 884 of 1208 (73.10%)
Rea

In [8]:
luminosity = 59.97 * 1000 # 1/pb
# Per-sample normalisation weight: luminosity * cross section / sum of generator weights.
# The weight is stored on the sample's properties and printed in the same pass, so the
# printed value is exactly the stored one.
for sig, properties in sigs.items():
    sum_gen_weights = histos['sumgenwgt'][sig]
    properties['weight'] = luminosity * properties['xsec'] / sum_gen_weights
    print(sig, properties['weight'], sum_gen_weights)

# Persist the histograms filled so far (the weights live in sigs and are not part of this file).
with open('histos_signal_objects_gbm.dat', 'wb') as histos_file:
    pickle.dump(histos, histos_file)

Mchi-60p0_dMchi-20p0_ctau-1 5241.240589198036 61100.0
Mchi-60p0_dMchi-20p0_ctau-1000 2232.9121867548006 143418.0
Mchi-5p25_dMchi-0p5_ctau-1000 5135.092923688726 62363.0


## Process backgrounds

In [9]:
# Read the background sample definitions from the JSON configuration file.
with open('config/bkgs2.json') as bkg_config:
    bkgs = json.loads(bkg_config.read())

In [10]:
# Directory listings go through the EOS endpoint; the files themselves are later
# opened through the redirector URL.
xrdfs = client.FileSystem("root://cmseos.fnal.gov/")

redirector = 'root://cmsxrootd.fnal.gov'
bkg_base_dir = '/store/group/lpcmetx/iDM/Ntuples/2018/backgrounds'
# NOTE: this rebinds `files`, replacing the signal file lists built earlier.
files = {}

for bkg, properties in bkgs.items():
    sample_dir = f'{bkg_base_dir}/{properties["dir"]}'
    status, listing = xrdfs.dirlist(sample_dir, DirListFlags.STAT)
    # Fail with a readable message instead of a TypeError from iterating over None
    # when the directory cannot be listed.
    if not status.ok or listing is None:
        raise RuntimeError(f'Could not list {sample_dir} for background {bkg}: {status.message}')
    # Keep only ROOT files, as full URLs behind the redirector.
    files[bkg] = [f'{redirector}/{sample_dir}/{entry.name}'
                  for entry in listing if '.root' in entry.name]

In [11]:
# Total number of files over all samples, used for the global progress percentage.
num_files_total = np.array([len(paths) for paths in files.values()]).sum()
print(num_files_total)
# Cell output: number of files found per sample.
[(sample, len(paths)) for sample, paths in files.items()]

1370


[('DYJetsToLL', 490),
 ('QCD_bEnriched_HT100to200', 82),
 ('QCD_bEnriched_HT200to300', 63),
 ('QCD_bEnriched_HT300to500', 13),
 ('QCD_bEnriched_HT500to700', 24),
 ('QCD_bEnriched_HT700to1000', 11),
 ('QCD_bEnriched_HT1000to1500', 1),
 ('QCD_bEnriched_HT1500to2000', 1),
 ('QCD_bEnriched_HT2000toINF', 1),
 ('TTTo2L2Nu', 172),
 ('TTJets', 30),
 ('TT_diLept', 5),
 ('WJetsToLNu_HT-70To100', 168),
 ('WJetsToLNu_HT-100To200', 66),
 ('WJetsToLNu_HT-200To400', 37),
 ('WJetsToLNu_HT-400To600', 7),
 ('ZJetsToNuNu_HT-100To200', 54),
 ('ZJetsToNuNu_HT-200To400', 41),
 ('ZJetsToNuNu_HT-400To600', 36),
 ('ZJetsToNuNu_HT-600To800', 12),
 ('ZJetsToNuNu_HT-800To1200', 10),
 ('ZJetsToNuNu_HT-1200To2500', 1),
 ('ZJetsToNuNu_HT-2500ToInf', 1),
 ('WWJJToLNuLNu', 1),
 ('WWTo2L2Nu', 20),
 ('WZTo3LNu', 1),
 ('ZZTo2L2Nu', 16),
 ('NoBPTX', 6)]

In [None]:
%%time

MAX_FILES=None # To load all possible files
# MAX_FILES=1 # For testing

# histos and all_bins are created once in the earlier "histos = {}" cell; this cell only
# adds the per-background entries, so entries already stored there are kept.

global_file_counter = 1

for bkg_index, bkg in enumerate(bkgs, start=1):

    print(f'Processing background {bkg} ({bkg_index}/{len(bkgs)})')

    ### Initialize histograms as empty HistogramContainers
    for plot_var in all_plot_vars:
        histos[plot_var][bkg] = HCont.HistogramContainer(all_bins[plot_var])
    histos['cutflow_incl'][bkg] = np.zeros(len(numCuts))
    histos['cutflow_excl'][bkg] = np.zeros(len(numCuts))
    histos['sumgenwgt'][bkg] = 0.0

    ### Load data (at most MAX_FILES files per sample; None means all of them)
    for file_counter, file in enumerate(files[bkg][slice(0,MAX_FILES)], start=1):

        # Every 10th file: report progress and checkpoint the histograms filled so far
        if file_counter % 10 == 1:
            print(f'Reading file {file_counter} of {len(files[bkg])},'
                  f' global {global_file_counter} of {num_files_total}'
                  f' ({100*(global_file_counter-1)/num_files_total:.2f}%)')
            with open('histos_temp.dat', 'wb') as histos_file:
                pickle.dump(histos, histos_file)
        global_file_counter += 1

        ### Open ROOT file and get tree
        tree = uproot.open(file)[branch_path + '/cutsTree']

        ### Make pandas dataframes and create all objects that will be passed to histo functions
        obj_extractor = OE.ObjectExtractor(tree)
        objects = obj_extractor.get_all()

        ## Add to sum of genwgts
        histos['sumgenwgt'][bkg] += np.sum(objects['genwgt'])

        ### Calculate histograms and cutflows
        histo_maker = HCalc.HistogramCalculator(objects, bkg)

        ### Cutflows
        incl, excl = histo_maker.cutflows()
        histos['cutflow_incl'][bkg] += incl
        histos['cutflow_excl'][bkg] += excl

        ### Histograms
        for plot_var in all_plot_vars:
            # Look the method up by name; same call as before without eval() on a built string
            new_hist = getattr(histo_maker, plot_var)()
            histos[plot_var][bkg] += new_hist

Processing background DYJetsToLL (1/28)
Reading file 1 of 490, global 1 of 1370 (0.00%)
Reading file 11 of 490, global 11 of 1370 (0.73%)
Reading file 21 of 490, global 21 of 1370 (1.46%)
Reading file 31 of 490, global 31 of 1370 (2.19%)
Reading file 41 of 490, global 41 of 1370 (2.92%)
Reading file 51 of 490, global 51 of 1370 (3.65%)
Reading file 61 of 490, global 61 of 1370 (4.38%)
Reading file 71 of 490, global 71 of 1370 (5.11%)
Reading file 81 of 490, global 81 of 1370 (5.84%)
Reading file 91 of 490, global 91 of 1370 (6.57%)
Reading file 101 of 490, global 101 of 1370 (7.30%)
Reading file 111 of 490, global 111 of 1370 (8.03%)
Reading file 121 of 490, global 121 of 1370 (8.76%)
Reading file 131 of 490, global 131 of 1370 (9.49%)
Reading file 141 of 490, global 141 of 1370 (10.22%)
Reading file 151 of 490, global 151 of 1370 (10.95%)
Reading file 161 of 490, global 161 of 1370 (11.68%)
Reading file 171 of 490, global 171 of 1370 (12.41%)
Reading file 181 of 490, global 181 of 137

In [None]:
# Integrated luminosity in 1/pb.
luminosity = 59.97 * 1000
# Per-sample normalisation weight: luminosity * cross section / sum of generator weights.
# NOTE(review): a sample without an 'xsec' entry raises KeyError here; later cells guard
# against a missing 'weight', so confirm whether such samples are expected.
for bkg, properties in bkgs.items():
    sum_gen_weights = histos['sumgenwgt'][bkg]
    properties['weight'] = luminosity * properties['xsec'] / sum_gen_weights

In [None]:
# Print each background's normalisation weight together with its summed generator weights.
for bkg, properties in bkgs.items():
    try:
        sum_gen_weights = histos['sumgenwgt'][bkg]
        weight = luminosity * properties['xsec'] / sum_gen_weights
    except KeyError as missing_key:
        # Report the skipped sample instead of dropping it silently
        print(f'{bkg}: skipped, missing key {missing_key}')
        continue
    print(bkg, weight, sum_gen_weights)

In [None]:
# Write the complete histos dict to disk. It is never reset between the signal and
# background loops in the cells shown here, so it holds whatever both have filled.
with open('histos_bkgs_objects_gbm.dat', 'wb') as output_file:
    pickle.dump(histos, file=output_file)

In [None]:
# Unweighted inclusive cutflow summed per sample group, shown as a table (one column per group).
# NOTE(review): bkg_grps is not defined in any cell visible here -- confirm it exists
# before this cell runs on a fresh kernel.
cutFlowInclGrp = {}
for grp in bkg_grps:
    # Skip groups whose name carries one of these tags (presumably the signal mass points -- confirm)
    if any(tag in grp for tag in ('60p0', '5p25', '52p5', '6p0')):
        continue
    for bkg in bkg_grps[grp]:
        sample_counts = histos['cutflow_incl'][bkg].astype(int)
        if grp not in cutFlowInclGrp:
            cutFlowInclGrp[grp] = sample_counts
        else:
            cutFlowInclGrp[grp] += sample_counts

pd.DataFrame.from_dict(cutFlowInclGrp)

In [None]:
# Weighted inclusive cutflow summed per sample group, shown as a table (one column per group).
# Weighted yields are accumulated as floats and truncated to int once per group:
# truncating every sample before summing would discard the fractional part of each
# sample and bias the group totals low.
weighted_totals = {}
skipped_samples = []
for grp in bkg_grps:
    for bkg in bkg_grps[grp]:
        try:
            weighted_counts = histos['cutflow_incl'][bkg] * bkgs[bkg]['weight']
        except KeyError:
            # Sample has no cutflow or no weight in bkgs: leave it out, but keep a record
            skipped_samples.append(bkg)
            continue
        if grp in weighted_totals:
            weighted_totals[grp] = weighted_totals[grp] + weighted_counts
        else:
            weighted_totals[grp] = weighted_counts

if skipped_samples:
    print(f'Skipped {len(skipped_samples)} sample(s) without cutflow or weight: {skipped_samples}')

cutFlowInclGrp2 = {grp: total.astype(int) for grp, total in weighted_totals.items()}

pd.DataFrame.from_dict(cutFlowInclGrp2)