In [1]:
# import math
# import operator
import sys
import time
# from collections import OrderedDict
# from functools import reduce
import pickle
import json

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
# from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# from skhep.visual import MplPlotter as skh_plt

# import multiprocessing
import concurrent.futures

from XRootD import client
from XRootD.client.flags import DirListFlags, StatInfoFlags, OpenFlags, MkDirFlags, QueryCode
# import xrdfs_find

In [2]:
# Enable IPython's autoreload extension in mode 1: only modules registered
# with %aimport are re-imported before each cell execution, so edits to the
# local `utils` helpers are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 1
# Register (and import) each project helper module for autoreload.
%aimport utils.ObjectExtractor
%aimport utils.PlotMaker
%aimport utils.HistogramContainer
%aimport utils.HistogramCalculator
# Short aliases used by the rest of the notebook.
OE = utils.ObjectExtractor
PM = utils.PlotMaker
HCont = utils.HistogramContainer
HCalc = utils.HistogramCalculator

In [3]:
# Record the interpreter and pandas versions used for this run.
print(sys.version_info)
print(pd.__version__)

# Matplotlib appearance: start from the default style, then use dotted grid
# lines and a 10 pt font for every figure in the notebook.
plt.style.use('default')
plt.rcParams.update({'grid.linestyle': ':', 'font.size': 10})

# Integer indices 0..5 (only referenced by the commented-out recovery cell
# further down in this notebook).
numCuts = np.arange(6)

sys.version_info(major=3, minor=6, micro=4, releaselevel='final', serial=0)
0.24.2


In [4]:
# Names of the histogrammed variables; the first four are the MET/jet
# quantities, the last four the muon quantities.
all_plot_vars = ['metpt', 'jetpt','metjetphi', 'metmuphi', 'leadingmupt', 'subleadingmupt', 'recodr', 'recovertex']
metjet_plot_vars = all_plot_vars[0:4] # ['metpt', 'jetpt', 'metjetphi', 'metmuphi']
muons_plot_vars = all_plot_vars[4:8] # ['leadingmupt', 'subleadingmupt', 'recodr', 'recovertex']
cutflow_vars = ['cutflow_incl', 'cutflow_excl']

# Axis labels, position-matched to `all_plot_vars` (looked up later via
# all_plot_vars.index(var)). The LaTeX labels are raw strings so that '\D' and
# '\P' are not parsed as (invalid) escape sequences; the resulting text is
# unchanged.
all_plot_xlabels = [
    'MET [GeV]', 'Leading jet pT [GeV]', r'$\Delta\Phi$(MET, jet)', r'$\Delta\Phi$(MET, di-muon)',
    'Leading muon pT [GeV]', 'Subleading muon pT [GeV]', 'dR(muons)', 'Di-muon vertex [cm]'
]
assert len(all_plot_xlabels) == len(all_plot_vars), 'every plot variable needs an x-axis label'

In [5]:
# Signal sample configuration. Later cells treat `sigs` as a mapping from
# sample name to a properties dict and read its 'xsec' and 'group' entries.
# The encoding is given explicitly so decoding does not depend on the locale.
with open('config/sig.json', encoding='utf-8') as sigs_json_file:
    sigs = json.load(sigs_json_file)

In [None]:
# Background sample configuration. Later cells treat `bkgs` as a mapping from
# sample name to a properties dict and read its 'xsec' and 'group' entries.
# The encoding is given explicitly so decoding does not depend on the locale.
with open('config/bkgs.json', encoding='utf-8') as bkgs_json_file:
    bkgs = json.load(bkgs_json_file)

# Global muons

In [6]:
# Load the pickled signal histograms. Later cells index the result first by
# histogram name (e.g. 'sumgenwgt', 'cutflow_incl') and then by sample name.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
# NOTE(review): the background histograms are read from 'data/', while this
# cell reads from the working directory -- confirm which path is intended.
#with open('data/histos_signal_objects_gbm.dat', 'rb') as file:
with open('histos_signal_objects_gbm.dat', 'rb') as file:

    histos_signal_from_file_gbm = pickle.load(file)

In [7]:
luminosity = 59.97 * 1000 # 1/pb

# Per-sample normalisation weight: luminosity * cross-section divided by the
# sample's 'sumgenwgt' entry (presumably the sum of generator weights).
for sig, properties in sigs.items():
    try:
        properties['weight'] = luminosity * properties['xsec'] / histos_signal_from_file_gbm['sumgenwgt'][sig]
    except KeyError:
        # No 'xsec' in the config or no 'sumgenwgt' entry for this sample:
        # fall back to a unit weight (deliberate best-effort).
        properties['weight'] = 1

# Signal-only view of the histograms: one shallow copy per plot variable.
# (The previous form merged each mapping with itself, which yields the same
# copy; its `except KeyError` fallback could not be reached in practice.)
histos = {}
for plot_var in histos_signal_from_file_gbm:
    histos[plot_var] = {**histos_signal_from_file_gbm[plot_var]}

In [None]:
# Load the pickled background histograms. Later cells index the result first
# by histogram name (e.g. 'sumgenwgt', 'cutflow_incl') and then by sample name.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('data/histos_bkgs_objects_gbm.dat', 'rb') as file:
#with open('histos_bkgs_objects_gbm.dat', 'rb') as file:
    histos_bkgs_from_file_gbm = pickle.load(file)

In [None]:
luminosity = 59.97 * 1000  # 1/pb

# Attach a normalisation weight to every background sample:
# luminosity * cross-section / 'sumgenwgt' entry of that sample.
for bkg in bkgs:
    properties = bkgs[bkg]
    try:
        properties['weight'] = (
            luminosity * properties['xsec']
            / histos_bkgs_from_file_gbm['sumgenwgt'][bkg]
        )
    except KeyError:
        # Missing cross-section or 'sumgenwgt' entry: use a unit weight.
        properties['weight'] = 1

In [None]:
# Rebuild `histos` as the per-variable union of background and signal samples.
# On a sample-name clash the signal entry wins (it is unpacked last).
histos = {}
for plot_var, bkg_histos in histos_bkgs_from_file_gbm.items():
    try:
        merged = {**bkg_histos, **histos_signal_from_file_gbm[plot_var]}
    except KeyError:
        # Variable not available for signal: keep the background entry as is.
        merged = bkg_histos
    histos[plot_var] = merged

In [None]:
# Conversion from dict file into objects
# Shouldn't need this unless recovering from a broken Jupyter session
# histos_gbm = {}
# for var in histos_from_file_gbm:
#     histos_gbm[var] = {}
#     if 'cutflow' in var or 'genwgt' in var:
#         for bkg in histos_from_file_gbm[var]:
#             histos_gbm[var][bkg] = histos_from_file_gbm[var][bkg]
#     else:
#         for bkg in histos_from_file_gbm[var]:
#             histos_gbm[var][bkg] = HCont.HistogramContainer(bins=len(histos_from_file_gbm[var][bkg]['counts'][0]))
#             histos_gbm[var][bkg] += list(zip(histos_from_file_gbm[var][bkg]['counts'].values(),
#                                       {cut:histos_from_file_gbm[var][bkg]['edges'] for cut in numCuts}.values(),
#                                       histos_from_file_gbm[var][bkg]['wgt_sqrd'].values()))

### Cutflow table

In [8]:
# Inclusive cutflow per signal group: each sample's cutflow is scaled by its
# weight, truncated to integers, and accumulated into its group's total.
cutflow_incl_grps = {}
for sig, properties in sigs.items():
    grp = properties['group']
    weighted_cutflow = (histos_signal_from_file_gbm['cutflow_incl'][sig] * properties['weight']).astype(int)
    if grp in cutflow_incl_grps:
        cutflow_incl_grps[grp] += weighted_cutflow
    else:
        cutflow_incl_grps[grp] = weighted_cutflow

pd.DataFrame.from_dict(cutflow_incl_grps)

Unnamed: 0,sig
0,5123836800
1,1381870027
2,1219462634
3,510350687
4,258083645
5,258083645
6,258083645
7,181972430
8,100498372
9,47526467


In [9]:
# Per-sample inclusive cutflow, one column per sample, columns in name order.
cutflow_items = list(histos['cutflow_incl'].items())
cutflow_items.sort()
pd.DataFrame.from_dict(dict(cutflow_items))

Unnamed: 0,Mchi-52p5_dMchi-5p0_ctau-1,Mchi-52p5_dMchi-5p0_ctau-10,Mchi-52p5_dMchi-5p0_ctau-100,Mchi-52p5_dMchi-5p0_ctau-1000,Mchi-5p25_dMchi-0p5_ctau-1,Mchi-5p25_dMchi-0p5_ctau-10,Mchi-5p25_dMchi-0p5_ctau-100,Mchi-5p25_dMchi-0p5_ctau-1000,Mchi-60p0_dMchi-20p0_ctau-1,Mchi-60p0_dMchi-20p0_ctau-10,Mchi-60p0_dMchi-20p0_ctau-100,Mchi-60p0_dMchi-20p0_ctau-1000,Mchi-6p0_dMchi-2p0_ctau-1,Mchi-6p0_dMchi-2p0_ctau-10,Mchi-6p0_dMchi-2p0_ctau-100,Mchi-6p0_dMchi-2p0_ctau-1000
0,157634.0,157501.0,156942.0,157255.0,58363.0,58319.0,58380.0,58380.0,140505.0,140550.0,140524.0,140668.0,45400.0,45378.0,45400.0,45400.0
1,50997.0,51205.0,52096.0,53124.0,10999.0,11358.0,11468.0,11921.0,42368.0,42994.0,47754.0,52121.0,8419.0,9568.0,10946.0,11699.0
2,44977.0,45372.0,45866.0,46784.0,9613.0,9898.0,10032.0,10362.0,38027.0,38551.0,42468.0,46031.0,7543.0,8423.0,9585.0,10160.0
3,19930.0,20284.0,20817.0,21824.0,4166.0,4374.0,4524.0,4725.0,13473.0,13500.0,16040.0,20320.0,2587.0,3022.0,3965.0,4574.0
4,13675.0,13619.0,11270.0,5598.0,2407.0,2194.0,1213.0,735.0,11606.0,11388.0,10688.0,5632.0,1883.0,2059.0,1341.0,769.0
5,13675.0,13619.0,11270.0,5598.0,2407.0,2194.0,1213.0,735.0,11606.0,11388.0,10688.0,5632.0,1883.0,2059.0,1341.0,769.0
6,13675.0,13619.0,11270.0,5598.0,2407.0,2194.0,1213.0,735.0,11606.0,11388.0,10688.0,5632.0,1883.0,2059.0,1341.0,769.0
7,9184.0,9082.0,7212.0,2381.0,1380.0,1183.0,469.0,180.0,10580.0,10364.0,9235.0,3236.0,1611.0,1739.0,874.0,243.0
8,1176.0,5715.0,6215.0,1788.0,143.0,674.0,290.0,59.0,1910.0,7365.0,8414.0,2717.0,330.0,1284.0,709.0,125.0
9,558.0,2680.0,2468.0,581.0,67.0,298.0,83.0,12.0,1160.0,4414.0,4082.0,1023.0,167.0,625.0,275.0,48.0


In [None]:
# Inclusive cutflow per background group: each sample's cutflow is scaled by
# its weight, truncated to integers, and accumulated into its group's total.
cutflow_incl_grps = {}
for bkg, properties in bkgs.items():
    grp = properties['group']
    weighted_cutflow = (histos_bkgs_from_file_gbm['cutflow_incl'][bkg] * properties['weight']).astype(int)
    if grp in cutflow_incl_grps:
        cutflow_incl_grps[grp] += weighted_cutflow
    else:
        cutflow_incl_grps[grp] = weighted_cutflow

pd.DataFrame.from_dict(cutflow_incl_grps)

In [None]:
# Per-sample inclusive cutflow for everything currently held in `histos`,
# one column per sample, columns in name order.
cutflow_items = list(histos['cutflow_incl'].items())
cutflow_items.sort()
pd.DataFrame.from_dict(dict(cutflow_items))

### Plots

In [None]:
def plot_binned_data_error(axis, bin_edges, data, wgt_sqrd, *args, **kwargs):
    """Draw the sum of several pre-binned histograms with error bars.

    Parameters
    ----------
    axis
        Axes object handed to ``skh_plt.hist`` as ``ax``.
    bin_edges
        Bin edges; must support slicing and element-wise addition
        (e.g. a NumPy array).
    data
        Per-histogram bin contents; summed along axis 0 to get the bin heights.
    wgt_sqrd
        Non-empty sequence of pandas Series (one per histogram) that are added
        together, re-indexed to 1..len(bin_edges)-1 with missing entries set
        to 0, and square-rooted to give the error bars. Presumably the
        per-bin sums of squared weights -- confirm against the producer.
    *args, **kwargs
        Forwarded unchanged to ``skh_plt.hist``.

    Returns
    -------
    Whatever ``skh_plt.hist`` returns.

    Notes
    -----
    ``skh_plt`` is a notebook-level name that is looked up when the function
    is called, so its import cell must have run before the first call.
    """
    # Accumulate the squared-weight series; labels missing on one side count as 0.
    errors = wgt_sqrd[0]
    for i in range(1, len(wgt_sqrd)):
        errors = errors.add(wgt_sqrd[i], fill_value=0)
    # Align to one entry per bin (labels 1..n_bins) before taking the square root.
    errors = errors.reindex(np.arange(1, len(bin_edges)), fill_value=0)
    errors = np.sqrt(errors)
    # The histogram is re-created by filling each bin centre once, weighted by
    # the summed bin content.
    x = (bin_edges[1:] + bin_edges[:-1]) / 2.0
    return skh_plt.hist(x, ax=axis, bins=bin_edges, weights=np.sum(data, axis=0), errorbars=errors, *args, **kwargs)

In [None]:
def plot_stacked_binned_data(axis, bin_edges, data, wgt_sqrd, *args, **kwargs):
    """Draw several pre-binned histograms as one stacked histogram.

    ``data`` holds one row of bin contents per histogram and ``wgt_sqrd`` one
    pandas Series per histogram. The series are summed, square-rooted and
    aligned to the labels 1..len(bin_edges)-1 (missing entries become 0) to
    form the error bars. Extra positional and keyword arguments are forwarded
    to ``skh_plt.hist``, whose return value is returned.
    """
    summed_wgt_sqrd = wgt_sqrd[0]
    for idx in range(1, len(wgt_sqrd)):
        summed_wgt_sqrd = summed_wgt_sqrd.add(wgt_sqrd[idx], fill_value=0)
    bin_labels = np.arange(1, len(bin_edges))
    errors = np.array(np.sqrt(summed_wgt_sqrd).reindex(bin_labels, fill_value=0))

    # One column of bin centres per histogram; the bin contents act as weights.
    centres = (bin_edges[1:] + bin_edges[:-1]) / 2.0
    x = np.transpose(np.array([centres]).repeat(len(data), axis=0))
    weights = np.transpose(data)

    return skh_plt.hist(x, ax=axis, bins=bin_edges, weights=weights, errorbars=errors, stacked=True, *args, **kwargs)

In [None]:
from skhep.visual import MplPlotter as skh_plt

In [None]:
from collections import OrderedDict

In [None]:
# Variable and cut index to draw. Every lookup below uses these two names, so
# changing them here is enough to plot a different variable or cut.
plot_var = 'metjetphi'
cut = 0

# Sum the weighted per-sample histograms into one container per background group.
grp_histos = {}
for bkg, properties in bkgs.items():
    grp = properties['group']
    if grp not in grp_histos:
        grp_histos[grp] = HCont.HistogramContainer()
    # self.histos[plot_var][bkg].set_weight(properties['weight'])
    # FIXME placeholder while H.C. doesn't have set_weight
    grp_histos[grp].counts[cut] += histos[plot_var][bkg].counts[cut] * properties['weight']
    grp_histos[grp].edges = histos[plot_var][bkg].edges
    grp_histos[grp].wgt_sqrd[cut] = grp_histos[grp].wgt_sqrd[cut].add(
        histos[plot_var][bkg].wgt_sqrd[cut] * properties['weight']**2, fill_value=0)

# Order the groups by their highest bin so the smallest contribution is drawn first.
sorted_keys = sorted(grp_histos, key=lambda obj: max(grp_histos[obj].counts[cut]))
sorted_grp_histos = OrderedDict((key, grp_histos[key]) for key in sorted_keys)
labels = list(sorted_grp_histos)

fig, axis = plt.subplots()
# NOTE(review): the bin edges are taken from the 'ZZTo2L2Nu' sample; this
# assumes all samples share the same binning for `plot_var` -- confirm.
plot_stacked_binned_data(axis, histos[plot_var]['ZZTo2L2Nu'].edges,
                         np.array([grp_histo.counts[cut] for grp_histo in sorted_grp_histos.values()]),
                         [grp_histo.wgt_sqrd[cut] for grp_histo in sorted_grp_histos.values()], label=labels);

# NOTE(review): `nonposy` was renamed to `nonpositive` in newer matplotlib
# releases; update this keyword when the environment is upgraded.
axis.set_yscale('log', nonposy='clip')
# axis.set_ylim(bottom = 1)

axis.legend();

In [None]:
# 2x2 grid of stacked MET/jet plots, one panel per variable in
# `metjet_plot_vars`, drawn by PlotMaker with third argument 0 and log=True.
plt_maker = PM.PlotMaker(histos, bkgs)

fig1, axes1 = plt.subplots(2, 2, figsize=(8,5))
fig1.tight_layout(h_pad=4.0, w_pad=3.0)

panels = axes1.flatten()
for idx, var in enumerate(metjet_plot_vars):
    panel = panels[idx]
    plt_maker.make_group_stacked_plot(panel, var, 0, log=True)
    # Labels are position-matched to `all_plot_vars`.
    panel.set_xlabel(all_plot_xlabels[all_plot_vars.index(var)])

# One shared legend, placed to the right of the top-right panel.
axes1[0,1].legend(loc=(1.1,-0.55));

In [None]:
# 2x2 grid of MET/jet plots drawn with density=True. The PlotMaker is rebuilt
# with the extra list ['5p25', '60p0'] and is reused by the cells that follow.
plt_maker = PM.PlotMaker(histos, bkgs, ['5p25', '60p0'])

fig1, axes1 = plt.subplots(2, 2, figsize=(8,5))
fig1.tight_layout(h_pad=4.0, w_pad=3.0)

panels = axes1.flatten()
for idx, var in enumerate(metjet_plot_vars):
    panel = panels[idx]
    plt_maker.make_group_plot(panel, var, 0, density=True)
    # Labels are position-matched to `all_plot_vars`.
    panel.set_xlabel(all_plot_xlabels[all_plot_vars.index(var)])

# One shared legend, placed to the right of the top-right panel.
axes1[0,1].legend(loc=(1.1,-0.55));

In [None]:
# 2x2 grid of stacked muon plots, one panel per variable in `muons_plot_vars`,
# drawn with third argument 4 and log=True. Uses the `plt_maker` created by an
# earlier cell.
fig2, axes2 = plt.subplots(2,2, figsize=(8,5))
fig2.tight_layout(h_pad=4.0, w_pad=3.0)

panels = axes2.flatten()
for idx, var in enumerate(muons_plot_vars):
    panel = panels[idx]
    plt_maker.make_group_stacked_plot(panel, var, 4, log=True)
    # Labels are position-matched to `all_plot_vars`.
    panel.set_xlabel(all_plot_xlabels[all_plot_vars.index(var)])

# One shared legend, placed to the right of the top-right panel.
axes2[0,1].legend(loc=(1.1,-0.55));

In [None]:
# 2x2 grid of muon plots, one panel per variable in `muons_plot_vars`, drawn
# with third argument 0 and density=True. Uses the `plt_maker` created by an
# earlier cell.
fig2, axes2 = plt.subplots(2,2, figsize=(8,5))
fig2.tight_layout(h_pad=4.0, w_pad=3.0)

panels = axes2.flatten()
for idx, var in enumerate(muons_plot_vars):
    panel = panels[idx]
    plt_maker.make_group_plot(panel, var, 0, density=True)
    # Labels are position-matched to `all_plot_vars`.
    panel.set_xlabel(all_plot_xlabels[all_plot_vars.index(var)])

# One shared legend, placed to the right of the top-right panel.
axes2[0,1].legend(loc=(1.1,-0.55));

In [None]:
# NOTE(review): this cell referenced `plot_vars_muons` and `bkg_grps`, neither
# of which is defined anywhere in this notebook (NameError on Run All). It now
# uses `muons_plot_vars` and the (axis, var, 0, density=True) call form used by
# the other plotting cells, which makes it a duplicate of the preceding
# muon-density cell -- consider deleting one of the two.
fig2, axes2 = plt.subplots(2,2, figsize=(8,5))
fig2.tight_layout(h_pad=4.0, w_pad=3.0)
for idx, var in enumerate(muons_plot_vars):
    plt_maker.make_group_plot(axes2.flatten()[idx], var, 0, density=True)
    xlabel = all_plot_xlabels[all_plot_vars.index(var)]
    axes2.flatten()[idx].set_xlabel(xlabel)
axes2[0,1].legend(loc=(1.1,-0.55));

# Displaced standalone muons

In [None]:
# Load the pickled histograms for the displaced-standalone-muon selection.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('data/histos_objects_dsa.dat', 'rb') as histos_file:
    histos_from_file_dsa = pickle.load(histos_file)

In [None]:
# NOTE(review): this cell used `plot_vars_metjet` and `bkg_grps`, which are not
# defined anywhere in this notebook (NameError on Run All). It now follows the
# call pattern of the global-muon cells: PlotMaker(histos, bkgs) and
# make_group_plot(axis, var, 0, density=True). Confirm against utils.PlotMaker
# and check that the DSA histograms use the sample names listed in `bkgs`.
plt_maker = PM.PlotMaker(histos_from_file_dsa, bkgs)

fig1, axes1 = plt.subplots(2, 2, figsize=(8,5))
fig1.tight_layout(h_pad=4.0, w_pad=3.0)
for idx, var in enumerate(metjet_plot_vars):
    plt_maker.make_group_plot(axes1.flatten()[idx], var, 0, density=True)
    xlabel = all_plot_xlabels[all_plot_vars.index(var)]
    axes1.flatten()[idx].set_xlabel(xlabel)
axes1[0,1].legend(loc=(1.1,-0.55));

In [None]:
# NOTE(review): this cell used `plot_vars_muons` and `bkg_grps`, which are not
# defined anywhere in this notebook (NameError on Run All). It now uses
# `muons_plot_vars` and the make_group_plot(axis, var, 0, density=True) call
# form of the global-muon cells. Confirm against utils.PlotMaker. Relies on
# the `plt_maker` built in the preceding DSA cell.
fig2, axes2 = plt.subplots(2,2, figsize=(8,5))
fig2.tight_layout(h_pad=4.0, w_pad=3.0)
for idx, var in enumerate(muons_plot_vars):
    plt_maker.make_group_plot(axes2.flatten()[idx], var, 0, density=True)
    xlabel = all_plot_xlabels[all_plot_vars.index(var)]
    axes2.flatten()[idx].set_xlabel(xlabel)
axes2[0,1].legend(loc=(1.1,-0.55));