In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
warnings.filterwarnings("ignore")

import os

import awkward as ak
import uproot
import numpy as np
import glob
from coffea.nanoevents import NanoEventsFactory, BaseSchema, NanoAODSchema
from coffea import hist, processor
# register our candidate behaviors
from coffea.nanoevents.methods import candidate
ak.behavior.update(candidate.behavior)

from functools import partial

from tools.helpers import get_four_vec_fromPtEtaPhiM, match

from yahist import Hist1D, Hist2D

import json

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import mplhep as hep

plt.style.use(hep.style.CMS)  # or ATLAS/LHCb

In [None]:
from tools.helpers import dasWrapper
from analysis.tagger import desired_output

redirector_ucsd = 'root://xcache-redirector.t2.ucsd.edu:2042/'
redirector_fnal = 'root://cmsxrootd.fnal.gov/'

# maybe we'll need the number of events for weighting, but not at the moment
def get_nevents(name):
    """Return the ``nevents`` value from the summary query for dataset ``name``.

    Runs ``dasWrapper`` with ``query='summary'``, parses the first element of
    its result as JSON and returns the ``nevents`` field of the first entry of
    the decoded document.
    """
    summary_raw = dasWrapper(name, query='summary')
    summary = json.loads(summary_raw[0])
    return summary[0]['nevents']


In [None]:
samples_signal = [
    '/ttHTobb_M125_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v2/NANOAODSIM',
]

Z_samples = [
    '/ZJetsToNuNu_HT-100To200_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ZJetsToNuNu_HT-200To400_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ZJetsToNuNu_HT-400To600_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ZJetsToNuNu_HT-800To1200_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ZJetsToNuNu_HT-1200To2500_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ZJetsToNuNu_HT-2500ToInf_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
]

tt_samples = [
    '/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    #'/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/TT_Mtt-1000toInf_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ST_tW_top_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v2/NANOAODSIM',
    '/ST_tW_antitop_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v2/NANOAODSIM',
    '/ST_t-channel_top_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/ST_t-channel_antitop_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8/RunIISummer20UL18NanoAODv9-20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
]

QCD_samples = [
    '/QCD_bEnriched_HT100to200_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT200to300_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT300to500_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT500to700_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT700to1000_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT1000to1500_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT1500to2000_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    '/QCD_bEnriched_HT2000toInf_TuneCP5_13TeV-madgraph-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
]

W_samples = [
    #'/W0JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM'
    #'/W1JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    #'/W2JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    #'/W3JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM',
    #'/W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v2/NANOAODSIM',
    '/WJetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL18NanoAODv9-106X_upgrade2018_realistic_v16_L1v1-v1/NANOAODSIM'
]

rare_samples = [
    '',
]

In [None]:
# Build one {name: [file URLs]} mapping per sample group. The key is the first
# path component of the dataset string (the part between the first two '/'),
# and every entry returned by dasWrapper is prefixed with an xrootd redirector.
# NOTE(review): dasWrapper presumably returns logical file names — confirm in tools.helpers.
# The signal sample goes through the FNAL redirector, all other groups through the UCSD xcache.
fileset_signal = {s.split('/')[1]: [redirector_fnal+p for p in dasWrapper(s)] for s in samples_signal}
fileset_Z = {s.split('/')[1]: [redirector_ucsd+p for p in dasWrapper(s)] for s in Z_samples}
fileset_QCD = {s.split('/')[1]: [redirector_ucsd+p for p in dasWrapper(s)] for s in QCD_samples}
fileset_W = {s.split('/')[1]: [redirector_ucsd+p for p in dasWrapper(s)] for s in W_samples}
# Only the first 10 files of each top sample are kept.
fileset_tt = {s.split('/')[1]: [redirector_ucsd+p for p in dasWrapper(s)][:10] for s in tt_samples}

In [None]:
exe_args = {
    'workers': 12,
    "schema": NanoAODSchema,
    "skipbadfiles": True,
}
exe = processor.futures_executor

fileset = fileset_tt



## Mass resolutions

### Run 2 FullSim

In [None]:
fileset_signal;

In [None]:
# FIXME(review): every candidate input path below is commented out, so
# `from_root` is currently called without a file argument. Re-enable one of
# them (or pass an entry of `fileset_signal`) before running this cell.
# NOTE(review): treepath='mytree' is combined with NanoAODSchema here — confirm
# that the tree name matches whichever input file is chosen.
ev = NanoEventsFactory.from_root(
        #'6BF93845-49D5-2547-B860-4F7601074715.root',
        #'/nfs-7/userdata/dspitzba/ZJetsToNuNu_HT-200To400_14TeV-madgraph_200PU//ZJetsToNuNu_HT-200To400_14TeV-madgraph_200PU_1.root:myana',
        #'/hadoop/cms/store/user/dspitzba/ProjectMetis/WJetsToLNu_GenMET-100_TuneCUETP8M1_14TeV-madgraphMLM-pythia8_200PU_v16/skim_13.root',
        treepath='mytree',
        schemaclass=NanoAODSchema,
    ).events()

In [None]:
from tools.helpers import get_four_vec_fromPtEtaPhiM, match, delta_r, delta_r2, yahist_2D_lookup



def match_count(first, second, deltaRCut=0.4):
    """Count, for every object in ``first``, how many objects in ``second`` are close to it.

    All (first, second) pairs are formed per event with a nested cartesian
    product, so the pairs are grouped by the object from ``first``. A pair is
    counted when ``delta_r2`` of its two members is below ``deltaRCut**2``.
    NOTE(review): ``delta_r2`` is presumably the squared angular distance —
    confirm in tools.helpers.

    Returns the per-object counts, obtained by summing over axis 2.
    """
    pairs = ak.cartesian([first, second], nested=True)
    is_close = delta_r2(pairs['0'], pairs['1']) < deltaRCut**2
    return ak.sum(is_close, axis=2)

In [None]:
fat = ev.FatJet[
    (ev.FatJet.pt>300) &\
    (abs(ev.FatJet.eta)<2.4) &\
    (ev.FatJet.jetId>0)
]

higgs = ev.GenPart[((abs(ev.GenPart.pdgId)==25)&(ev.GenPart.status==62))]

bquark = ev.GenPart[((abs(ev.GenPart.pdgId)==5)&(ev.GenPart.status==71))]

nb_in_fat = match_count(fat, bquark, deltaRCut=0.8)

nhiggs_in_fat = match_count(fat, higgs, deltaRCut=0.8)

In [None]:
h_mSD_res_full = Hist1D(
    np.abs(ak.flatten(fat[((nhiggs_in_fat>0)&(nb_in_fat>1))].msoftdrop)-125),
    bins=np.linspace(0,100,21)
)
h_mSD_res_full

In [None]:
sum(h_mSD_res_full.counts[0:5])

In [None]:
sum(h_mSD_res_full.counts)

In [None]:
h_mSD_full = Hist1D(
    ak.flatten(fat[((nhiggs_in_fat>0)&(nb_in_fat>1))].msoftdrop),
    bins=np.linspace(0,250,26)
)
h_mSD_full

In [None]:
Hist1D(
    np.abs(ak.flatten(fat[((nhiggs_in_fat>0)&(nb_in_fat>1))].msoftdrop)-125),
    bins=np.linspace(0,200,21)
)

In [None]:
np.median(ak.flatten(fat[((nhiggs_in_fat>0)&(nb_in_fat>1))].msoftdrop))

In [None]:
Hist1D(
    ak.flatten(fat[((nhiggs_in_fat>0))].msoftdrop),
    bins=np.linspace(0,250,int(250/10)+1)
)

In [None]:
Hist2D(
    (
        ak.flatten(fat[((nhiggs_in_fat>0)&(nb_in_fat>1))].msoftdrop),
        ak.flatten(fat[((nhiggs_in_fat>0)&(nb_in_fat>1))].pt)
    ),
    bins="20,0,200,10,200,500",
)

In [None]:
np.linspace(0,250,int(250/10)+1)

### Delphes Phase 2

In [None]:
events = NanoEventsFactory.from_root(
        '/nfs-7/userdata/dspitzba/merge_ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8_200PU_v16/merge_1.root',
        treepath='mytree',
        schemaclass=BaseSchema,
    ).events()

In [None]:
gen_sel = ((abs(events.genpart_pid)==6) | (abs(events.genpart_pid)==5) | (abs(events.genpart_pid)==25))  # NOTE: attempt to speed up reading gigantic gen particle branches

gen = get_four_vec_fromPtEtaPhiM(
    None,
    pt = events.genpart_pt[gen_sel],
    eta = events.genpart_eta[gen_sel],
    phi = events.genpart_phi[gen_sel],
    M = events.genpart_mass[gen_sel],
    copy = False,
)
gen['pdgId'] = events.genpart_pid[gen_sel]
gen['status'] = events.genpart_status[gen_sel]

bquark = gen[((abs(gen.pdgId)==5)&(gen.status==71))]  # I suspect that Delphes does not keep b's with pt less than 20?
higgs = gen[(abs(gen.pdgId)==25)][:,-1:]  # just get the last Higgs. Delphes is not keeping all the higgses.

fatjet = get_four_vec_fromPtEtaPhiM(
        None,
        pt = events.fatjet_pt,
        eta = events.fatjet_eta,
        phi = events.fatjet_phi,
        M = events.fatjet_msoftdrop,        #Using softdrop from now on
        copy = False,
    )

fatjet = fatjet[
    (fatjet.pt>300) &\
    (abs(fatjet.eta)<2.4) 
    #(ev.FatJet.jetId>0)
]

nhiggs_in_fat = match_count(fatjet, higgs, deltaRCut=0.8)
nb_in_fat = match_count(fatjet, bquark, deltaRCut=0.8)

In [None]:
h_mSD_res = Hist1D(
    np.abs(ak.flatten(fatjet[((nhiggs_in_fat>0)&(nb_in_fat>1))].mass)-125),
    bins=np.linspace(0,100,21)
)
h_mSD_res

In [None]:
h_mSD = Hist1D(
    ak.flatten(fatjet[((nhiggs_in_fat>0)&(nb_in_fat>1))].mass),
    bins=np.linspace(0,250,26)
)
h_mSD

In [None]:
smear_by = 1+np.maximum(np.random.normal(loc=1.2, scale=1.0, size=len(ak.flatten(fatjet[((nhiggs_in_fat>0)&(nb_in_fat>1))].mass))), 0)

h_mSD_res_smeared = Hist1D(
    np.abs(ak.flatten(fatjet[((nhiggs_in_fat>0)&(nb_in_fat>1))].mass)-125)*smear_by,
    bins=np.linspace(0,100,21)
)
h_mSD_res_smeared

In [None]:
def correct_mass(jet, scale, res):
    """Return a copy of ``jet`` whose ``mass`` is multiplied by a random per-jet factor.

    The factor for each jet is ``max(1 + x, 0)`` with ``x`` drawn from a normal
    distribution of mean ``scale`` and standard deviation ``res``. The draw uses
    a fixed seed (123), so repeated calls on the same input give the same result.

    Parameters
    ----------
    jet : awkward array of records with a ``mass`` field, one list of jets per event
    scale : mean of the normal shift that is added to 1
    res : standard deviation of the normal shift

    Returns
    -------
    A new array with the same structure as ``jet`` and the corrected ``mass``.
    The input array is left untouched.
    """
    # Private generator with the same seed: yields the same numbers as
    # np.random.seed(123) followed by np.random.normal, but does not reset the
    # global NumPy random state that other cells draw from.
    rng = np.random.RandomState(123)

    jet_flat = ak.flatten(jet)

    correction = np.maximum(
        1+rng.normal(
            loc=scale,
            scale=res,
            size=len(jet_flat),
        ),
        0
    )

    corrected_mass = ak.unflatten(jet_flat.mass * correction, ak.num(jet))

    # `jet['mass'] = ...` would replace the field on the caller's array in place,
    # silently changing e.g. `fatjet` for every cell that runs afterwards.
    # ak.with_field builds a new array instead.
    return ak.with_field(jet, corrected_mass, 'mass')

In [None]:
ak.flatten(fatjet.mass[((nhiggs_in_fat>0)&(nb_in_fat>1))])

In [None]:
fj = correct_mass(fatjet, 0.5, 0.1)

In [None]:
ak.flatten(fj.mass[((nhiggs_in_fat>0)&(nb_in_fat>1))])

In [None]:
np.random.seed(seed=123)

smear_by = np.maximum(1+np.random.normal(loc=-0.50, scale=0.10, size=len(ak.flatten(fatjet[((nhiggs_in_fat>0)&(nb_in_fat>1))].mass))), 0)


h_mSD_smeared = Hist1D(
    ak.flatten(fatjet[((nhiggs_in_fat>0)&(nb_in_fat>1))].mass)*smear_by,
    bins=np.linspace(0,250,26)
)
h_mSD_smeared

In [None]:
Hist1D(
    smear_by,
    #bins=np.linspace(0,250,26)
)

In [None]:
sum(h_mSD_res.counts[0:2])

In [None]:
sum(h_mSD_res.counts)

In [None]:
h_mSD_res.edges

### Comparison

In [None]:
fig, ax = plt.subplots(1,1,figsize=(7,7))

hep.cms.label(
    "Simulation",
    data=True,
    #year=2018,
    #lumi=60.0,
    loc=0,
    ax=ax,
)

#vals_tagged = output['tagged'][sname].sum('phi', 'mass', 'eta', 'dataset').values()[()]
#vals_inclusive = output['inclusive'][sname].sum('phi', 'mass', 'eta', 'dataset').values()[()]

hep.histplot(
    [ h_mSD_res_full.counts, h_mSD_res.counts, h_mSD_res_smeared.counts ],
    h_mSD_res.edges,
    #w2=[ vals_tagged, vals_inclusive ],
    histtype="step",
    stack=False,
    label=['Run 2', 'Delphes Phase-2', 'Phase-2 smeared'],
    #color=[ my_histos[x].color for x in keys ],
    ax=ax,density=True)

ax.legend()

ax.set_xlabel(r'$|m_{SD}-m_{h}|\ (GeV)$')
ax.set_ylabel(r'a.u.')

plt.show()

fig.savefig('/home/users/dspitzba/public_html/HbbMET/MSD_res.png')
fig.savefig('/home/users/dspitzba/public_html/HbbMET/MSD_res.pdf')




In [None]:
fig, ax = plt.subplots(1,1,figsize=(7,7))

hep.cms.label(
    "Simulation",
    data=True,
    #year=2018,
    #lumi=60.0,
    loc=0,
    ax=ax,
)

hep.histplot(
    [ h_mSD_full.counts, h_mSD.counts, h_mSD_smeared.counts ],
    h_mSD.edges,
    #w2=[ vals_tagged, vals_inclusive ],
    histtype="step",
    stack=False,
    label=['Run 2', 'Delphes Phase-2', 'Phase-2 smeared'],
    #color=[ my_histos[x].color for x in keys ],
    ax=ax,density=True)

ax.legend()

ax.set_xlabel(r'$m_{SD}\ (GeV)$')
ax.set_ylabel(r'a.u.')

plt.show()

fig.savefig('/home/users/dspitzba/public_html/HbbMET/MSD.png')
fig.savefig('/home/users/dspitzba/public_html/HbbMET/MSD.pdf')



In [None]:
ratio = (h_mSD_res_full.counts/sum(h_mSD_res_full.counts))/(h_mSD_res.counts/sum(h_mSD_res.counts))
ratio

In [None]:
norm_ratio = ratio/sum(ratio)
norm_ratio

In [None]:
np.random.choice(h_mSD_res_full.edges[:-1], 10, p=norm_ratio)

In [None]:
# Draw left bin edges with probability `norm_ratio` and histogram them.
# The draw covers every bin of h_mSD_res_full, so the histogram must use the
# full set of edges: with `edges[:-1]` the last bin is dropped and its entries
# are merged into the previous one.
Hist1D(
    np.random.choice(h_mSD_res_full.edges[:-1], 10000, p=norm_ratio),
    bins=h_mSD_res_full.edges
)

In [None]:
np.random.normal(loc=2.0, scale=1.0, size=1)

In [None]:
Hist1D(
    np.random.normal(loc=1.0, scale=1.0, size=100000),
    #bins=h_mSD_res_full.edges[:-1]
)

In [None]:
smear_by = 1+np.maximum(np.random.normal(loc=1.0, scale=1.0, size=100), 0)
smear_by

## Get the efficiencies

In [None]:
from analysis.tagger import measure_eff


output = processor.run_uproot_job(
            fileset,
            "Events",
            measure_eff(accumulator=desired_output),
            exe,
            exe_args,
            chunksize=500000,
        )

In [None]:
def get_efficiency(region, process, mass_range=slice(100,150)):
    """Ratio of tagged to all jets of ``process`` in ``region`` as a 2D (pt, eta) histogram.

    Reads the notebook-level ``output`` accumulator: ``output[region]`` is the
    denominator and ``output[region+'_tagged']`` the numerator. Both are
    rebinned to the coarse pt/eta binning defined here, integrated over
    ``mass_range`` on the 'mass' axis, summed over 'phi' and 'dataset', and
    converted to a yahist ``Hist2D``.

    Returns the result of ``numerator.divide(denominator)``.
    """
    pt_bins  = hist.Bin('pt', r'$p_{T} \ (GeV)$', [200, 300, 400, 600, 800])
    eta_bins = hist.Bin('eta', r'$\eta$', 3, 0, 2.4)

    def _as_hist2d(coffea_hist):
        # Rebin, project onto (pt, eta) and hand counts plus edges to yahist.
        rebinned = coffea_hist.rebin('pt', pt_bins).rebin('eta', eta_bins)
        projected = rebinned.integrate('mass', int_range=mass_range).sum('phi', 'dataset')
        bin_edges = (
            rebinned.axis('pt').edges(),
            rebinned.axis('eta').edges(),
        )
        return Hist2D.from_bincounts(projected.values()[()].T, bin_edges)

    denominator = _as_hist2d(output[region][process])
    numerator = _as_hist2d(output[region+'_tagged'][process])

    return numerator.divide(denominator)

In [None]:
# check that stuff actually ran

output['1b'].sum('phi', 'mass').integrate('eta', slice(0,1)).integrate('pt', slice(500,1000)).values()

In [None]:
output['1h_tagged'].sum('phi', 'mass').integrate('eta', slice(0,1)).integrate('pt', slice(500,1000)).values()

In [None]:
6534.0/10065.0

In [None]:
for s in fileset.keys():
    print (s)
    for b in ['0b', '1b', '2b', '1h']:
        #print (b)
        h = get_efficiency(b, s, mass_range=slice(0,500))
        #h.plot()

        h.to_json(os.path.expandvars("../data/htag/eff_%s_%s.json"%(s,b)))
        
        #del h
    
h = get_efficiency(
    '2b',
    'ST_tW_antitop_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8',
    mass_range=slice(0,500),
)
fig, ax = plt.subplots(1,1,figsize=(7,7))
h.plot()

In [None]:
h = get_efficiency(
    '2b',
    'TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8',
    mass_range=slice(0,500),
)
fig, ax = plt.subplots(1,1,figsize=(7,7))
h.plot()

## Apply the efficiency

In [None]:
# Load the stored efficiency maps from ../data/htag for every sample in
# `fileset_Z`, followed by every sample in the currently active `fileset`.
effs = {}
for s in list(fileset_Z.keys()) + list(fileset.keys()):
    effs[s] = {}
    print (s)
    for b in ['0b', '1b', '2b', '1h']:
        effs[s][b] = Hist2D.from_json(os.path.expandvars("../data/htag/eff_%s_%s.json"%(s,b)))

In [None]:
fig, ax = plt.subplots()

hep.cms.label(
    "Simulation",
    data=True,
    #year=2018,
    #lumi=60.0,
    loc=0,
    ax=ax,
)

effs['ttHTobb_M125_TuneCP5_13TeV-powheg-pythia8']['1h'].plot()

ax.set_xlabel(r'$p_T\ (AK8\ jet)\ (GeV)$')
ax.set_ylabel(r'$|\eta|$')

plt.show()

fig.savefig('/home/users/dspitzba/public_html/HbbMET/1h_true_eff.png')
fig.savefig('/home/users/dspitzba/public_html/HbbMET/1h_true_eff.pdf')


In [None]:
effs['ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8']['1b']*1.05

In [None]:
effs['ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8']['1b'].counts

In [None]:
np.isnan(sum(sum(effs['ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8']['1h'].counts)))

In [None]:
from tools.helpers import yahist_2D_lookup
yahist_2D_lookup(
    effs['ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8']['0b'],
    ak.Array([[700]]),
    ak.Array([[2.]]),
)

In [None]:
from analysis.tagger import apply_eff, desired_output

exe_args = {
    'workers': 12,
    "schema": NanoAODSchema,
    "skipbadfiles": True,
}

output = processor.run_uproot_job(
            fileset_Z,
            "Events",
            apply_eff(
                accumulator=desired_output,
                effs = effs,
            ),
            exe,
            exe_args,
            chunksize=500000,
        )

### Closure and sanity checks

Make sure that the method closes in pt, and gives reasonable agreement in the mass distribution.


In [None]:
# if inclusive number below is nan we have a bug in applying the efficiencies.
output['inclusive'].sum('pt', 'eta', 'phi', 'mass').values()

In [None]:
# inclusive and tagged numbers should agree within percent level
output['tagged'].sum('pt', 'eta', 'phi', 'mass').values()

In [None]:
# `sname` is otherwise first assigned in a later plotting cell, so this cell
# would raise a NameError on a fresh kernel; define it here.
sname = 'ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8'
output['tagged'][sname].sum('phi', 'mass', 'eta', 'dataset').values(sumw2=True)

In [None]:
# Closure in pt: compare the directly tagged jets with the 'inclusive'
# histogram (labelled 'reweighted') for one Z sample.
fig, ax = plt.subplots(1,1,figsize=(7,7))

hep.cms.label(
    "Simulation",
    data=True,
    #year=2018,
    #lumi=60.0,
    loc=0,
    ax=ax,
)

sname = 'ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8'

# values(sumw2=True) returns (sum of weights, sum of squared weights) per bin.
# The squared weights are what `w2` expects; passing the bin contents instead
# is only right for unweighted fills and misstates the error of a weighted histogram.
vals_tagged, w2_tagged = output['tagged'][sname].sum('phi', 'mass', 'eta', 'dataset').values(sumw2=True)[()]
vals_inclusive, w2_inclusive = output['inclusive'][sname].sum('phi', 'mass', 'eta', 'dataset').values(sumw2=True)[()]

hep.histplot(
    [ vals_tagged, vals_inclusive ],
    output['inclusive'][sname].axis('pt').edges(),
    w2=[ w2_tagged, w2_inclusive ],
    histtype="step",
    stack=False,
    label=['tagged', 'reweighted'],
    ax=ax)

ax.legend()

ax.set_xlabel(r'$p_T\ (AK8\ jet)\ (GeV)$')
ax.set_ylabel(r'Events')

plt.show()

fig.savefig('/home/users/dspitzba/public_html/HbbMET/tagging/application_pt.png')
fig.savefig('/home/users/dspitzba/public_html/HbbMET/tagging/application_pt.pdf')



In [None]:
# Sanity check in mass: compare the directly tagged jets with the 'inclusive'
# histogram (labelled 'reweighted') for one Z sample.
fig, ax = plt.subplots(1,1,figsize=(7,7))

hep.cms.label(
    "Simulation",
    data=True,
    #year=2018,
    #lumi=60.0,
    loc=0,
    ax=ax,
)

sname = 'ZJetsToNuNu_HT-600To800_TuneCP5_13TeV-madgraphMLM-pythia8'

# values(sumw2=True) returns (sum of weights, sum of squared weights) per bin.
# The squared weights are what `w2` expects; passing the bin contents instead
# is only right for unweighted fills and misstates the error of a weighted histogram.
vals_tagged, w2_tagged = output['tagged'][sname].sum('phi', 'pt', 'eta', 'dataset').values(sumw2=True)[()]
vals_inclusive, w2_inclusive = output['inclusive'][sname].sum('phi', 'pt', 'eta', 'dataset').values(sumw2=True)[()]

hep.histplot(
    [ vals_tagged, vals_inclusive ],
    output['inclusive'][sname].axis('mass').edges(),
    w2=[ w2_tagged, w2_inclusive ],
    histtype="step",
    stack=False,
    label=['tagged', 'reweighted'],
    ax=ax)

ax.legend()

# \mathrm instead of \text: matplotlib's built-in math renderer does not
# understand \text in older releases, while \mathrm works everywhere.
ax.set_xlabel(r'$m_\mathrm{AK8}\ (GeV)$')
ax.set_ylabel(r'Events')

ax.set_xlim([0,250])

plt.show()

fig.savefig('/home/users/dspitzba/public_html/HbbMET/tagging/application_mass.png')
fig.savefig('/home/users/dspitzba/public_html/HbbMET/tagging/application_mass.pdf')



In [None]:
h1 = Hist1D.from_bincounts(
        output['tagged'][sname].sum('phi', 'pt', 'eta', 'dataset').values()[()],
        output['tagged'][sname].axis('mass').edges(),
    )

h2 = Hist1D.from_bincounts(
        output['inclusive'][sname].sum('phi', 'pt', 'eta', 'dataset').values()[()],
        output['inclusive'][sname].axis('mass').edges(),
    )
h1.plot()
h2.plot()

In [None]:
fig, ax = plt.subplots(1,1,figsize=(7,7))

h1 = Hist1D.from_bincounts(
        output['NH_true'][sname].sum('dataset').values()[()],
        output['NH_true'][sname].axis('multiplicity').edges(),
    )

h2 = Hist1D.from_bincounts(
        output['NH_weight'][sname].sum('dataset').values()[()],
        output['NH_weight'][sname].axis('multiplicity').edges(),
    )
h1.plot()
h2.plot()

ax.set_yscale('log')

### Compare prediction of tagged jets in the interesting mass window

In [None]:
output['inclusive'][sname].integrate('mass', int_range=slice(100,150)).sum('phi', 'pt', 'eta', 'dataset').values()[()]

In [None]:
output['tagged'][sname].integrate('mass', int_range=slice(100,150)).sum('phi', 'pt', 'eta', 'dataset').values()[()]

In [None]:
1539.101715319699/1462.0

In [None]:
test_eff = ak.Array([[0.1,0.2], [0.15], []])
1-ak.prod(1-test_eff, axis=1)

In [None]:
np.zeros_like(ak.num(test_eff))

In [None]:
output['NH_true'][sname].values()

In [None]:
output['NH_weight'][sname].values()

In [None]:
54281.68960112/(5.3884000e+04+1.7600000e+02)

## WIP

In [None]:
def compute_darkness(r, g, b, a=1.0):
    """Return the 'darkness' of an RGBA colour, i.e. alpha times (1 - luminance).

    Luminance is the weighted sum 0.299*r + 0.587*g + 0.114*b. The arithmetic
    works element-wise, so scalars and NumPy arrays are both accepted.

    Taken from Nick Amin: https://github.com/aminnj/yahist
    Version from Jonathan Guiang: https://gist.github.com/jkguiang/279cb4d2e68e64148afc62274df09f18
    """
    luminance = 0.299 * r + 0.587 * g + 0.114 * b
    return a * (1.0 - luminance)

def bin_text(counts, x_edges, y_edges, axes, cbar, errors=None, size=10, fmt=":0.2e"):
    """Write the bin content as text on top of every non-empty bin of a 2D histogram.

    Taken from Nick Amin: https://github.com/aminnj/yahist
    Version from Jonathan Guiang: https://gist.github.com/jkguiang/279cb4d2e68e64148afc62274df09f18

    Parameters
    ----------
    counts : 2D array-like of bin contents; rows run along y and columns along x,
        i.e. shape (len(y_edges)-1, len(x_edges)-1)
    x_edges, y_edges : 1D array-likes of bin edges
    axes : object with a matplotlib-style ``text`` method that receives the labels
    cbar : colorbar whose ``mappable.to_rgba`` gives the fill colour of a bin value
    errors : optional array-like with the same shape as ``counts``; when given, a
        second label line ``\\pm<error>\\%`` is added below the bin content
    size : font size of the labels
    fmt : format spec (including the leading ':') applied to the bin content

    Returns
    -------
    None. Labels are white on dark bins (darkness above 0.45) and black otherwise;
    bins whose content is exactly 0 get no label.
    """
    # Accept plain (nested) lists as well as arrays.
    counts = np.asarray(counts)
    x_edges = np.asarray(x_edges)
    y_edges = np.asarray(y_edges)

    show_errors = errors is not None
    x_centers = x_edges[1:]-(x_edges[1:]-x_edges[:-1])/2
    y_centers = y_edges[1:]-(y_edges[1:]-y_edges[:-1])/2

    if show_errors:
        errors = np.asarray(errors)
        # "\n" is a real line break; the backslashes of \pm and \% are escaped
        # so they reach matplotlib literally without invalid-escape warnings.
        label_template = "{0"+fmt+"}\n$\\pm{1:0.2f}\\%$"
    else:
        errors = np.zeros(counts.shape)
        label_template = "{0"+fmt+"}"

    # One row per bin: (x centre, y centre, content, error), empty bins removed.
    xyz = np.c_[
        np.tile(x_centers, len(y_centers)),
        np.repeat(y_centers, len(x_centers)),
        counts.flatten(),
        errors.flatten()
    ][counts.flatten() != 0]

    # Choose white text on dark bins and black text on light ones.
    r, g, b, a = cbar.mappable.to_rgba(xyz[:, 2]).T
    colors = np.zeros((len(xyz), 3))
    colors[compute_darkness(r, g, b, a) > 0.45] = 1

    for (x, y, count, err), color in zip(xyz, colors):
        axes.text(
            x,
            y,
            label_template.format(count, err),
            color=color,
            ha="center",
            va="center",
            fontsize=size,
            wrap=True,
        )

    return

In [None]:
h2.divide(h1).counts

In [None]:
h2.edges[0]