# Main SS analysis development



In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from processor.SS_analysis import *
from Tools.config_helpers import *
from klepto.archives import dir_archive

In [None]:
from processor.default_accumulators import desired_output, add_processes_to_output, variations

In [None]:
# the equivalent code can be run with ipython -i SS_analysis.py within the processor directory

from Tools.samples import fileset_2018, fileset_2018_small
# NOTE(review): this re-binds desired_output and add_processes_to_output, which an
# earlier cell imports from processor.default_accumulators. The module name
# "std_acumulators" looks like a stale or misspelled path - confirm which one is intended.
from processor.std_acumulators import desired_output, add_processes_to_output

# With overwrite=True the on-disk cache is never loaded below; presumably the
# cache lookups then come back empty and the processor is re-run - TODO confirm.
overwrite = False

# load the config and the cache
cfg = loadConfig()

cacheName = 'SS_analysis'
cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), cacheName), serialized=True)

year = 2018

# Samples to process; the commented-out entries are kept as switches for other samples.
fileset = {
    #'inclusive': glob.glob('/home/users/dspitzba/TTW/CMSSW_10_2_9/src/inclusive_1000.root'),
    #'plus': glob.glob('/home/users/dspitzba/TTW/CMSSW_10_2_9/src/plus_2000.root'),
    #'topW_v2': fileset_2018['topW_v2'], # verified that v2 and v3 are similar, v3 has more stats!
    'topW_v3': fileset_2018['topW_v3'],
    'topW_EFT_cp8': fileset_2018['topW_EFT_cp8'],
    'topW_EFT_mix': fileset_2018['topW_EFT_mix'],
    #'TTW': fileset_2018['TTW'],
    #'TTZ': fileset_2018['TTZ'],
    #'TTH': fileset_2018['TTH'],
    #'ttbar': fileset_2018['ttbar'][:12],
    #'ttbar1l': fileset_2018['ttbar1l'][:5],
    #'ttbar2l': fileset_2018['ttbar2l'][:5],
}


# Let add_processes_to_output extend the accumulator definition for this fileset
# (what exactly is added is decided by that helper).
add_processes_to_output(fileset, desired_output)

# Sorted accumulator names; stored with the cache and compared on the next run.
histograms = sorted(desired_output.keys())

exe_args = {
    'workers': 16,
    'function_args': {'flatten': False},
    "schema": NanoAODSchema,
}
exe = processor.futures_executor

if not overwrite:
    cache.load()

# Reuse the cached result only if it was stored with an equal config and an equal
# histogram list, and the stored output is truthy.
if cfg == cache.get('cfg') and histograms == cache.get('histograms') and cache.get('simple_output'):
    output = cache.get('simple_output')

else:
    print("I'm running now")

    output = processor.run_uproot_job(
        fileset,
        "Events",
        SS_analysis(year=year, variations=variations, accumulator=desired_output),
        exe,
        exe_args,
        chunksize=250000,
    )

    # Store the inputs next to the result so the check above can compare them next time.
    cache['fileset']        = fileset
    cache['cfg']            = cfg
    cache['histograms']     = histograms
    cache['simple_output']  = output
    cache.dump()




In [None]:
def getChannel(histogram, n_ele=0):
    '''
    Pick a single dilepton channel out of a histogram through its electron count.

    histogram: object providing copy(), rebin() and sum() with an 'n_ele' axis
    n_ele: number of electrons to keep (0, 1 or 2, i.e. the mumu, emu or ee channel)

    The 'n_ele' axis of a copy is rebinned to one unit-wide bin centred on n_ele
    and then summed away; the resulting histogram is returned.
    '''
    half_width = 0.5
    single_channel_axis = hist.Bin('n_ele', r'$N_{e}$', 1, n_ele - half_width, n_ele + half_width)
    return histogram.copy().rebin('n_ele', single_channel_axis).sum('n_ele')

In [None]:
# Display the values() of chargeFlip_vs_nonprompt restricted to n_ele == 1 via getChannel.
getChannel(output['chargeFlip_vs_nonprompt'], n_ele=1).values()

In [None]:
# Same histogram without a channel choice: sum over the whole n_ele axis, for comparison.
output['chargeFlip_vs_nonprompt'].sum('n_ele').values()

In [None]:
import matplotlib.pyplot as plt

# 2D plot of chargeFlip_vs_nonprompt for the 'ttbar1l' dataset, summed over the
# n_ele and dataset axes, with axis 'n1' on x.
# NOTE(review): 'ttbar1l' is commented out of the fileset defined earlier, so this
# selection may be empty unless that sample is re-enabled.
fig, ax  = plt.subplots(1,1,figsize=(10,10) )
ax = hist.plot2d(
    output['chargeFlip_vs_nonprompt']['ttbar1l'].sum('n_ele').sum('dataset'),
    xaxis='n1',
    ax=ax,
    text_opts={'format': '%.3g'},
    patch_opts={},
)
# mathtext ignores plain spaces, so the word gap needs an explicit '\ '
# (same convention as the other axis labels in this notebook).
ax.set_xlabel(r'$N_{charge\ flips}$')
ax.set_ylabel(r'$N_{nonprompt}$')

In [None]:
# 2D plot of chargeFlip_vs_nonprompt for the 'ttbar2l' dataset, summed over the
# n_ele and dataset axes, with axis 'n1' on x. Relies on plt from an earlier cell.
# NOTE(review): 'ttbar2l' is commented out of the fileset defined earlier, so this
# selection may be empty unless that sample is re-enabled.
fig, ax  = plt.subplots(1,1,figsize=(10,10) )
ax = hist.plot2d(
    output['chargeFlip_vs_nonprompt']['ttbar2l'].sum('n_ele').sum('dataset'),
    xaxis='n1',
    ax=ax,
    text_opts={'format': '%.3g'},
    patch_opts={},
)
# mathtext ignores plain spaces, so the word gap needs an explicit '\ '
# (same convention as the other axis labels in this notebook).
ax.set_xlabel(r'$N_{charge\ flips}$')
ax.set_ylabel(r'$N_{nonprompt}$')

In [None]:
from Tools.helpers import getCutFlowTable

# Samples shown as columns of the cutflow table; topW_v3 is passed as the signal.
processes = ['topW_v3', 'topW_EFT_cp8', 'topW_EFT_mix']

# Cut names in the order they are listed in the table.
lines = [
    'entry',
    'lepveto',
    'dilep',
    'SS',
    'filter',
    'p_T(lep0)>30',
    'p_T(lep1)>20',
    'trigger',
    'N_jet>3',
    'N_central>2',
    'N_btag>0',
    'N_fwd>0',
    'N_jet>4',
    'N_central>3',
    'ST',
    'MET>50',
    'delta_eta',
    'fwd_p>500',
    'p_T(lep0)>40',
    'p_T(lep1)>30',
]

df = getCutFlowTable(
    output,
    processes=processes,
    lines=lines,
    significantFigures=4,
    signal='topW_v3',
)
df


In [None]:
# Scratch check: sum `test` along axis 1.
# NOTE(review): `test` is not defined in any cell visible here - this fails on a fresh kernel.
ak.sum(test, axis=1)

In [None]:
# Show the fileset_2018 entry that is used for the topW_v3 sample.
fileset_2018['topW_v3']

In [None]:
# Same cutflow table as before but requested with absolute=False; reuses `processes`
# and `lines` from the earlier cutflow cell and overwrites `df`.
df = getCutFlowTable(output, processes=processes, lines=lines, significantFigures=4, absolute=False, signal='topW_v3')
df

In [None]:
# Plotting setup shared by the makePlot cells below: style, binnings, labels, colours.
import warnings
warnings.filterwarnings('ignore')  # silences every warning for the rest of the session

import matplotlib.pyplot as plt
import mplhep as hep
plt.style.use(hep.style.CMS)

from plots.helpers import makePlot
import re

bkgonly = re.compile('(?!(MuonEG))')

# Binnings, one per axis name used by the histograms.
multiplicity_label = r'$N$'
pt_label = r'$p_{T}\ (GeV)$'

N_bins = hist.Bin('multiplicity', multiplicity_label, 10, -0.5, 9.5)
N_bins_red = hist.Bin('multiplicity', multiplicity_label, 5, -0.5, 4.5)
pt_bins = hist.Bin('pt', pt_label, 30, 0, 300)
pt_bins_coarse = hist.Bin('pt', pt_label, 10, 0, 300)
eta_bins = hist.Bin('eta', r'$\eta $', 25, -5.0, 5.0)

# The three scattering sample names share one legend label and one colour.
scattering_samples = ('tW_scattering', 'topW_v2', 'topW_v3')

my_labels = dict.fromkeys(scattering_samples, 'top-W scat.')
my_labels.update({
    'topW_EFT_cp8': 'SMEFT cp=8',
    'topW_EFT_mix': 'SMEFT mix',
})

my_colors = dict.fromkeys(scattering_samples, '#FF595E')
my_colors.update({
    'topW_EFT_cp8': '#525B76',
    'topW_EFT_mix': '#6A4C93',
})

In [None]:
# nGenL multiplicity, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=N_bins_red,
    log=True,
    normalize=False,
    axis_label=r'$N_{gen\ lep}$',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
    new_colors=my_colors,
    new_labels=my_labels,
)
makePlot(output, 'nGenL', 'multiplicity', **plot_kwargs)
makePlot(output, 'nGenL', 'multiplicity', shape=True, **plot_kwargs)

In [None]:
# nLepFromTop multiplicity, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=N_bins_red,
    log=True,
    normalize=False,
    axis_label=r'$N_{lep\ from\ top}$',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
)
makePlot(output, 'nLepFromTop', 'multiplicity', **plot_kwargs)
makePlot(output, 'nLepFromTop', 'multiplicity', shape=True, **plot_kwargs)

In [None]:
# nLepFromW multiplicity, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=N_bins_red,
    log=True,
    normalize=False,
    axis_label=r'$N_{lep\ from\ W}$',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
)
makePlot(output, 'nLepFromW', 'multiplicity', **plot_kwargs)
makePlot(output, 'nLepFromW', 'multiplicity', shape=True, **plot_kwargs)

In [None]:
# nLepFromZ multiplicity, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=N_bins_red,
    log=True,
    normalize=False,
    axis_label=r'$N_{lep\ from\ Z}$',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
)
makePlot(output, 'nLepFromZ', 'multiplicity', **plot_kwargs)
makePlot(output, 'nLepFromZ', 'multiplicity', shape=True, **plot_kwargs)

In [None]:
# nLepFromTau multiplicity, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=N_bins_red,
    log=True,
    normalize=False,
    axis_label=r'$N_{lep\ from\ \tau}$',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
)
makePlot(output, 'nLepFromTau', 'multiplicity', **plot_kwargs)
makePlot(output, 'nLepFromTau', 'multiplicity', shape=True, **plot_kwargs)

In [None]:
# nGenTau multiplicity, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=N_bins_red,
    log=True,
    normalize=False,
    axis_label=r'$N_{gen\ \tau}$',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
)
makePlot(output, 'nGenTau', 'multiplicity', **plot_kwargs)
makePlot(output, 'nGenTau', 'multiplicity', shape=True, **plot_kwargs)

In [None]:
# pt axis of lead_gen_lep, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=pt_bins,
    log=True,
    normalize=False,
    axis_label=r'$p_{T}$ (lead gen lep) (GeV)',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
    new_colors=my_colors,
    new_labels=my_labels,
)
makePlot(output, 'lead_gen_lep', 'pt', **plot_kwargs)
makePlot(output, 'lead_gen_lep', 'pt', shape=True, **plot_kwargs)

In [None]:
# eta axis of lead_gen_lep, drawn with makePlot's default `shape` and then with shape=True.
# The axis label was a copy of the pt plot's label; eta is not a pt and has no GeV unit.
makePlot(output, 'lead_gen_lep', 'eta',
         data_sel=None,
         bins=eta_bins, log=True, normalize=False, axis_label=r'$\eta$ (lead gen lep)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
        )
makePlot(output, 'lead_gen_lep', 'eta',
         data_sel=None,
         shape=True,
         bins=eta_bins, log=True, normalize=False, axis_label=r'$\eta$ (lead gen lep)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
        )

In [None]:
# pt axis of trail_gen_lep, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=pt_bins,
    log=True,
    normalize=False,
    axis_label=r'$p_{T}$ (trail gen lep) (GeV)',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
)
makePlot(output, 'trail_gen_lep', 'pt', **plot_kwargs)
makePlot(output, 'trail_gen_lep', 'pt', shape=True, **plot_kwargs)

In [None]:
# eta axis of trail_gen_lep, drawn with makePlot's default `shape` and then with shape=True.
# The axis label was a copy of the pt plot's label; eta is not a pt and has no GeV unit.
makePlot(output, 'trail_gen_lep', 'eta',
         data_sel=None,
         bins=eta_bins, log=True, normalize=False, axis_label=r'$\eta$ (trail gen lep)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
        )
makePlot(output, 'trail_gen_lep', 'eta',
         data_sel=None,
         shape=True,
         bins=eta_bins, log=True, normalize=False, axis_label=r'$\eta$ (trail gen lep)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown']
        )

In [None]:
# pt axis of lead_lep, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=pt_bins,
    log=True,
    normalize=False,
    axis_label=r'$p_{T}$ (lead lep) (GeV)',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
    new_colors=my_colors,
    new_labels=my_labels,
)
makePlot(output, 'lead_lep', 'pt', **plot_kwargs)
makePlot(output, 'lead_lep', 'pt', shape=True, **plot_kwargs)

In [None]:
# pt axis of trail_lep, drawn twice: with makePlot's default `shape`, then with shape=True.
plot_kwargs = dict(
    data_sel=None,
    bins=pt_bins,
    log=True,
    normalize=False,
    axis_label=r'$p_{T}$ (trail lep) (GeV)',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
    new_colors=my_colors,
    new_labels=my_labels,
)
makePlot(output, 'trail_lep', 'pt', **plot_kwargs)
makePlot(output, 'trail_lep', 'pt', shape=True, **plot_kwargs)

In [None]:
# pt axis of fwd_jet, drawn with makePlot's default `shape` and then with shape=True.
# The axis label was copied from the trail_lep plot; it now names the forward jet.
makePlot(output, 'fwd_jet', 'pt',
         data_sel=None,
         bins=pt_bins, log=True, normalize=False, axis_label=r'$p_{T}$ (fwd jet) (GeV)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
         new_colors=my_colors, new_labels=my_labels,
        )
makePlot(output, 'fwd_jet', 'pt',
         data_sel=None,
         shape=True,
         bins=pt_bins, log=True, normalize=False, axis_label=r'$p_{T}$ (fwd jet) (GeV)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
         new_colors=my_colors, new_labels=my_labels,
        )

In [None]:
# Binning for axis 'p' of the high_p_fwd_p histogram: 25 bins from 0 to 2500.
# The labels previously said p_T and "trail lep" (copied from the lepton plots);
# the plotted axis is 'p', so they now read p and name the forward jet.
# NOTE(review): wording is inferred from the histogram and axis names - confirm.
p_bins = hist.Bin('p', r'$p\ (GeV)$', 25, 0, 2500)

makePlot(output, 'high_p_fwd_p', 'p',
         data_sel=None,
         shape=True,
         bins=p_bins, log=True, normalize=False, axis_label=r'$p$ (fwd jet) (GeV)',
         #upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
         new_colors=my_colors, new_labels=my_labels,
        )

In [None]:
# pt axis of MET on the coarse pt binning, drawn once with shape=True.
plot_kwargs = dict(
    data_sel=None,
    shape=True,
    bins=pt_bins_coarse,
    log=True,
    normalize=False,
    axis_label=r'$p_{T}^{miss}$ (GeV)',
    # upHists=['pt_jesTotalUp'], downHists=['pt_jesTotalDown'],
    new_colors=my_colors,
    new_labels=my_labels,
)
makePlot(output, 'MET', 'pt', **plot_kwargs)

In [None]:
import uproot
# Open one local ROOT file for interactive inspection; the handle stays open because
# later cells read from `fin`.
fin = uproot.open('/home/users/dspitzba/TTW/CMSSW_10_2_9/src/inclusive_1000.root')

In [None]:
# Grab the "Events" object from the file opened above.
tree = fin["Events"]

In [None]:
# Read the genWeight branch. uproot trees have no `Array` method; the branch is
# looked up by name and read with its array() method.
tree["genWeight"].array()

In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection

# Build events with the NanoAOD schema from one TTWJetsToLNuEWK file.
# (An older note here said the call "will change to .from_root in coffea v0.7.0";
# the code below already uses .from_root.)
events_incl = NanoEventsFactory.from_root('/hadoop/cms/store/user/dspitzba/ProjectMetis/TTWJetsToLNuEWK_5f_NLO_RunIIAutumn18_NANO_v2/nanoAOD_1.root', schemaclass=NanoAODSchema).events()

from Tools.objects import *

# Sum of genWeight over all events of this file; the next cell divides by it.
sum_weight = sum(events_incl.genWeight)
sum_weight

In [None]:
# Fraction of the summed genWeight carried by events where the muon and electron
# counts add up to exactly two.
# NOTE(review): `muon` and `electron` are not defined in any cell visible here - confirm
# where they come from. `sum_weight` is also re-bound by a later cell.
sum(events_incl[((ak.num(muon)+ak.num(electron))==2)].genWeight)/sum_weight

In [None]:
# Same as for events_incl, but for one TTWplusJetsToLNuEWK file.
events_plus = NanoEventsFactory.from_root('/hadoop/cms/store/user/dspitzba/ProjectMetis/TTWplusJetsToLNuEWK_5f_NLO_v2_RunIIAutumn18_NANO_v4/nanoAOD_1.root', schemaclass=NanoAODSchema).events()

# Overwrites the sum_weight computed from events_incl with the one for this file.
sum_weight = sum(events_plus.genWeight)
sum_weight

In [None]:
from Tools.basic_objects import *

In [None]:
# Jet collection of events_incl via getJets, and the result of getFwdJet on it
# (selection details live in those helpers, which are not shown here).
jet = getJets(events_incl)
fwd = getFwdJet(jet)

In [None]:
# Pad/clip each event's fwd list to exactly one entry (None where there is none),
# then flatten to one entry per event.
lead_fwd = ak.pad_none(fwd, 1, clip=True)
ak.flatten(lead_fwd)

In [None]:
# Same pad/clip-to-one and flatten, applied to the full jet collection.
ak.flatten(ak.pad_none(jet, 1, clip=True))

In [None]:
# First-jet slot (padded/clipped to one entry) for events where fwd is empty.
ak.pad_none(jet, 1, clip=True)[(ak.num(fwd)==0)]

In [None]:
# Scratch attempt to combine lead_fwd with the first jet for events without a fwd entry.
# NOTE(review): this multiplies a boolean mask with jet entries and adds the result to
# lead_fwd - confirm that this arithmetic is defined for these objects.
ak.flatten(lead_fwd) + ak.flatten((ak.num(fwd)==0)*ak.pad_none(jet, 1, clip=True))

In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection

# Re-binds events_incl to the object returned by from_root itself; .events() is not
# called here but in the next cell. Earlier cells that use events_incl as events
# will not work as before once this cell has run.
# (The older note "will change to .from_root in coffea v0.7.0" no longer applies:
# the call already uses .from_root.)
events_incl = NanoEventsFactory.from_root('/hadoop/cms/store/user/dspitzba/ProjectMetis/TTWJetsToLNuEWK_5f_NLO_RunIIAutumn18_NANO_v2/nanoAOD_1.root', schemaclass=NanoAODSchema)
from Tools.selection import Selection

In [None]:
# Materialise the events from the factory object created in the previous cell.
ev = events_incl.events()

In [None]:
# Display the HLT.Ele15_Ele8_CaloIdL_TrackIdL_IsoVL field of the events.
ev.HLT.Ele15_Ele8_CaloIdL_TrackIdL_IsoVL

In [None]:
import uproot
# Open one nanoSkim file of the EFT_mix sample. Note that `tree` is re-bound here to
# the opened file (the return value of uproot.open), not to a tree inside it.
tree = uproot.open('/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/topW_v0.2.3/ProjectMetis_TTWJetsToLNuEWK_5f_EFT_mix_myNLO_full_RunIIAutumn18_NANO_v4//nanoSkim_1.root')

In [None]:
# Print the contents listing of the 'Events' object in the opened file.
tree['Events'].show()

In [None]:
# Re-binds `ev` (previously the events built through NanoEventsFactory) to the
# 'Events' object of the file opened with uproot.
ev = tree['Events']

In [None]:
# List the name of every entry in ev.branches.
[ b.name for b in ev.branches ]