In [4]:
import os
import ROOT 
import pandas as pd
from math import pow, sqrt
from ctypes import c_double
from pprint import pprint

In [17]:
ERA = "2016preVFP"
CHANNEL = "Skim1E2Mu"
MEASURE = "HighPT"
histkey = "ZCand/mass"

WORKDIR = "/home/choij/workspace/ChargedHiggsAnalysis"

DATASTREAM = ""
REGION = ""
if CHANNEL == "Skim1E2Mu": 
    DATASTREAM = "MuonEG"
    REGION = "ZGamma1E2Mu"
if CHANNEL == "Skim3Mu": 
    DATASTREAM = "DoubleMuon"
    REGION = "ZGamma3Mu"
assert DATASTREAM in ["MuonEG", "DoubleMuon"]
assert REGION in ["ZGamma1E2Mu", "ZGamma3Mu"]

CONV = ["DYJets_MG", "DYJets10to50_MG", "ZGToLLG"]
DIBOSON = ["WZTo3LNu_amcatnlo","ZZTo4L_powheg"]
TTX     = ["ttWToLNu", "ttZToLLNuNu", "ttHToNonbb"]
RARE    = ["WWW", "WWZ", "WZZ", "ZZZ", "tZq", "TTG", "tHq", "TTTT", "WWG", "VBF_HToZZTo4L", "GluGluHToZZTo4L"]
#RARE_noskim = ["tHq", "TTTT", "WWG"]
MCList = CONV + DIBOSON + TTX + RARE

SYSTs = []
if CHANNEL == "Skim1E2Mu":
    SYSTs = [["NonpromptUp", "NonpromptDown"],
             ["L1PrefireUp", "L1PrefireDown"],
             ["PileupReweightUp", "PileupReweightDown"],
             ["MuonIDSFUp", "MuonIDSFDown"],
             ["ElectronIDSFUp", "ElectronIDSFDown"],
             ["EMuTrigSFUp", "EMuTrigSFDown"],
             ["JetResUp", "JetResDown"],
             ["JetEnUp", "JetEnDown"],
             ["ElectronResUp", "ElectronResDown"],
             ["ElectronEnUp", "ElectronEnDown"],
             ["MuonEnUp", "MuonEnDown"],
             ["HeavyTagUpCorr", "HeavyTagDownCorr"],
             ["HeavyTagUpUnCorr", "HeavyTagDownUnCorr"],
             ["LightTagUpCorr", "LightTagDownCorr"],
             ["LightTagUpUnCorr", "LightTagDownUnCorr"]]
if CHANNEL == "Skim3Mu":
    SYSTs = [["NonpromptUp", "NonpromptDown"],
             ["L1PrefireUp", "L1PrefireDown"],
             ["PileupReweightUp", "PileupReweightDown"],
             ["MuonIDSFUp", "MuonIDSFDown"],
             ["DblMuTrigSFUp", "DblMuTrigSFDown"],
             ["JetResUp", "JetResDown"],
             ["JetEnUp", "JetEnDown"],
             ["ElectronResUp", "ElectronResDown"],
             ["ElectronEnUp", "ElectronEnDown"],
             ["MuonEnUp", "MuonEnDown"],
             ["HeavyTagUpCorr", "HeavyTagDownCorr"],
             ["HeavyTagUpUnCorr", "HeavyTagDownUnCorr"],
             ["LightTagUpCorr", "LightTagDownCorr"],
             ["LightTagUpUnCorr", "LightTagDownUnCorr"]]


In [18]:
# make a table
data = {}

# make index
indexCol = ["sample", "Central", "Stat"]
for syst in SYSTs:
    indexCol.append(syst[0])
    indexCol.append(syst[1])
indexCol.append("Total")

for index in indexCol:
    data[index] = []

In [19]:
def estTotalErr(sample, sampleDict):
    # find the index of the sample
    idx = sampleDict["sample"].index(sample)
    central = sampleDict["Central"][idx]
    totalErr = pow(sampleDict["Stat"][idx], 2)
    for syst in SYSTs:
        # initialized as False
        if not sampleDict[syst[0]][idx]: continue
        systUp = abs(sampleDict[syst[0]][idx] - central)
        systDown = abs(sampleDict[syst[1]][idx] - central)
        totalErr += pow(max(systUp, systDown), 2)
    
    return sqrt(totalErr)

In [20]:
# data
data["sample"].append("data")

f = ROOT.TFile.Open(f"{WORKDIR}/data/MeasConversion/{ERA}/{CHANNEL}__/DATA/MeasConversion_SkimTree_SS2lOR3l_{DATASTREAM}.root")
h = f.Get(f"{REGION}/{MEASURE}/Central/{histkey}"); h.SetDirectory(0)

stat = c_double()
rate = h.IntegralAndError(0, h.GetNbinsX()+1, stat)
data["Central"].append(rate)
data["Stat"].append(stat.value)

for index in indexCol[3:-1]:
    data[index].append(False)
data["Total"].append(estTotalErr("data", data))

f.Close()

In [21]:
# nonprompt
data["sample"].append("nonprompt")

f = ROOT.TFile.Open(f"{WORKDIR}/data/MeasConvMatrix/{ERA}/{CHANNEL}__/DATA/MeasConvMatrix_SkimTree_SS2lOR3l_{DATASTREAM}.root")
h = f.Get(f"{REGION}/{MEASURE}/Central/{histkey}"); h.SetDirectory(0)
h_up = f.Get(f"{REGION}/{MEASURE}/NonpromptUp/{histkey}"); h.SetDirectory(0)
h_down = f.Get(f"{REGION}/{MEASURE}/NonpromptDown/{histkey}"); h.SetDirectory(0)

stat = c_double()
rate = h.IntegralAndError(0, h.GetNbinsX(), stat)
data["Central"].append(rate)
data["Stat"].append(stat.value)

# systematics
#data["NonpromptUp"].append(h_up.Integral())
#data["NonpromptDown"].append(h_down.Integral())
data["NonpromptUp"].append(rate*1.4)
data["NonpromptDown"].append(rate*0.6)

for index in indexCol[5:-1]:
    data[index].append(False)
data["Total"].append(estTotalErr("nonprompt", data))

f.Close()

In [22]:
for mc in MCList:
    fkey = ""
    #if mc in RARE_noskim:
    #    fkey = f"{WORKDIR}/data/MeasConversion/{ERA}/{CHANNEL}__/MeasConversion_{mc}.root"
    #else:
    fkey = f"{WORKDIR}/data/MeasConversion/{ERA}/{CHANNEL}__/MeasConversion_SkimTree_SS2lOR3l_{mc}.root"
    print(mc)
    assert os.path.exists(fkey)
    f = ROOT.TFile.Open(fkey)
    try:
        h = f.Get(f"{REGION}/{MEASURE}/Central/{histkey}"); h.SetDirectory(0)
    except:
        print(mc); continue
    
    data["sample"].append(mc)
    # fill each row
    stat = c_double()
    rate = h.IntegralAndError(0, h.GetNbinsX()+1, stat)
    data["Central"].append(rate)
    data["Stat"].append(stat.value)
    data['NonpromptUp'].append(False)
    data["NonpromptDown"].append(False)
    
    for index in indexCol[5:-1]:
        try:
            h_syst = f.Get(f"{REGION}/{MEASURE}/{index}/{histkey}")
            h_syst.SetDirectory(0)
            data[index].append(h_syst.Integral())
        except:
            data[index].append(0.)
    f.Close()
    
    data["Total"].append(estTotalErr(mc, data))

DYJets_MG
DYJets10to50_MG
DYJets10to50_MG
ZGToLLG
WZTo3LNu_amcatnlo
ZZTo4L_powheg
ttWToLNu
ttZToLLNuNu
ttHToNonbb
WWW
WWZ
WZZ
ZZZ
tZq
TTG
TTG
tHq
TTTT
WWG
VBF_HToZZTo4L
GluGluHToZZTo4L


In [23]:
# make dataframe
df = pd.DataFrame(data)
df.set_index("sample", inplace=True)
df = df.transpose()

pprint(df)

sample                   data  nonprompt  DYJets_MG    ZGToLLG  \
Central                 209.0  50.893093  55.210415  59.346413   
Stat                14.456832   4.434676   7.819259   2.360464   
NonpromptUp             False   71.25033      False      False   
NonpromptDown           False  30.535856      False      False   
L1PrefireUp             False      False  54.826726  58.928051   
L1PrefireDown           False      False  55.589561  59.760373   
PileupReweightUp        False      False   54.58416  58.153139   
PileupReweightDown      False      False  56.066233  60.590955   
MuonIDSFUp              False      False  55.548239  59.695176   
MuonIDSFDown            False      False  54.873819  58.998882   
ElectronIDSFUp          False      False  55.928914  60.160837   
ElectronIDSFDown        False      False  54.495065   58.53565   
EMuTrigSFUp             False      False  55.210415  59.346413   
EMuTrigSFDown           False      False  55.210415  59.346413   
JetResUp  

In [24]:
df

sample,data,nonprompt,DYJets_MG,ZGToLLG,WZTo3LNu_amcatnlo,ZZTo4L_powheg,ttWToLNu,ttZToLLNuNu,ttHToNonbb,WWW,WWZ,WZZ,ZZZ,tZq,tHq,TTTT,WWG,VBF_HToZZTo4L,GluGluHToZZTo4L
Central,209.0,50.893093,55.210415,59.346413,5.526489,11.075563,0.03118,0.06387,0.045692,0.080202,0.027627,0.009941,0.001184,0.031696,0.052588,0.000429,0.00354,0.033847,0.252994
Stat,14.456832,4.434676,7.819259,2.360464,0.373557,0.076083,0.012788,0.012553,0.009616,0.008155,0.004139,0.001438,0.000248,0.01138,0.006835,0.000237,0.001225,0.001104,0.007389
NonpromptUp,False,71.25033,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
NonpromptDown,False,30.535856,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
L1PrefireUp,False,False,54.826726,58.928051,5.482792,10.995326,0.031083,0.063018,0.045323,0.079679,0.027283,0.009845,0.001165,0.031354,0.052132,0.000422,0.003488,0.033428,0.251039
L1PrefireDown,False,False,55.589561,59.760373,5.569883,11.155335,0.031279,0.064718,0.046061,0.080725,0.027975,0.010036,0.001203,0.032042,0.053042,0.000436,0.003589,0.034267,0.25493
PileupReweightUp,False,False,54.58416,58.153139,5.497934,10.881863,0.030357,0.060961,0.044267,0.080197,0.027858,0.010189,0.001195,0.029777,0.052993,0.000401,0.003454,0.033414,0.251607
PileupReweightDown,False,False,56.066233,60.590955,5.551009,11.266205,0.032073,0.066597,0.046876,0.079856,0.027409,0.009723,0.001196,0.033072,0.051614,0.000456,0.003584,0.034315,0.253943
MuonIDSFUp,False,False,55.548239,59.695176,5.557347,11.140624,0.031292,0.064242,0.045915,0.080589,0.027799,0.009991,0.001194,0.031885,0.052871,0.00043,0.003563,0.03402,0.254392
MuonIDSFDown,False,False,54.873819,58.998882,5.495742,11.010734,0.031067,0.0635,0.04547,0.079817,0.027455,0.009891,0.001174,0.031507,0.052305,0.000427,0.003517,0.033676,0.251601


In [25]:
# measure scale factor
data_convsf = {}

# central value
rate_data = df.loc["Central", "data"]
rate_conv = df.loc["Central", "DYJets_MG"] if MEASURE == "LowPT" else df.loc["Central", "ZGToLLG"]
rate_pred = df.loc["Central", "nonprompt"]
for mc in MCList:
    if mc in CONV: continue
    if not mc in df.columns: continue
    print(mc, df.loc["Central", mc])
    rate_pred += df.loc["Central", mc]
convsf = (rate_data - rate_pred) / rate_conv
data_convsf["Central"] = convsf

WZTo3LNu_amcatnlo 5.526488911223838
ZZTo4L_powheg 11.07556331426849
ttWToLNu 0.031179761258257263
ttZToLLNuNu 0.06387000695989421
ttHToNonbb 0.045692246497921585
WWW 0.08020234779166763
WWZ 0.02762669378367341
WZZ 0.009940578014910339
ZZZ 0.0011839254854386836
tZq 0.031695863632642984
tHq 0.05258775110672969
TTTT 0.00042881390131332264
WWG 0.0035400590621262836
VBF_HToZZTo4L 0.03384749337847132
GluGluHToZZTo4L 0.2529940908657288


In [26]:
thisConv = "DYJets_MG" if MEASURE == "LowPT" else "ZGToLLG"

# stat
dNconv = df.loc["Stat", thisConv]
dNpred = df.loc["Stat", "nonprompt"]
for mc in MCList:
    if mc in CONV: continue
    if not mc in df.columns: continue
    dNpred += df.loc["Stat", mc]
data_convsf["Stat"] = -(dNpred / rate_conv) + (rate_pred / pow(rate_conv, 2))*dNconv

# systatmatics
for syst in indexCol[3:-1]:
    dNconv = 0.
    if df.loc[syst, thisConv]:
        dNconv += df.loc[syst, thisConv] - df.loc["Central", thisConv]
        
    dNpred = 0.
    if df.loc[syst, 'nonprompt']: dNpred += df.loc[syst, 'nonprompt'] - df.loc['Central', 'nonprompt']
    for mc in MCList:
        if mc in CONV: continue
        if not mc in df.columns: continue
        if df.loc[syst, mc]: dNpred += df.loc[syst, mc] - df.loc['Central', mc]
    data_convsf[syst] = -(dNpred / rate_conv) + (rate_pred / pow(rate_conv, 2))*dNconv

totalErr = pow(data_convsf['Stat'], 2)
for syst in SYSTs:
    thisErr = max( abs(data_convsf[syst[0]]), abs(data_convsf[syst[1]]) )
    totalErr += pow(thisErr, 2)
data_convsf["Total"] = sqrt(totalErr)

In [27]:
pprint(data_convsf)

{'Central': 2.3736913351844176,
 'EMuTrigSFDown': 0.0,
 'EMuTrigSFUp': 0.0,
 'ElectronEnDown': -0.002522980927055947,
 'ElectronEnUp': 0.002731981692834813,
 'ElectronIDSFDown': -0.012100068853258412,
 'ElectronIDSFUp': 0.01215482934193511,
 'ElectronResDown': 0.0,
 'ElectronResUp': -3.287389482902453e-05,
 'HeavyTagDownCorr': 0.00030523071297500193,
 'HeavyTagDownUnCorr': 0.0002920555170274963,
 'HeavyTagUpCorr': -0.00031836793971400936,
 'HeavyTagUpUnCorr': -0.0003069476398697294,
 'JetEnDown': 0.0010916919505572645,
 'JetEnUp': -0.003357204654684067,
 'JetResDown': 0.0025560763486634102,
 'JetResUp': -0.0008814291495284666,
 'L1PrefireDown': 0.005839403383445211,
 'L1PrefireUp': -0.0059113167679580286,
 'LightTagDownCorr': 0.002605723151035213,
 'LightTagDownUnCorr': 0.0027667872618732823,
 'LightTagUpCorr': -0.0025961889835742716,
 'LightTagUpUnCorr': -0.0027560486566602475,
 'MuonEnDown': -0.0010375000917365854,
 'MuonEnUp': 0.0020966512045720712,
 'MuonIDSFDown': -0.0050551645588