In [1]:
import os
import ROOT 
import pandas as pd
from math import pow, sqrt
from ctypes import c_double
from pprint import pprint

Welcome to JupyROOT 6.28/04


In [13]:
ERA = "2017"
CHANNEL = "Skim1E2Mu"
#MEASURE = "LowPT"
histkey = "ZCand/mass"

WORKDIR = "/home/choij/workspace/ChargedHiggsAnalysis"

DATASTREAM = ""
REGION = ""
if CHANNEL == "Skim1E2Mu": 
    DATASTREAM = "MuonEG"
    REGION = "ZGamma1E2Mu"
if CHANNEL == "Skim3Mu": 
    DATASTREAM = "DoubleMuon"
    REGION = "ZGamma3Mu"
assert DATASTREAM in ["MuonEG", "DoubleMuon"]
assert REGION in ["ZGamma1E2Mu", "ZGamma3Mu"]

CONV = ["DYJets_MG", "DYJets10to50_MG", "ZGToLLG"]
DIBOSON = ["WZTo3LNu_amcatnlo","ZZTo4L_powheg"]
TTX     = ["ttWToLNu", "ttZToLLNuNu", "ttHToNonbb"]
RARE    = ["WWW", "WWZ", "WZZ", "ZZZ", "tZq", "TTG", "tHq", "TTTT", "WWG", "VBF_HToZZTo4L", "GluGluHToZZTo4L"]
#RARE_noskim = ["tHq", "TTTT", "WWG"]
MCList = CONV + DIBOSON + TTX + RARE

SYSTs = []
if CHANNEL == "Skim1E2Mu":
    SYSTs = [["NonpromptUp", "NonpromptDown"],
             ["L1PrefireUp", "L1PrefireDown"],
             ["PileupReweightUp", "PileupReweightDown"],
             ["MuonIDSFUp", "MuonIDSFDown"],
             ["ElectronIDSFUp", "ElectronIDSFDown"],
             ["EMuTrigSFUp", "EMuTrigSFDown"],
             ["JetResUp", "JetResDown"],
             ["JetEnUp", "JetEnDown"],
             ["ElectronResUp", "ElectronResDown"],
             ["ElectronEnUp", "ElectronEnDown"],
             ["MuonEnUp", "MuonEnDown"],
             ["HeavyTagUpCorr", "HeavyTagDownCorr"],
             ["HeavyTagUpUnCorr", "HeavyTagDownUnCorr"],
             ["LightTagUpCorr", "LightTagDownCorr"],
             ["LightTagUpUnCorr", "LightTagDownUnCorr"]]
if CHANNEL == "Skim3Mu":
    SYSTs = [["NonpromptUp", "NonpromptDown"],
             ["L1PrefireUp", "L1PrefireDown"],
             ["PileupReweightUp", "PileupReweightDown"],
             ["MuonIDSFUp", "MuonIDSFDown"],
             ["DblMuTrigSFUp", "DblMuTrigSFDown"],
             ["JetResUp", "JetResDown"],
             ["JetEnUp", "JetEnDown"],
             ["ElectronResUp", "ElectronResDown"],
             ["ElectronEnUp", "ElectronEnDown"],
             ["MuonEnUp", "MuonEnDown"],
             ["HeavyTagUpCorr", "HeavyTagDownCorr"],
             ["HeavyTagUpUnCorr", "HeavyTagDownUnCorr"],
             ["LightTagUpCorr", "LightTagDownCorr"],
             ["LightTagUpUnCorr", "LightTagDownUnCorr"]]


In [14]:
# make a table
data = {}

# make index
indexCol = ["sample", "Central", "Stat"]
for syst in SYSTs:
    indexCol.append(syst[0])
    indexCol.append(syst[1])
indexCol.append("Total")

for index in indexCol:
    data[index] = []

In [15]:
def estTotalErr(sample, sampleDict):
    # find the index of the sample
    idx = sampleDict["sample"].index(sample)
    central = sampleDict["Central"][idx]
    totalErr = pow(sampleDict["Stat"][idx], 2)
    for syst in SYSTs:
        # initialized as False
        if not sampleDict[syst[0]][idx]: continue
        systUp = abs(sampleDict[syst[0]][idx] - central)
        systDown = abs(sampleDict[syst[1]][idx] - central)
        totalErr += pow(max(systUp, systDown), 2)
    
    return sqrt(totalErr)

In [16]:
# data
data["sample"].append("data")

f = ROOT.TFile.Open(f"{WORKDIR}/data/MeasConversion/{ERA}/{CHANNEL}__/DATA/MeasConversion_SkimTree_SS2lOR3l_{DATASTREAM}.root")
h = f.Get(f"{REGION}/Central/{histkey}"); h.SetDirectory(0)

stat = c_double()
rate = h.IntegralAndError(0, h.GetNbinsX()+1, stat)
data["Central"].append(rate)
data["Stat"].append(stat.value)

for index in indexCol[3:-1]:
    data[index].append(False)
data["Total"].append(estTotalErr("data", data))

f.Close()

In [17]:
# nonprompt
data["sample"].append("nonprompt")

f = ROOT.TFile.Open(f"{WORKDIR}/data/MeasConvMatrix/{ERA}/{CHANNEL}__/DATA/MeasConvMatrix_SkimTree_SS2lOR3l_{DATASTREAM}.root")
f_conv = ROOT.TFile.Open(f"{WORKDIR}/data/MeasConvMatrix/{ERA}/{CHANNEL}__/MeasConvMatrix_SkimTree_SS2lOR3l_DYJets_MG.root")
h = f.Get(f"{REGION}/Central/{histkey}"); h.SetDirectory(0)
h_conv = f_conv.Get(f"{REGION}/Central/{histkey}"); h_conv.SetDirectory(0)
f.Close()
f_conv.Close()
print(h.Integral(), h_conv.Integral(), h_conv.Integral()/h.Integral())
h.Add(h_conv, -1)

#h_up = f.Get(f"{REGION}/NonpromptUp/{histkey}"); h.SetDirectory(0)
#h_down = f.Get(f"{REGION}/NonpromptDown/{histkey}"); h.SetDirectory(0)

stat = c_double()
rate = h.IntegralAndError(0, h.GetNbinsX(), stat)
data["Central"].append(rate)
data["Stat"].append(stat.value)

# systematics
#data["NonpromptUp"].append(h_up.Integral())
#data["NonpromptDown"].append(h_down.Integral())
data["NonpromptUp"].append(rate*1.3)
data["NonpromptDown"].append(rate*0.7)

for index in indexCol[5:-1]:
    data[index].append(False)
data["Total"].append(estTotalErr("nonprompt", data))


457.7429910859848 168.43011713393042 0.36795782876835276


In [18]:
for mc in MCList:
    fkey = ""
    #if mc in RARE_noskim:
    #    fkey = f"{WORKDIR}/data/MeasConversion/{ERA}/{CHANNEL}__/MeasConversion_{mc}.root"
    #else:
    fkey = f"{WORKDIR}/data/MeasConversion/{ERA}/{CHANNEL}__/MeasConversion_SkimTree_SS2lOR3l_{mc}.root"
    print(mc)
    assert os.path.exists(fkey)
    f = ROOT.TFile.Open(fkey)
    try:
        h = f.Get(f"{REGION}/Central/{histkey}"); h.SetDirectory(0)
    except:
        print(mc); continue
    
    data["sample"].append(mc)
    # fill each row
    stat = c_double()
    rate = h.IntegralAndError(0, h.GetNbinsX()+1, stat)
    data["Central"].append(rate)
    data["Stat"].append(stat.value)
    data['NonpromptUp'].append(False)
    data["NonpromptDown"].append(False)
    
    for index in indexCol[5:-1]:
        try:
            h_syst = f.Get(f"{REGION}/{index}/{histkey}")
            h_syst.SetDirectory(0)
            data[index].append(h_syst.Integral())
        except:
            data[index].append(0.)
    f.Close()
    
    data["Total"].append(estTotalErr(mc, data))

DYJets_MG
DYJets10to50_MG
DYJets10to50_MG
ZGToLLG
WZTo3LNu_amcatnlo
ZZTo4L_powheg
ttWToLNu
ttZToLLNuNu
ttHToNonbb
WWW
WWZ
WZZ
ZZZ
tZq
TTG
tHq
TTTT
WWG
VBF_HToZZTo4L
GluGluHToZZTo4L


In [19]:
# make dataframe
df = pd.DataFrame(data)
df.set_index("sample", inplace=True)
df = df.transpose()

pprint(df)

sample                   data   nonprompt    DYJets_MG     ZGToLLG  \
Central                1471.0  289.312874  1219.517066  884.666863   
Stat                38.353618     17.6886     51.56099   11.486147   
NonpromptUp             False  376.106736        False       False   
NonpromptDown           False  202.519012        False       False   
L1PrefireUp             False       False  1213.864524  881.285105   
L1PrefireDown           False       False  1225.116388   888.03928   
PileupReweightUp        False       False  1193.421468  864.232317   
PileupReweightDown      False       False   1246.39472  903.706473   
MuonIDSFUp              False       False  1228.576293  890.353893   
MuonIDSFDown            False       False   1210.49508  879.002423   
ElectronIDSFUp          False       False  1258.825704  915.828648   
ElectronIDSFDown        False       False  1180.899857  854.078103   
EMuTrigSFUp             False       False  1219.517066  884.666863   
EMuTrigSFDown       

In [20]:
df

sample,data,nonprompt,DYJets_MG,ZGToLLG,WZTo3LNu_amcatnlo,ZZTo4L_powheg,ttWToLNu,ttZToLLNuNu,ttHToNonbb,WWW,WWZ,WZZ,ZZZ,tZq,TTG,tHq,TTTT,WWG,VBF_HToZZTo4L,GluGluHToZZTo4L
Central,1471.0,289.312874,1219.517066,884.666863,32.047867,90.473687,0.180313,0.200002,0.174471,0.356077,0.07593,0.027969,0.002431,0.120182,0.261064,0.191414,0.000211,0.004772,0.12263,1.276794
Stat,38.353618,17.6886,51.56099,11.486147,1.212504,0.224997,0.024864,0.023649,0.018628,0.018266,0.053763,0.002652,0.0004,0.022829,0.187514,0.014242,0.000355,0.00157,0.003058,0.024263
NonpromptUp,False,376.106736,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
NonpromptDown,False,202.519012,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
L1PrefireUp,False,False,1213.864524,881.285105,31.884796,90.199815,0.179187,0.19856,0.172929,0.354742,0.074372,0.027801,0.002414,0.119321,0.259562,0.189782,0.000212,0.004765,0.121101,1.271222
L1PrefireDown,False,False,1225.116388,888.03928,32.209041,90.743896,0.181436,0.201427,0.176,0.357398,0.077491,0.028135,0.002448,0.120997,0.262506,0.19304,0.000211,0.004778,0.124166,1.282309
PileupReweightUp,False,False,1193.421468,864.232317,31.460715,88.890653,0.172702,0.196416,0.180525,0.356752,0.07444,0.027629,0.002317,0.111205,0.290441,0.19319,0.00027,0.004293,0.121883,1.273624
PileupReweightDown,False,False,1246.39472,903.706473,32.598391,91.997686,0.185959,0.200771,0.171485,0.356618,0.076478,0.028679,0.002504,0.129736,0.204658,0.188284,0.000143,0.005124,0.123342,1.280851
MuonIDSFUp,False,False,1228.576293,890.353893,32.338039,91.18106,0.181719,0.201637,0.175723,0.358703,0.076149,0.028163,0.002451,0.121174,0.262728,0.192911,0.000209,0.004803,0.12349,1.286409
MuonIDSFDown,False,False,1210.49508,879.002423,31.759311,89.769457,0.178914,0.198375,0.173224,0.353463,0.075711,0.027776,0.002411,0.119195,0.259408,0.189925,0.000213,0.00474,0.121774,1.267224


In [21]:
# measure scale factor
data_convsf = {}

# central value
rate_data = df.loc["Central", "data"]
rate_conv = df.loc["Central", "DYJets_MG"]
rate_pred = df.loc["Central", "nonprompt"]
for mc in MCList:
    if mc in CONV: continue
    if not mc in df.columns: continue
    print(mc, df.loc["Central", mc])
    rate_pred += df.loc["Central", mc]
convsf = (rate_data - rate_pred) / rate_conv
data_convsf["Central"] = convsf

WZTo3LNu_amcatnlo 32.04786664089114
ZZTo4L_powheg 90.47368745652996
ttWToLNu 0.1803133269341444
ttZToLLNuNu 0.20000172546116407
ttHToNonbb 0.1744711389298882
WWW 0.3560770769990151
WWZ 0.07592976748053551
WZZ 0.027968964124124675
ZZZ 0.002431030727932253
tZq 0.12018208783501383
TTG 0.26106432757655135
tHq 0.19141420337331144
TTTT 0.00021107788663679732
WWG 0.004771621659929707
VBF_HToZZTo4L 0.12263010427527453
GluGluHToZZTo4L 1.276793640512914


In [22]:
# stat
dNconv = df.loc["Stat", "DYJets_MG"]
dNpred = df.loc["Stat", "nonprompt"]
for mc in MCList:
    if mc in CONV: continue
    if not mc in df.columns: continue
    dNpred += df.loc["Stat", mc]
data_convsf["Stat"] = -(dNpred / rate_conv) + (rate_pred / pow(rate_conv, 2))*dNconv

# systatmatics
for syst in indexCol[3:-1]:
    dNconv = 0.
    if df.loc[syst, "DYJets_MG"]:
        dNconv += df.loc[syst, "DYJets_MG"] - df.loc["Central", "DYJets_MG"]
        
    dNpred = 0.
    if df.loc[syst, 'nonprompt']: dNpred += df.loc[syst, 'nonprompt'] - df.loc['Central', 'nonprompt']
    for mc in MCList:
        if mc in CONV: continue
        if not mc in df.columns: continue
        if df.loc[syst, mc]: dNpred += df.loc[syst, mc] - df.loc['Central', mc]
    data_convsf[syst] = -(dNpred / rate_conv) + (rate_pred / pow(rate_conv, 2))*dNconv

totalErr = pow(data_convsf['Stat'], 2)
for syst in SYSTs:
    thisErr = max( abs(data_convsf[syst[0]]), abs(data_convsf[syst[1]]) )
    totalErr += pow(thisErr, 2)
data_convsf["Total"] = sqrt(totalErr)

In [23]:
pprint(data_convsf)

{'Central': 0.8660570166317215,
 'EMuTrigSFDown': 0.0,
 'EMuTrigSFUp': 0.0,
 'ElectronEnDown': 3.214331568309367e-05,
 'ElectronEnUp': -0.001318004542313745,
 'ElectronIDSFDown': -0.007577042754901857,
 'ElectronIDSFUp': 0.0077101353066431864,
 'ElectronResDown': 0.0,
 'ElectronResUp': -1.7131289679191983e-05,
 'HeavyTagDownCorr': 0.0001463326244119721,
 'HeavyTagDownUnCorr': 0.00016464318051524735,
 'HeavyTagUpCorr': -0.0001493430669566357,
 'HeavyTagUpUnCorr': -0.00016850702243678738,
 'JetEnDown': 0.0012689761984560485,
 'JetEnUp': -0.0011000841444855974,
 'JetResDown': 0.0007952744582900624,
 'JetResUp': -0.0006965955714887406,
 'L1PrefireDown': 0.0011932504552021267,
 'L1PrefireUp': -0.0012033655947883788,
 'LightTagDownCorr': 0.0004956485274899713,
 'LightTagDownUnCorr': 0.0006149807462775143,
 'LightTagUpCorr': -0.0004943586977544765,
 'LightTagUpUnCorr': -0.000613005129128301,
 'MuonEnDown': -0.0005231142524364843,
 'MuonEnUp': -0.002318059756151129,
 'MuonIDSFDown': -0.0016844