In [1]:
from utility_dfcounter import *

## 1. count nominal

In [2]:
def countDataFrames(variation=""):
    """Tabulate the counts of every trigger / b-tag category and pickle them.

    For each combination of trigger ("mu", "e") and usetag ("1b", "2b") a
    ``DFCounter`` is built, switched to ``variation`` and queried for its
    acceptance, MC-background, fake and data counts. Every query yields a
    pair that is stored in the ``<name>`` / ``<name>Var`` columns.

    Parameters
    ----------
    variation : str
        Passed to ``DFCounter.setVariation`` and inserted into the output
        file name ``count_<variation>.pkl``. Defaults to the empty string.

    Returns
    -------
    None
        The table is written below ``common.getBaseDirectory()`` in
        ``data/counts/``; progress messages are printed to stdout.
    """
    columns = ["trigger","usetag","acc","accVar","nmcbg","nmcbgVar","nfake","nfakeVar","ndata","ndataVar"]

    # Flatten the two nested category loops; trigger stays the outer (slow) index.
    categories = [(trigger, usetag) for trigger in ["mu","e"] for usetag in ["1b","2b"]]

    rows = []
    for trigger, usetag in categories:
        print( "counting "+trigger+usetag + " ...")

        counter = DFCounter(trigger, usetag)
        counter.setVariation(variation)

        # Query order matches the column order: acc, nmcbg, nfake, ndata.
        row = (trigger, usetag)
        for method in ("returnAcc", "returnNMCbg", "returnNFake", "returnNData"):
            value, valueVar = getattr(counter, method)()
            row = row + (value, valueVar)
        rows.append(row)

    table = pd.DataFrame.from_records(rows, columns=columns)
    output_path = common.getBaseDirectory() + "data/counts/count_{}.pkl".format(variation)
    table.to_pickle(output_path)
    print( "counting finished!")
    
def countDataFrames_selection(variation=""):
    """Tabulate the counts of every selection / b-jet category and pickle them.

    For each combination of selection ("mumu", "ee") and nbjet ("==1", ">1")
    a ``DFCounter_selection`` is built, switched to ``variation`` and queried
    for its acceptance, MC-background and data counts. The fake columns are
    not queried; they are filled with ``np.zeros(2)`` placeholders.

    Parameters
    ----------
    variation : str
        Passed to ``DFCounter_selection.setVariation`` and inserted into the
        output file name ``DNNcount_<variation>.pkl``. Defaults to the empty
        string.

    Returns
    -------
    None
        The table is written below ``common.getBaseDirectory()`` in
        ``data/counts/``; a completion message is printed to stdout.
    """
    columns = ["selection","nbjet","acc","accVar","nmcbg","nmcbgVar","nfake","nfakeVar","ndata","ndataVar"]

    # Flatten the two nested category loops; selection stays the outer (slow) index.
    categories = [(selection, nbjet) for selection in ["mumu","ee"] for nbjet in ["==1",">1"]]

    rows = []
    for selection, nbjet in categories:
        counter = DFCounter_selection(selection, nbjet)
        counter.setVariation(variation)

        accPair = counter.returnAcc()
        mcbgPair = counter.returnNMCbg()
        fakePair = (np.zeros(2), np.zeros(2))  # no fake estimate here: zero placeholders
        dataPair = counter.returnNData()

        # Unpack explicitly so a malformed (non two-element) result still raises.
        row = (selection, nbjet)
        for pair in (accPair, mcbgPair, fakePair, dataPair):
            value, valueVar = pair
            row = row + (value, valueVar)
        rows.append(row)

    table = pd.DataFrame.from_records(rows, columns=columns)
    output_path = common.getBaseDirectory() + "data/counts/DNNcount_{}.pkl".format(variation)
    table.to_pickle(output_path)
    print( "counting finished!")

In [3]:
#countDataFrames()

## 2. count with variation

In [4]:
import multiprocessing as mp

def runCountDataFrames(vlist):
    """Run ``countDataFrames`` for several variations in parallel.

    One ``multiprocessing.Process`` is started per entry of ``vlist``; the
    call blocks until every process has finished.

    Parameters
    ----------
    vlist : iterable of str
        Variation names; each one is forwarded as the single positional
        argument of ``countDataFrames``.

    Returns
    -------
    None

    Warns
    -----
    RuntimeWarning
        If any child process ended with a non-zero exit code. An exception
        raised inside a child does not propagate to the parent, so without
        this check a failed variation would go unnoticed.
    """
    import warnings  # function-scope import keeps this cell self-contained

    # Materialise once: the names are needed again when reporting failures,
    # and ``vlist`` may be a one-shot iterator.
    variations = list(vlist)

    processes = [mp.Process(target=countDataFrames, args=(v,)) for v in variations]
    for ps in processes:
        ps.start()
    for ps in processes:
        ps.join()

    # After join() the exit code is 0 on success, positive when the child
    # exited with an error and negative when it was terminated by a signal.
    failed = [v for v, ps in zip(variations, processes) if ps.exitcode != 0]
    if failed:
        warnings.warn(
            "countDataFrames failed for variation(s): {}".format(
                ", ".join(repr(v) for v in failed)),
            RuntimeWarning,
        )

In [None]:

# Each inner list is one batch: runCountDataFrames starts one process per
# variation in the batch and waits for all of them before the next batch begins.
# The empty string "" is the default argument of countDataFrames.
#
# Alternative (disabled): everything in two large batches.
# runCountDataFrames(['EPtDown','MuPtDown','TauPtDown',"TTXSUp","TWXSUp","",
#                     "JESUp","JESDown","JERUp","JERDown",
#                     "BTagUp","BTagDown","MistagUp","MistagDown"])
# runCountDataFrames(["RenormUp","RenormDown","FactorUp","FactorDown","PDFUp","PDFDown",
#                     'FSRUp','FSRDown','ISRUp','ISRDown','UEUp','UEDown','MEPSUp','MEPSDown'])
variation_batches = [
    ["RenormUp","RenormDown","FactorUp","FactorDown"],   # ttLHEWeight
    ["PDFUp","PDFDown","TTXSUp","TWXSUp"],               # ttLHEWeight
    # ['FSRUp','FSRDown','ISRUp','ISRDown'],             # ttTheory (disabled)
    # ['UEUp','UEDown','MEPSUp','MEPSDown'],             # ttTheory (disabled)
    ['EPtDown','MuPtDown','TauPtDown',""],               # lepEnergy
    ["JESUp","JESDown","JERUp","JERDown"],               # jetEnergy
    ["BTagUp","BTagDown","MistagUp","MistagDown"],       # bTagging
]

for batch in variation_batches:
    runCountDataFrames(batch)

counting mu1b ...
counting mu1b ...
counting mu1b ...
counting mu1b ...
counting mu2b ...
counting mu2b ...
counting mu2b ...
counting mu2b ...
counting e1b ...
counting e1b ...
counting e1b ...
counting e1b ...
counting e2b ...
counting e2b ...
counting e2b ...
counting e2b ...
counting finished!
counting finished!
counting finished!
counting finished!
counting mu1b ...
counting mu1b ...
counting mu1b ...
counting mu1b ...
counting mu2b ...
counting mu2b ...
counting mu2b ...
counting mu2b ...
counting e1b ...
counting e1b ...
counting e1b ...
counting e1b ...
counting e2b ...
counting e2b ...
counting e2b ...
counting e2b ...
counting finished!
counting finished!
counting finished!
counting finished!
counting mu1b ...
counting mu1b ...
counting mu1b ...
counting mu1b ...


## 3. Count uncertainty of Tau acc

In [2]:
# jet misID
# Load the "mctt" sample for the 'etau' selection with the '>1' b-jet requirement,
# then display the ratio of two event-weighted sums: the numerator applies an
# extra per-event factor (1 + 0.002 * lepton2_pt), the denominator does not.
# The result is therefore the eventWeight-weighted mean of that factor.
# NOTE(review): 0.002 is an unexplained constant — presumably a pT-dependent
# jet misID slope per unit of lepton2_pt; confirm its source and units.
df = DFCutter('etau','>1',"mctt").getDataFrame()
np.sum( df.eventWeight*(1+0.002*df.lepton2_pt) )/ np.sum(df.eventWeight)

1.07965072853603

## 4. Print the Expected Yields Table

In [35]:
# Yields table for one channel: one printed row per simulated sample, then the
# fake estimate, then the summed expectation, then the observed data.
# Each row is "sum of eventWeight +/- sqrt(sum of eventWeight**2)".
ct = DFCounter('e','1b')
selection = 'e4j'

total,totalVar = 0,0

# Simulated samples, in the order their rows are printed.
for sample in ["mctt","mct","mcdy","mcdiboson"]:
    df = DFCutter(selection,ct.nbjet,sample).getDataFrame()
    n,nVar = np.sum(df.eventWeight),np.sum(df.eventWeight**2)
    print("{:8.1f} +/- {:4.1f}".format(n,nVar**0.5) )
    total += n
    totalVar += nVar

# Fake contribution comes from the counter rather than from a DFCutter sample;
# its second return value is accumulated like the variances above.
n, nVar = ct.getNFake(selection,ct.nbjet)
print("{:8.1f} +/- {:4.1f}".format(n,nVar**0.5) )
total += n
totalVar += nVar

# Total expectation: the per-row second terms are summed before the square root.
print("{:8.1f} +/- {:4.1f}".format(total,totalVar**0.5) )

# Observed data, printed with the same formula for comparison.
df = DFCutter(selection,ct.nbjet,"data2016").getDataFrame()
print("{:8.1f} +/- {:4.1f}".format(np.sum(df.eventWeight),np.sum(df.eventWeight**2)**0.5) )

363821.2 +/- 262.8
 15811.5 +/- 143.6
 22671.7 +/- 192.6
   284.5 +/-  2.9
  8525.8 +/- 37.9
411114.7 +/- 358.1
412463.0 +/- 642.2
