In [1]:
# `display` and `HTML` live in the public IPython.display module; importing them
# from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Let the notebook cells use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
import json
import pandas as pd
import numpy as np
import ROOT as rt
from root_numpy import hist2array
import sys

import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator, LogLocator, NullFormatter, LogFormatter)
from matplotlib.pyplot import cm
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

from scipy.odr import *
from scipy import stats

Welcome to JupyROOT 6.14/04


In [3]:
# root TH1 to DataFrame and plot using matplotlib

class ISRPlotter :
    def __init__ (self, inputHistFilePath, jasonConfigFilePath, verbose=True) :
        """Read the JSON configuration, load every histogram and derive uncertainties.

        Parameters
        ----------
        inputHistFilePath : str
            Path of the ROOT file that holds the input histograms.
        jasonConfigFilePath : str
            Path of the JSON configuration file. The keys read here are
            'plotPrefix', 'Analysis', 'Year', 'Channel', 'topDirName',
            'HistPrefix', 'useTUnfoldBin', 'TUnfoldBinNames', 'Variables',
            'VariablePostfix', 'Steering', 'Systematics', 'Samples' and
            'StackOrder'.
        verbose : bool, optional
            When true, print a short summary of the configuration.

        Raises
        ------
        IOError
            If the input ROOT file cannot be opened.
        ValueError
            If a key of 'Samples' contains none of "Measured", "Signal" or
            "Background", so it cannot be assigned to a histogram dictionary.
        """
        # Set using self.binDef
        self.binDef={} # dictionary of TUnfoldBinning objects, keyed by variable name
        self.massBins=[] # list of tuple,  ex) [(40.,64.), (64., 81.), (81., 101.), (101., 200.), (200., 320.)]

        # Dictionaries of raw histograms and their DataFrames:
        # dict[variable][sample][sysName][postfix] -> {"TH1": ..., "DataFrame": ...}
        self.rawDataMeasured = {}
        self.rawDataBkgSubtracted = {}
        self.rawMCSignal = {}
        self.rawMCBackground = {}
        self.rawMCTotal = {}

        # Dictionaries of DataFrames containing nominal and systematic histogram content
        self.Data = {}
        self.DataBkgSubtracted = {}
        self.MCSignal = {}
        self.MCBackground = {}
        self.MCTotal = {}

        self.bkgUsed = False

        # Read json file to get information of histograms
        with open(jasonConfigFilePath, 'r') as jsonFile :
            self.config = json.load(jsonFile)

        self.plotPrefix=self.config['plotPrefix'] # prefix for output plot file
        self.analysis=self.config['Analysis']
        self.year=self.config['Year']
        self.channel=self.config["Channel"]
        self.topDirName=self.config['topDirName']
        self.histPrefix=self.config["HistPrefix"]
        self.useTUnfoldBin=self.config["useTUnfoldBin"]
        self.tunfoldBinNames=self.config["TUnfoldBinNames"]
        self.variables=self.config["Variables"]
        self.variablePostfix=self.config["VariablePostfix"]
        self.steeringTUnfold=self.config['Steering']
        self.systematics=self.config['Systematics']
        self.samples=self.config['Samples']
        self.stackOrder=self.config['StackOrder']

        # Expand the compact PDF entry ("<prefix>_<N>" in the second list element)
        # into the explicit postfix list <prefix>001 ... <prefix>N.
        # A configuration without a "theory" category is tolerated here.
        theory_sys=self.systematics.get("theory", {})
        if "PDF" in theory_sys :
            pdf_tokens=theory_sys["PDF"][1].split("_")
            pdf_prefix=pdf_tokens[0]
            n_max=int(pdf_tokens[1])
            theory_sys["PDF"]=[pdf_prefix+"{:0>3}".format(str(i)) for i in range(1,n_max+1)]

        if verbose :
            print('This is {} {} data of {} analysis...'.format(self.year, self.channel, self.analysis))
            print("Systematics saved in the input root file...")
            print(self.systematics)
            print("Samples saved in the input root file...")
            print(self.samples)
            if self.useTUnfoldBin :
                print("TUnfoldBinning is used")

        # Open input histogram root file
        self.inRootFile=rt.TFile.Open(inputHistFilePath, 'READ')
        if not self.inRootFile or self.inRootFile.IsZombie() :
            raise IOError("Cannot open input histogram file: {}".format(inputHistFilePath))

        if self.useTUnfoldBin :
            for binName in self.tunfoldBinNames :
                binVariable=binName.split("_")[1]
                varDirName=binVariable
                if len(self.variablePostfix) > 0 :
                    varDirName = varDirName + "_" + self.variablePostfix

                self.binDef[binVariable]=self.inRootFile.Get(self.topDirName+"/"+varDirName+"/"+binName)

            # Set massBins from the bin edges returned by GetDistributionBinning(1)
            # of the "Pt" binning object
            temp_tvecd=self.binDef["Pt"].GetDistributionBinning(1)
            temp_mass_bin_edges=temp_tvecd.GetMatrixArray()
            self.massBins.extend([(temp_mass_bin_edges[index], temp_mass_bin_edges[index+1]) for index in range(temp_tvecd.GetNrows()-1)])

        # Out directory
        self.outDirPath="output/"+self.year+"/"+self.channel+"/"
        if not os.path.exists(self.outDirPath) :
            os.makedirs(self.outDirPath)

        # Get raw TH1 histograms
        # Convert them into DataFrame
        for variable in self.variables :
            # dict[variable]
            self.rawDataMeasured[variable]=dict()
            self.rawMCSignal[variable]=dict()
            self.rawMCBackground[variable]=dict()
            self.rawMCTotal[variable]=dict() # filled later by setMCTotalHists()

            binKey=variable.split("_")[0] # In case, TUnfoldBinning used
            varDir=binKey
            if len(self.variablePostfix)>0 :
                varDir=varDir+"_"+self.variablePostfix
            histDirPath=self.topDirName+"/"+varDir+"/"

            # dict[variable][sample]
            for combinedName in self.samples :
                # Reset for every group so that an unrecognised name can never
                # silently reuse the dictionary selected for the previous group.
                temp_dict=None
                if "Measured" in combinedName : temp_dict=self.rawDataMeasured[variable]
                if "Signal" in combinedName : temp_dict=self.rawMCSignal[variable]
                if "Background" in combinedName :
                    temp_dict=self.rawMCBackground[variable]
                    self.bkgUsed = True
                if temp_dict is None :
                    raise ValueError("Sample group '{}' must contain 'Measured', 'Signal' or 'Background' in its name".format(combinedName))

                temp_dict[combinedName]=dict()
                first_sample=True
                for sampleName in self.samples[combinedName] :
                    temp_dict[sampleName]=dict()

                    for sysCategory in self.systematics.keys() :
                        for sysName, postfixs in self.systematics[sysCategory].items() :
                            temp_dict[sampleName][sysName]=dict()
                            if first_sample : temp_dict[combinedName][sysName]=dict()
                            for postfix in postfixs :
                                temp_dict[sampleName][sysName][postfix]=dict()
                                if first_sample : temp_dict[combinedName][sysName][postfix]=dict()

                                # Get TH1 object!
                                nominalPath=histDirPath+self.histPrefix+sampleName
                                if sysName == "Nominal" :
                                    temp_TH1=self.inRootFile.Get(nominalPath)
                                else :
                                    temp_TH1=self.inRootFile.Get(nominalPath+'_'+sysName+postfix)
                                    # Fall back to the nominal histogram when the
                                    # systematic variation is not stored for this sample
                                    if type(temp_TH1) != rt.TH1D :
                                        temp_TH1=self.inRootFile.Get(nominalPath)

                                if self.useTUnfoldBin :
                                    temp_TH1=self.binDef[binKey].ExtractHistogram(sampleName+variable+sysName+postfix, temp_TH1, 0, True, self.steeringTUnfold[variable])

                                sample_df=self.convertTH1toDataFrame(temp_TH1)
                                temp_dict[sampleName][sysName][postfix]["TH1"]=temp_TH1
                                temp_dict[sampleName][sysName][postfix]["DataFrame"]=sample_df

                                # Accumulate the per-group sum of all samples
                                combined_entry=temp_dict[combinedName][sysName][postfix]
                                if first_sample :
                                    combined_entry["TH1"]=temp_TH1.Clone("Clone_"+combinedName+sampleName)
                                    combined_entry["DataFrame"]=sample_df.copy()
                                else :
                                    combined_entry["TH1"].Add(temp_TH1)
                                    combined_entry["DataFrame"].loc[:,"content":]= \
                                    combined_entry["DataFrame"].loc[:,"content":]+sample_df.loc[:,"content":]

                    first_sample=False

        self.combineHists(self.rawDataMeasured)
        self.combineHists(self.rawMCSignal)
        if self.bkgUsed : self.combineHists(self.rawMCBackground)
        self.setMCTotalHists()
        if self.bkgUsed : self.setBkgSubtractedDataHis()

        self.setDataFrames("Data")
        if self.bkgUsed : self.setDataFrames("DataBkgSub")
        self.setDataFrames("Signal")
        if self.bkgUsed : self.setDataFrames("Background")
        self.setDataFrames("TotalMC")

        # Combined (quadrature) uncertainties for the bin contents and for the
        # mean values; background-related tables exist only when a background
        # sample group was configured.
        dictNames=["Data"]
        if self.bkgUsed : dictNames.extend(["DataBkgSub", "Background"])
        dictNames.extend(["Signal", "TotalMC"])

        for dictName in dictNames :
            for column_name in ("refinedUnc", "refinedUnc_meanValue") :
                for sys_to_combine in ("total", "theory", "measurement") :
                    self.calculateCombinedUnc(dictName, sys_to_combine, column_name)

    def loglinear_func(self, p, x):
        return 2.*p[0]*np.log(x)+p[1]
    
    def setBkgSubtractedDataHis(self) :
         
        for variable in self.variables :
            self.rawDataBkgSubtracted[variable]=dict()
            self.rawDataBkgSubtracted[variable]["total"]=dict()
            
            for sysCategory in self.systematics.keys() :
                for sysName, postfixs in self.systematics[sysCategory].items() :
                    self.rawDataBkgSubtracted[variable]["total"][sysName]=dict()
                    for postfix in postfixs :
                        self.rawDataBkgSubtracted[variable]["total"][sysName][postfix]=dict()
                        temp_dict=self.rawDataBkgSubtracted[variable]["total"][sysName][postfix]
                        
                        data_dict=self.rawDataMeasured[variable]["total"][sysName][postfix]
                        mc_bkg_dict=self.rawMCBackground[variable]["total"][sysName][postfix]
                        
                        temp_dict["TH1"]=data_dict["TH1"].Clone("data_bkg_subtracted")
                        temp_dict["TH1"].Add(mc_bkg_dict["TH1"], -1)
                        
                        temp_dict["DataFrame"]=data_dict["DataFrame"].copy()
                        temp_dict["DataFrame"].content=data_dict["DataFrame"].content-mc_bkg_dict["DataFrame"].content
                        
    def setMCTotalHists(self) : 

        temp_dict = self.rawMCTotal
        for variable in self.variables :
            # dict[variable]
            temp_dict[variable]=dict()
            temp_dict[variable]["total"]=dict()
            
            for sysCategory in self.systematics.keys() :
                for sysName, postfixs in self.systematics[sysCategory].items() :
                    temp_dict[variable]["total"][sysName]=dict()
                    for postfix in postfixs :
                        temp_dict[variable]["total"][sysName][postfix]=dict()
                        
                        # Lets combine MC histograms
                        temp_dict[variable]["total"][sysName][postfix]["TH1"] = \
                        self.rawMCSignal[variable]["total"][sysName][postfix]["TH1"].Clone("Clone_"+variable+sysName+postfix)
                        temp_dict[variable]["total"][sysName][postfix]["DataFrame"] = \
                        self.rawMCSignal[variable]["total"][sysName][postfix]["DataFrame"].copy()
                        
                        if self.bkgUsed :
                            temp_dict[variable]["total"][sysName][postfix]["TH1"].Add(self.rawMCBackground[variable]["total"][sysName][postfix]["TH1"])
                            temp_dict[variable]["total"][sysName][postfix]["DataFrame"].content= \
                            self.rawMCBackground[variable]["total"][sysName][postfix]["DataFrame"].content+temp_dict[variable]["total"][sysName][postfix]["DataFrame"].content 
                            
    def setDataFrames(self, dictName="Data") :
        
        if dictName == "Data" :
            in_dict=self.rawDataMeasured
            out_dict=self.Data
        if dictName == "DataBkgSub" :
            in_dict=self.rawDataBkgSubtracted
            out_dict=self.DataBkgSubtracted
        if dictName == "Signal" :
            in_dict=self.rawMCSignal
            out_dict=self.MCSignal
        if dictName == "Background" :
            in_dict=self.rawMCBackground
            out_dict=self.MCBackground
        if dictName == "TotalMC" :
            in_dict=self.rawMCTotal
            out_dict=self.MCTotal
        
        for variable in in_dict.keys() :
            out_dict[variable]=dict()
            
            for sample in in_dict[variable].keys() : # Note loop over samples defined in the "dictionary!"
                out_dict[variable][sample]=dict()
                out_dict[variable][sample]["rawUnc"]=in_dict[variable][sample]["Nominal"]["Nominal"]["DataFrame"].copy()
                out_dict[variable][sample]["refinedUnc"]=in_dict[variable][sample]["Nominal"]["Nominal"]["DataFrame"].copy()
                # TODO Create column for statistical uncertainty
                
                # DataFrame for mean values
                # low mass cut, high mass cut, mean value
                # For Mass, make a DataFrame for all the mass bins
                # For pT, make a DataFrame for a mass bin
                out_dict[variable][sample]["rawUnc_meanValue"]=self.createMeanDataFrame(variable, in_dict[variable][sample]["Nominal"]["Nominal"]["TH1"])
                out_dict[variable][sample]["refinedUnc_meanValue"]=self.createMeanDataFrame(variable, in_dict[variable][sample]["Nominal"]["Nominal"]["TH1"])
                
                for sysCategory in self.systematics.keys() :  
                    for sysName, postfixs in self.systematics[sysCategory].items() :
                        if sysName == "Nominal" : continue
                        for postfix in postfixs :
                            if "fsr" not in sysName :
                                out_dict[variable][sample]["rawUnc"][sysName+"_"+postfix]= \
                                in_dict[variable][sample][sysName][postfix]["DataFrame"].content-in_dict[variable][sample]["Nominal"]["Nominal"]["DataFrame"].content
                        
                                temp_df=self.createMeanDataFrame(variable, in_dict[variable][sample][sysName][postfix]["TH1"])
                                out_dict[variable][sample]["rawUnc_meanValue"][sysName+"_"+postfix]= \
                                temp_df["mean"]-out_dict[variable][sample]["rawUnc_meanValue"]["mean"]
                            else :
                                current_index=postfixs.index(postfix)
                                the_other_postfix=None
                                if current_index == 0 : the_other_postfix=postfixs[1]
                                else : the_other_postfix=postfixs[0]
                                    
                                out_dict[variable][sample]["rawUnc"][sysName+"_"+postfix]= \
                                in_dict[variable][sample][sysName][postfix]["DataFrame"].content-in_dict[variable][sample][sysName][the_other_postfix]["DataFrame"].content
                        
                                temp_df1=self.createMeanDataFrame(variable, in_dict[variable][sample][sysName][postfix]["TH1"])
                                temp_df2=self.createMeanDataFrame(variable, in_dict[variable][sample][sysName][the_other_postfix]["TH1"])
                                out_dict[variable][sample]["rawUnc_meanValue"][sysName+"_"+postfix]= \
                                temp_df1["mean"]-temp_df2["mean"]
                                
                        
                        if sysName != "PDF" :
                            out_dict[variable][sample]["refinedUnc"][sysName+'_Up']  =out_dict[variable][sample]["rawUnc"].filter(like=sysName).max(axis=1)
                            out_dict[variable][sample]["refinedUnc"][sysName+'_Down']=out_dict[variable][sample]["rawUnc"].filter(like=sysName).min(axis=1)
                            
                            out_dict[variable][sample]["refinedUnc_meanValue"][sysName+'_Up']  =\
                            out_dict[variable][sample]["rawUnc_meanValue"].filter(like=sysName).max(axis=1)
                            out_dict[variable][sample]["refinedUnc_meanValue"][sysName+'_Down']=\
                            out_dict[variable][sample]["rawUnc_meanValue"].filter(like=sysName).min(axis=1)
                        else :
                            # TODO PDF uncertatinty
                            out_dict[variable][sample]["refinedUnc"][sysName+'_Up']  =np.sqrt(out_dict[variable][sample]["rawUnc"].filter(like=sysName).var(axis=1))/2.
                            out_dict[variable][sample]["refinedUnc"][sysName+'_Down']= -1. * np.sqrt(out_dict[variable][sample]["rawUnc"].filter(like=sysName).var(axis=1))/2.
                            
                            out_dict[variable][sample]["refinedUnc_meanValue"][sysName+'_Up']  =\
                            np.sqrt(out_dict[variable][sample]["rawUnc_meanValue"].filter(like=sysName).var(axis=1))/ 2.
                            out_dict[variable][sample]["refinedUnc_meanValue"][sysName+'_Down']=\
                            -1. * np.sqrt(out_dict[variable][sample]["rawUnc_meanValue"].filter(like=sysName).var(axis=1))/ 2.
                            
    def createMeanDataFrame(self, variable, TH1_hist) :
        
        if "Pt" in variable :
            nth_mass_bin=int(variable.split("_")[1]) # 
            temp_mean=TH1_hist.GetMean()
            temp_dict={"low mass cut": self.massBins[nth_mass_bin][0], "high mass cut": self.massBins[nth_mass_bin][1], "mean": temp_mean}
            temp_df=pd.DataFrame([temp_dict], columns=['low mass cut','high mass cut','mean'])
            
            return temp_df 
        
        if "Mass" in variable :
            # Get mean mass for all mass bins
            #self.massBins
            # Create DataFrame
            row_list=[]
            for massBin in self.massBins :
                TH1_hist.GetXaxis().SetRangeUser(massBin[0], massBin[1])
                temp_mean=TH1_hist.GetMean()
                #print("low mass cut: {} high mass cut: {} mean : {}".format(massBin[0], massBin[1], temp_mean))
                temp_dict={"low mass cut": massBin[0], "high mass cut": massBin[1], "mean": temp_mean}
                row_list.append(temp_dict)
            temp_df = pd.DataFrame(row_list, columns=['low mass cut','high mass cut','mean'])
            
            return temp_df
        
    def calculateCombinedUnc(self, dictName="Data", sys_to_combine="total", column_name="refinedUnc") :
        
        if dictName == "Data" :
            in_dict=self.Data
        if dictName == "DataBkgSub" :
            in_dict=self.DataBkgSubtracted
        if dictName == "Signal" :
            in_dict=self.MCSignal
        if dictName == "Background" :
            in_dict=self.MCBackground
        if dictName == "TotalMC" :
            in_dict=self.MCTotal
        
        # total uncertainty (stat+sys), total theory, total measurement
        for variable in in_dict.keys() :
            for sample in in_dict[variable].keys() :
                
                combinedSys_Up = None
                combinedSys_Down = None
                first_sys=True
                for sysCategory in self.systematics.keys() :
                    if sys_to_combine != "total" :
                        if sysCategory != sys_to_combine : continue
                        
                    for sysName, postfixs in self.systematics[sysCategory].items() :
                        if sysName == "Nominal" : continue # Nominal don't have Up, Down...
                        if first_sys :
                            combinedSys_Up=np.square(in_dict[variable][sample][column_name][sysName+'_Up']).copy()
                            combinedSys_Down=np.square(in_dict[variable][sample][column_name][sysName+'_Down']).copy()
                            first_sys=False
                        else :
                            combinedSys_Up=combinedSys_Up+np.square(in_dict[variable][sample][column_name][sysName+'_Up'])
                            combinedSys_Down=combinedSys_Down+np.square(in_dict[variable][sample][column_name][sysName+'_Down'])
                                
                in_dict[variable][sample][column_name][sys_to_combine+"_Up"]=np.sqrt(combinedSys_Up)
                in_dict[variable][sample][column_name][sys_to_combine+"_Down"]=np.sqrt(combinedSys_Down)
            
    def combineHists(self, mc_dict) :
         
        for variable in mc_dict.keys() :
            mc_dict[variable]["total"]=dict()
            
            for sysCategory in self.systematics.keys() :
                for sysName, postfixs in self.systematics[sysCategory].items() :
                    mc_dict[variable]["total"][sysName]=dict()
                    for postfix in postfixs :
                        mc_dict[variable]["total"][sysName][postfix]=dict()
                        
                        # Lets combine MC histograms
                        first_mc=True
                        for sample in mc_dict[variable].keys() :
                            if sample in self.samples.keys(): continue
                            if sample == "total" : continue
                            if first_mc :
                                first_mc=False
                                mc_dict[variable]["total"][sysName][postfix]["TH1"]=\
                                mc_dict[variable][sample][sysName][postfix]["TH1"].Clone("Clone_"+variable+sample+sysName+postfix)
                                
                                mc_dict[variable]["total"][sysName][postfix]["DataFrame"]=\
                                mc_dict[variable][sample][sysName][postfix]["DataFrame"].copy()
                            else :
                                mc_dict[variable]["total"][sysName][postfix]["TH1"].Add(mc_dict[variable][sample][sysName][postfix]["TH1"])
                                
                                mc_dict[variable]["total"][sysName][postfix]["DataFrame"].content= \
                                mc_dict[variable]["total"][sysName][postfix]["DataFrame"].content+mc_dict[variable][sample][sysName][postfix]["DataFrame"].content  
                                
                                
    def convertTH1toDataFrame(self, temp_TH1) :

        temp_content, temp_binEdge=hist2array(temp_TH1, return_edges=True)
        binWidth = (temp_binEdge[0][1:] - temp_binEdge[0][:-1])
        nBin=len(temp_binEdge[0])-1
        
        pd_series_binIndex=pd.Series(range(1,nBin+1), range(1, nBin+1), name="bin_index")
        pd_series_binWidth=pd.Series(binWidth, range(1, nBin+1), name="bin_width")
        pd_series_lowBinEdge=pd.Series(temp_binEdge[0][0:-1], range(1, nBin+1), name="low_bin_edge")
        pd_series_highBinEdge=pd.Series(temp_binEdge[0][1:], range(1, nBin+1), name="high_bin_edge")
        
        dict_temp={
            'bin_width': pd_series_binWidth, 
            'low_bin_edge': pd_series_lowBinEdge,
            'high_bin_edge': pd_series_highBinEdge,
            'content': temp_content,
        }
        pd_temp=pd.DataFrame(dict_temp, index=pd_series_binIndex)
        
        return pd_temp
    
    def drawSubPlot(self, top_axis, bottom_axis, variable, divde_by_bin_width = False, setLogy=False, 
                    write_xaxis_title=False, write_yaxis_title=False, showMeanValue=False) :
        """Draw the data/MC comparison of one variable on a pair of axes.

        The top axis shows the data points, the stacked MC samples listed in
        self.stackOrder and the "total" uncertainty boxes of data and total
        MC. The bottom axis shows the MC/data ratio with the corresponding
        relative uncertainty boxes.

        Parameters
        ----------
        top_axis, bottom_axis : matplotlib axes
            Axes receiving the distribution and the ratio, respectively.
        variable : str
            Key of the variable to draw, e.g. "Mass" or "Pt_<n>".
        divde_by_bin_width : bool
            Normalise bin contents and their uncertainties to the bin width.
        setLogy : bool
            Use a logarithmic y axis on the top axis.
        write_xaxis_title, write_yaxis_title : bool
            Whether to write the x / y axis titles (and the header texts
            drawn together with them).
        showMeanValue : bool
            Draw dotted vertical lines at the mean value(s) of the data.

        Raises
        ------
        KeyError
            If no key of self.samples contains "Data".
        """
        # Get DataFrame
        dataName=""
        data_df={}
        totalMC=self.MCTotal[variable]["total"]["refinedUnc"].copy()

        channelName = "e^{+}e^{-}"
        if self.channel == "muon" :
            channelName = r"\mu^{+}\mu^{-}"

        for combinedName in self.samples :
            if "Data" in combinedName :
                dataName=combinedName
                data_df[combinedName]=self.Data[variable][combinedName]["refinedUnc"].copy()
            if "Signal" in combinedName :
                data_df[combinedName]=self.MCSignal[variable][combinedName]["refinedUnc"].copy()
            if "Background" in combinedName :
                data_df[combinedName]=self.MCBackground[variable][combinedName]["refinedUnc"].copy()

        if dataName not in data_df :
            raise KeyError("No sample group whose name contains 'Data' found in {}".format(list(self.samples)))

        # Get basic histogram configuration
        x_bin_centers=data_df[dataName]['low_bin_edge']+data_df[dataName]['bin_width']/2.
        bin_width=data_df[dataName]['bin_width']
        x_min=data_df[dataName]['low_bin_edge'].iloc[0]
        x_max=data_df[dataName]['high_bin_edge'].iloc[-1]

        # Half bin widths, duplicated for the lower and upper x error
        binWidthnumpy = np.asarray([data_df[dataName]['bin_width']/2.])
        binWidthxerr = np.append(binWidthnumpy, binWidthnumpy, axis=0)

        if write_yaxis_title : top_axis.text(0., 1.07, "CMS Work in progress", fontsize=20, transform=top_axis.transAxes)
        if write_xaxis_title : top_axis.text(1., 1.07, "(13 TeV, " + self.year + ")", fontsize=20, transform=top_axis.transAxes, ha='right')
        if "Pt" in variable :
            nth_bin = int(variable.split("_")[1])
            top_axis.text(0., 1.01, str(self.massBins[nth_bin][0])+r"$ < M^{\mathit{"+channelName+"}} < $"+str(self.massBins[nth_bin][1]), 
                          fontsize=10, transform=top_axis.transAxes)

        if divde_by_bin_width: 
            if write_yaxis_title: top_axis.set_ylabel('Events/\n 1 GeV', fontsize=20, ha='right', y=1.0)
            # Contents and their absolute uncertainties must be scaled together,
            # otherwise the uncertainty boxes and the ratio band are off by the bin width.
            normalised_columns=("content", "total_Up", "total_Down")
            for column in normalised_columns :
                totalMC[column]=totalMC[column]/totalMC['bin_width']

            for sample_df in data_df.values() :
                for column in normalised_columns :
                    if column in sample_df.columns :
                        sample_df[column]=sample_df[column]/sample_df['bin_width']
        else :
            if write_yaxis_title: top_axis.set_ylabel('Events/\n Bin', fontsize=20, ha='right', y=1.0)

        color=iter(cm.rainbow(np.linspace(0,1,len(self.stackOrder))))

        top_axis.scatter(x_bin_centers, data_df[dataName]["content"], marker='o', c='black', s=50, zorder=4, label='Data')
        top_axis.set_xlim(x_min, x_max)
        data_abs_systematic=self.makeErrorNumpy(data_df[dataName].total_Up, data_df[dataName].total_Down)
        mc_abs_systematic=self.makeErrorNumpy(totalMC.total_Up, totalMC.total_Down)
        self.make_error_boxes(top_axis, x_bin_centers.values, data_df[dataName]["content"], binWidthxerr, data_abs_systematic, showBar=False, alpha=1.0, edgecolor='None', facecolor='white', zorder=2, hatch_style="////")
        self.make_error_boxes(top_axis, x_bin_centers.values, totalMC["content"], binWidthxerr, mc_abs_systematic, showBar=False, alpha=0.2, edgecolor='None', facecolor='red')

        # Stack the MC samples in the configured order
        for i, stack in enumerate(self.stackOrder) :
            if i==0 :
                if(len(self.stackOrder)==1) :
                    top_axis.errorbar(x_bin_centers, data_df[stack]['content'], xerr=bin_width/2., yerr=0, fmt='ro', ecolor='red')
                else :
                    top_axis.bar(x_bin_centers, data_df[stack]['content'], width = bin_width, color=next(color), label=stack)
                stacks=data_df[stack]['content']
            else :
                # NOTE(review): every sample after the first is labelled 'Drell-Yan'; confirm this is intended
                top_axis.bar(x_bin_centers, data_df[stack]['content'], width = bin_width, color=next(color), bottom=stacks, label='Drell-Yan')
                stacks=stacks+data_df[stack]['content']

        if setLogy :
            top_axis.set_yscale("log")
            top_axis.set_ylim(5e-1, 1e2 * data_df[dataName]["content"].max())
            top_axis.yaxis.set_major_locator(LogLocator(10, numticks=14))
            top_axis.yaxis.set_minor_locator(LogLocator(10, subs=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9), numticks=14))
        else :
            top_axis.set_ylim(0., 1.5 * data_df[dataName]["content"].max())

        # Vertical lines at the internal mass-bin boundaries
        if "Mass" in variable :
            for bin_ in self.massBins[:-1] :
                top_axis.axvline(bin_[1], color='black', linewidth=0.5, zorder=3)

        if showMeanValue :
            temp_data_mean_df=self.Data[variable]["total"]["refinedUnc_meanValue"]
            if "Mass" in variable :
                for i in temp_data_mean_df.index :
                    top_axis.axvline(temp_data_mean_df["mean"][i], color='black', linewidth=1, linestyle=":")
            if "Pt" in variable :
                top_axis.axvline(temp_data_mean_df["mean"][0], color='black', linewidth=1, linestyle=":")

        bottom_axis.set_ylim(0.65,1.35)

        varName = r"$p_{T}^{\mathit{"+channelName+"}}$"
        if variable == "Mass" :
            varName = r"Mass$^{\mathit{"+channelName+"}}$"

        if write_xaxis_title : bottom_axis.set_xlabel(varName + " [GeV]", fontsize=20, ha='right', x=1.0)
        if write_yaxis_title : bottom_axis.set_ylabel("MC/\n Data", fontsize=20)

        # Ratio panel: MC/data, with data drawn at one
        ratio=totalMC.content/ data_df[dataName].content
        one_points=data_df[dataName].content/data_df[dataName].content
        bottom_axis.errorbar(x_bin_centers, ratio, xerr=bin_width/2., yerr=0, fmt='r,', ecolor='red', zorder=5)
        bottom_axis.scatter(x_bin_centers, one_points, facecolors='black', edgecolor='black', s=40, zorder=3)
        bottom_axis.axhline(1., color='black', linewidth=1, zorder=1)
        if "Mass" in variable :
            for bin_ in self.massBins[:-1] :
                bottom_axis.axvline(bin_[1], color='black', linewidth=0.5, zorder=6)

        # Relative uncertainties, both expressed with respect to the data content
        systematic=self.makeErrorNumpy(totalMC.total_Up/ data_df[dataName].content, totalMC.total_Down/ data_df[dataName].content)
        data_systematic=self.makeErrorNumpy(data_df[dataName].total_Up/data_df[dataName].content, data_df[dataName].total_Down/data_df[dataName].content)
        self.make_error_boxes(bottom_axis, x_bin_centers.values, ratio.values, binWidthxerr, systematic, showBar=False, alpha=0.2, edgecolor='None', facecolor='red', zorder=4)
        self.make_error_boxes(bottom_axis, x_bin_centers.values, one_points, binWidthxerr, data_systematic, showBar=False, alpha=1.0, edgecolor='None', facecolor='white', zorder=2, hatch_style="/////")
    
    def drawHistPlot(self, *variables, divde_by_bin_width = False, setLogy=False, showMeanValue=False) :
        """Draw one (main panel, ratio panel) column per variable and save the figure as a PDF.

        Each column is filled by ``self.drawSubPlot``.  With several variables
        only the first column gets the y-axis title and only the last column
        gets the x-axis title; with a single variable both titles are written.

        Parameters
        ----------
        *variables : str
            Histogram names forwarded one by one to ``drawSubPlot``.
        divde_by_bin_width, setLogy, showMeanValue : bool
            Forwarded unchanged to ``drawSubPlot``.

        The output path is ``self.outDirPath + self.plotPrefix + self.topDirName
        + "_" + <last variable> + ".pdf"``.
        """
        # Indexing first keeps the early IndexError for an empty call.
        variable=variables[0]
        n_columns = len(variables)

        fig, axes = plt.subplots(2, n_columns, sharex=True, figsize=(10, 6), gridspec_kw={'hspace': 0, 'height_ratios':[1, 0.3]})
        fig.tight_layout()
        plt.subplots_adjust(left=0.12, right=0.97, bottom=0.15, top=0.9)

        for column, variable in enumerate(variables) :
            if n_columns == 1 :
                # plt.subplots returns a 1-D array of axes when there is a single column
                top_axis, bottom_axis = axes[0], axes[1]
                show_x_title = True
                show_y_title = True
            else :
                top_axis, bottom_axis = axes[0][column], axes[1][column]
                show_x_title = (column == n_columns - 1)
                show_y_title = (column == 0)

            self.drawSubPlot(top_axis, bottom_axis, variable, divde_by_bin_width, setLogy, show_x_title, show_y_title, showMeanValue)

        # `variable` now holds the last plotted variable; it names the output file.
        plt.savefig(self.outDirPath+self.plotPrefix+self.topDirName+"_"+variable+".pdf", format="pdf", dpi=300)
        plt.close(fig)
        
    def combinedPtDataFrame(self, name="Data") :
        """Stack the per-mass-bin mean-pT tables of one sample into a single DataFrame.

        For every entry of ``self.massBins`` the table stored under
        ``<sample dict>["Pt_<i>"]["total"]["refinedUnc_meanValue"]`` is copied,
        and the copies are concatenated in mass-bin order with a fresh
        0..N-1 index.

        Parameters
        ----------
        name : str
            Sample to read: "Data", "DataBkgSub", "Signal", "Background"
            or "TotalMC".

        Returns
        -------
        pandas.DataFrame or None
            The combined table, or None when ``self.massBins`` is empty.

        Raises
        ------
        ValueError
            If ``name`` is not one of the supported sample names.
        """
        # Attribute names are looked up lazily so only the requested sample is touched.
        sample_attributes = {
            "Data" : "Data",
            "DataBkgSub" : "DataBkgSubtracted",
            "Signal" : "MCSignal",
            "Background" : "MCBackground",
            "TotalMC" : "MCTotal",
        }
        if name not in sample_attributes :
            raise ValueError("unknown sample name '{}'; expected one of {}".format(name, sorted(sample_attributes)))
        in_df = getattr(self, sample_attributes[name])

        pt_frames = [in_df["Pt_"+str(nth_mass_bin)]["total"]["refinedUnc_meanValue"].copy()
                     for nth_mass_bin in range(len(self.massBins))]

        if not pt_frames :
            return None
        if len(pt_frames) == 1 :
            # A single mass bin was returned as a plain copy (index untouched) before.
            return pt_frames[0]
        # DataFrame.append was removed in pandas 2.0; one concat also avoids
        # re-copying the accumulated frame on every iteration.
        return pd.concat(pt_frames, ignore_index=True)
    
    def doLogLinearFit(self, ax, mass_df, pt_df, mass_unc, pt_unc, line_color) :
        """Fit ``self.loglinear_func`` to (mean mass, mean pT) with scipy.odr and draw the fitted curve.

        Parameters
        ----------
        ax : object with a ``plot`` method
            Axis that receives the fitted curve.
        mass_df, pt_df : table with a "mean" column
            x and y values of the fit.
        mass_unc, pt_unc : array
            Uncertainty arrays as produced by ``makeErrorNumpy``; for each point
            the larger entry of its column is used as the fit uncertainty.
        line_color :
            Colour passed to ``ax.plot``.

        The fit report is printed via ``pprint``; nothing is returned.
        """
        print("do log-linear fit!")

        # One symmetric uncertainty per point: the larger of the entries in its column.
        n_points = len(mass_unc.T)
        x_err = [max(mass_unc.T[i]) for i in range(n_points)]
        y_err = [max(pt_unc.T[i]) for i in range(n_points)]

        fit_model = Model(self.loglinear_func)
        fit_data = RealData(mass_df["mean"], pt_df["mean"], sx=x_err, sy=y_err)
        fit_result = ODR(fit_data, fit_model, beta0=[1.0, 0.0]).run()
        fit_result.pprint()

        # Fitted curve over the range spanned by the mean masses.
        xn = np.linspace(min(mass_df["mean"]), max(mass_df["mean"]), 1000)
        ax.plot(xn, self.loglinear_func(fit_result.beta, xn), color=line_color, linewidth=0.8)

        # Parameters shifted by +-1 standard deviation and the matching curves.
        # They are evaluated but not drawn at present.
        nstd = 1.
        beta_shift = nstd * fit_result.sd_beta
        fit_up = self.loglinear_func(fit_result.beta + beta_shift, xn)
        fit_dw = self.loglinear_func(fit_result.beta - beta_shift, xn)
        
    def drawISRPlot(self, *list_to_plot, do_linear_fit=False) :
        """Plot mean pT versus mean mass for the requested samples and save the figure as a PDF.

        For every sample name the mean-mass table
        (``getDict(name)["Mass"]["total"]["refinedUnc_meanValue"]``) and the
        stacked mean-pT table (``combinedPtDataFrame(name)``) are drawn as
        points; the error bars come from their ``total_Up`` / ``total_Down``
        columns.  The figure is written to
        ``self.outDirPath + self.plotPrefix + "ISR.pdf"``.

        Parameters
        ----------
        *list_to_plot : str
            Sample names: "Data", "DataBkgSub", "Signal", "Background" or "TotalMC".
        do_linear_fit : bool
            When true, ``doLogLinearFit`` is overlaid on the "Data" points;
            other samples are never fitted.

        Raises
        ------
        ValueError
            If a name is not recognised by ``getDict``.  Previously such a name
            silently re-used the tables of the preceding sample (or raised
            NameError when it came first).
        """
        print("draw isr plot, do_linear_fit {}".format(do_linear_fit))

        # Validate before any figure is created so nothing is left open on error.
        unknown_names = [name for name in list_to_plot if self.getDict(name) is None]
        if unknown_names :
            raise ValueError("unknown sample name(s) for drawISRPlot: {}".format(unknown_names))

        color=iter(cm.rainbow(np.linspace(0,1,len(list_to_plot))))

        fig, ax = plt.subplots(figsize=(10, 6))
        plt.subplots_adjust(left=0.12, right=0.97, bottom=0.15, top=0.9)
        ax.text(0., 1.05, "CMS Work in progress", fontsize=20, transform=ax.transAxes)
        ax.text(1., 1.05, "(13 TeV, " + self.year + ")", fontsize=20, transform=ax.transAxes, ha='right')

        ax.set_xlim(40,4e2)
        ax.set_ylim(13,29)
        ax.set_xscale("log")
        ax.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))

        ax.set_xlabel("Mean $M^{ll}$ [GeV]", fontsize=20, ha='right', x=1.0)
        ax.set_ylabel("Mean $p_{T}^{ll}$ [GeV]", fontsize=20, ha='right', y=1.0)

        for name in list_to_plot :
            temp_mass_df=self.getDict(name)["Mass"]["total"]["refinedUnc_meanValue"]
            temp_pt_df=self.combinedPtDataFrame(name)

            mass_systematic=self.makeErrorNumpy(temp_mass_df.total_Up, temp_mass_df.total_Down)
            pt_systematic=self.makeErrorNumpy(temp_pt_df.total_Up, temp_pt_df.total_Down)

            color_=next(color)

            ax.errorbar(temp_mass_df["mean"], temp_pt_df["mean"], xerr=mass_systematic, yerr=pt_systematic, fmt='o', color=color_, label=name)

            # Only the "Data" sample is fitted, in the colour of its points.
            if do_linear_fit and name == "Data" :
                self.doLogLinearFit(ax, temp_mass_df, temp_pt_df, mass_systematic, pt_systematic, color_)

        ax.legend(loc='best', fontsize=15, fancybox=False, framealpha=0.0)

        fig.savefig(self.outDirPath+self.plotPrefix+"ISR.pdf", format="pdf", dpi=300)
        plt.close(fig)

    def drawISRPlots(self, *objects_to_plot, names_in_objects, do_linear_fit=None, labels=None, marker=None, output_suffix="_ISR_comparison") :
        """Overlay mean pT versus mean mass of this plotter and of other plotters on one figure.

        Entry ``i`` of ``names_in_objects`` is read from this plotter for
        ``i == 0`` and from ``objects_to_plot[i-1]`` otherwise, using that
        plotter's ``getDict`` and ``combinedPtDataFrame``.  The figure is
        saved to ``self.outDirPath + self.plotPrefix + output_suffix + ".pdf"``.

        Parameters
        ----------
        *objects_to_plot :
            Further plotter objects, in the order of ``names_in_objects[1:]``.
        names_in_objects : sequence of str
            Sample name to read from each plotter (first entry belongs to ``self``).
        do_linear_fit : None, bool or sequence of bool
            Per-entry flag requesting a log-linear fit; only entries named
            "Data" are ever fitted.  None means no fit, a single bool applies
            to every entry, and missing trailing flags count as False.
        labels : sequence of str, optional
            Legend labels; the sample name is used when omitted.
        marker : str, optional
            Matplotlib format string for the points, 'o' when omitted.
        output_suffix : str
            File-name suffix before ".pdf".  The default reproduces the
            historical name; pass a different value to keep several
            comparisons made from the same plotter from overwriting each other.

        Raises
        ------
        ValueError
            If more names are given than there are plotters.
        """
        print("draw isr plot, do_linear_fit {}".format(do_linear_fit))

        # Plotter i provides entry i: self first, then the positional objects.
        plotters = (self,) + tuple(objects_to_plot)
        if len(names_in_objects) > len(plotters) :
            raise ValueError("{} names given but only {} plotters (self + objects_to_plot) available".format(len(names_in_objects), len(plotters)))

        if do_linear_fit is None :
            fit_flags = []
        elif isinstance(do_linear_fit, bool) :
            fit_flags = [do_linear_fit] * len(names_in_objects)
        else :
            fit_flags = list(do_linear_fit)

        if marker is None :
            marker='o'

        color=iter(cm.rainbow(np.linspace(0,1,len(names_in_objects))))

        fig, ax = plt.subplots(figsize=(10, 6))
        plt.subplots_adjust(left=0.12, right=0.97, bottom=0.15, top=0.9)
        ax.text(0., 1.05, "CMS Work in progress", fontsize=20, transform=ax.transAxes)
        ax.text(1., 1.05, "(13 TeV, " + self.year + ")", fontsize=20, transform=ax.transAxes, ha='right')

        ax.set_xlim(30,4e2)
        ax.set_ylim(13,30)
        ax.set_xscale("log")
        ax.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))

        ax.set_xlabel("Mean $M^{ll}$ [GeV]", fontsize=20, ha='right', x=1.0)
        ax.set_ylabel("Mean $p_{T}^{ll}$ [GeV]", fontsize=20, ha='right', y=1.0)

        for index, name in enumerate(names_in_objects) :
            plotter = plotters[index]
            temp_mass_df=plotter.getDict(name)["Mass"]["total"]["refinedUnc_meanValue"]
            temp_pt_df=plotter.combinedPtDataFrame(name)

            mass_systematic=self.makeErrorNumpy(temp_mass_df.total_Up, temp_mass_df.total_Down)
            pt_systematic=self.makeErrorNumpy(temp_pt_df.total_Up, temp_pt_df.total_Down)

            color_=next(color)

            label_name = name if labels is None else labels[index]

            ax.errorbar(temp_mass_df["mean"], temp_pt_df["mean"], xerr=mass_systematic, yerr=pt_systematic, fmt=marker, color=color_, label=label_name)

            # The flag of entry i is fit_flags[i]; the former [index-1] lookup
            # made entry 0 read the flag of the last entry.
            fit_requested = index < len(fit_flags) and fit_flags[index]
            if fit_requested and name == "Data" :
                self.doLogLinearFit(ax, temp_mass_df, temp_pt_df, mass_systematic, pt_systematic,color_)

        ax.legend(loc='best', fontsize=15, fancybox=False, framealpha=0.0)

        fig.savefig(self.outDirPath+self.plotPrefix+output_suffix+".pdf", format="pdf", dpi=300)
        plt.close(fig)
    
    def make_error_boxes(self, ax, xdata, ydata, xerror, yerror, 
                         showBox=True, showBar=False, facecolor='red', edgecolor='None', alpha=0.5, zorder=5, hatch_style=None):
        """Draw one uncertainty rectangle per point and, optionally, plain error bars.

        Parameters
        ----------
        ax :
            Axis receiving the patches / error bars.
        xdata, ydata : sequence
            Point coordinates.
        xerror, yerror : array
            Uncertainty arrays whose transpose yields one entry per point.
            The rectangle starts at ``(x - xe[0], y - ye[0])`` and has
            width ``xe.sum()`` and height ``ye.sum()``.
        showBox : bool
            Add the rectangles to ``ax`` as a PatchCollection.
        showBar : bool
            Additionally draw black error bars with ``ax.errorbar``.
        facecolor, edgecolor, alpha, zorder, hatch_style :
            Styling forwarded to the PatchCollection.
        """
        # One rectangle per data point, anchored at its lower-left corner.
        errorboxes = []
        for x, y, xe, ye in zip(xdata, ydata, xerror.T, yerror.T) :
            lower_left = (x - xe[0], y - ye[0])
            errorboxes.append(Rectangle(lower_left, xe.sum(), ye.sum()))

        if showBox :
            box_collection = PatchCollection(errorboxes, facecolor=facecolor, alpha=alpha,
                                             edgecolor=edgecolor, zorder = zorder, linewidth=0.5, hatch=hatch_style)
            ax.add_collection(box_collection)

        if showBar :
            ax.errorbar(xdata, ydata, xerr=xerror, yerr=yerror,
                        fmt='None', ecolor='k')
    
    def makeErrorNumpy(self, Up, Down) :
        """Pack two pandas columns into one two-row array (row 0: Up, row 1: Down).

        Both inputs are read through ``.values`` and turned into a single row
        each before being joined along axis 0.  The values are passed through
        as they are (no sign change).
        """
        up_row = Up.values
        up_row = up_row.reshape(1, len(up_row))

        down_row = Down.values
        down_row = down_row.reshape(1, len(down_row))

        return np.concatenate((up_row, down_row), axis=0)
        
    def getRawDict(self, dictName) :
        """Return the raw-histogram dictionary registered under ``dictName``.

        Recognised names are "Data", "DataBkgSub", "Signal", "Background" and
        "TotalMC"; any other name yields None.
        """
        raw_attribute_by_name = (
            ("Data", "rawDataMeasured"),
            ("DataBkgSub", "rawDataBkgSubtracted"),
            ("Signal", "rawMCSignal"),
            ("Background", "rawMCBackground"),
            ("TotalMC", "rawMCTotal"),
        )
        for known_name, attribute in raw_attribute_by_name :
            if dictName == known_name :
                return getattr(self, attribute)
        return None
        
    def getDict(self, dictName) :
        """Return the nominal-plus-systematics dictionary registered under ``dictName``.

        Recognised names are "Data", "DataBkgSub", "Signal", "Background" and
        "TotalMC"; any other name yields None.
        """
        attribute_by_name = (
            ("Data", "Data"),
            ("DataBkgSub", "DataBkgSubtracted"),
            ("Signal", "MCSignal"),
            ("Background", "MCBackground"),
            ("TotalMC", "MCTotal"),
        )
        for known_name, attribute in attribute_by_name :
            if dictName == known_name :
                return getattr(self, attribute)
        return None

In [4]:
# Build every plotter once and keep it as isr_analysis[year][channel][level],
# where level is "detector" or one of the unfolding levels listed below.
# Each plotter immediately writes its mass, pT and ISR plots.
years=["2016","2017","2018"]
channels=["muon","electron"]
levels=["DetUNFOLD", "DetUNFOLD_Acceptance", "FSRUNFOLD", "FSRUNFOLD_Acceptance"]

pt_variables=("Pt_0","Pt_1","Pt_2","Pt_3","Pt_4")

isr_analysis={}
for year in years :
    isr_analysis[year]={}
    for channel in channels :
        plotters_by_level={}
        isr_analysis[year][channel]=plotters_by_level

        # Detector level: background-subtracted data against the signal sample.
        detector_root="./inFiles/"+year+"/"+channel+"/unfold_input.root"
        detector_config='./config/'+year+'/'+channel+'/config_'+year+'_'+channel+'.json'
        detector_plotter=ISRPlotter(detector_root, detector_config, False)
        plotters_by_level["detector"]=detector_plotter
        detector_plotter.drawHistPlot("Mass", divde_by_bin_width = False, setLogy=True, showMeanValue=False)
        detector_plotter.drawHistPlot(*pt_variables, divde_by_bin_width = False, setLogy=True, showMeanValue=False)
        detector_plotter.drawISRPlot("DataBkgSub","Signal")

        doFit=False
        for level in levels :
            level_root="./output/"+year+"/"+channel+"/"+level+"_"+channel+"_"+year+".root"
            level_config="./config/"+year+"/"+channel+"/config_"+level+"_"+channel+"_"+year+".json"
            level_plotter=ISRPlotter(level_root, level_config, False)
            plotters_by_level[level]=level_plotter
            level_plotter.drawHistPlot("Mass", divde_by_bin_width = False, setLogy=True, showMeanValue=False)
            level_plotter.drawHistPlot(*pt_variables, divde_by_bin_width = False, setLogy=True, showMeanValue=False)

            # The log-linear fit is drawn only at the last level, FSRUNFOLD_Acceptance.
            doFit = (level == "FSRUNFOLD_Acceptance")
            level_plotter.drawISRPlot("Data","Signal",do_linear_fit=doFit)


draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit True
do log-linear fit!
Beta: [  3.90686848 -17.05118485]
Beta Std Error: [0.10818581 0.96166514]
Beta Covariance: [[ 0.00961845 -0.08538872]
 [-0.08538872  0.7599975 ]]
Residual Variance: 1.2168459050285587
Inverse Condition #: 0.0005639525840743485
Reason(s) for Halting:
  Sum of squares convergence
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit False
draw isr plot, do_linear_fit True
do log-linear fit!
Beta: [  3.82077362 -16.22672819]
Beta Std Error: [0.05904497 0.53329512]
Beta Covariance: [[ 0.01945216 -0.17556584]
 [-0.17556584  1.58685518]]
Residual Variance: 0.1792247226567992
Inverse Condition #: 0.00041547390615081066
Reason(s) for Halting:
  Sum of squares convergence
draw isr plot, do_linear_fit False
draw isr plot, do_li

In [5]:
# "Data" at the FSRUNFOLD_Acceptance level for both channels and all three years on one canvas.
# The second positional plotter used to be 2017 muon a second time, so 2018 muon
# was missing and 2017 muon was drawn twice; labels keep the six series apart.
isr_analysis["2016"]["muon"]["FSRUNFOLD_Acceptance"].drawISRPlots(
    isr_analysis["2017"]["muon"]["FSRUNFOLD_Acceptance"],
    isr_analysis["2018"]["muon"]["FSRUNFOLD_Acceptance"],
    isr_analysis["2016"]["electron"]["FSRUNFOLD_Acceptance"],
    isr_analysis["2017"]["electron"]["FSRUNFOLD_Acceptance"],
    isr_analysis["2018"]["electron"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["Data","Data","Data","Data","Data","Data"],
    do_linear_fit=[False, False, False, False, False, False],
    labels=["2016 muon", "2017 muon", "2018 muon", "2016 electron", "2017 electron", "2018 electron"],
)

draw isr plot, do_linear_fit [False, False, False, False, False, False]


In [6]:
# Electron channel at detector level: "DataBkgSub" and "Signal" of each year on one canvas.
isr_analysis["2016"]["electron"]["detector"].drawISRPlots(
    isr_analysis["2017"]["electron"]["detector"],
    isr_analysis["2018"]["electron"]["detector"],
    isr_analysis["2016"]["electron"]["detector"],
    isr_analysis["2017"]["electron"]["detector"],
    isr_analysis["2018"]["electron"]["detector"],
    names_in_objects=["DataBkgSub"] * 3 + ["Signal"] * 3,
    do_linear_fit=[False] * 6,
    labels=["2016 electron", "2017 electron", "2018 electron", "2016 DY", "2017 DY", "2018 DY"],
)

draw isr plot, do_linear_fit [False, False, False, False, False, False]


In [7]:
# Muon channel at detector level: "DataBkgSub" of the three years on one canvas.
isr_analysis["2016"]["muon"]["detector"].drawISRPlots(
    isr_analysis["2017"]["muon"]["detector"],
    isr_analysis["2018"]["muon"]["detector"],
    names_in_objects=["DataBkgSub"] * 3,
    do_linear_fit=[False] * 3,
    labels=["2016 muon", "2017 muon", "2018 muon"],
)

draw isr plot, do_linear_fit [False, False, False]


In [8]:
# Electron channel at the FSRUNFOLD_Acceptance level: "Data" of the three years on one canvas.
isr_analysis["2016"]["electron"]["FSRUNFOLD_Acceptance"].drawISRPlots(
    isr_analysis["2017"]["electron"]["FSRUNFOLD_Acceptance"],
    isr_analysis["2018"]["electron"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["Data"] * 3,
    do_linear_fit=[False] * 3,
    labels=["2016 electron", "2017 electron", "2018 electron"],
)

draw isr plot, do_linear_fit [False, False, False]


In [9]:
# Muon channel at the FSRUNFOLD_Acceptance level: "Data" of the three years on one canvas.
isr_analysis["2016"]["muon"]["FSRUNFOLD_Acceptance"].drawISRPlots(
    isr_analysis["2017"]["muon"]["FSRUNFOLD_Acceptance"],
    isr_analysis["2018"]["muon"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["Data"] * 3,
    do_linear_fit=[False] * 3,
    labels=["2016 muon", "2017 muon", "2018 muon"],
)


draw isr plot, do_linear_fit [False, False, False]


In [10]:
# 2016, FSRUNFOLD_Acceptance level: electron against muon "Data", each with its log-linear fit.
isr_analysis["2016"]["electron"]["FSRUNFOLD_Acceptance"].drawISRPlots(
    isr_analysis["2016"]["muon"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["Data"] * 2,
    do_linear_fit=[True] * 2,
    labels=["2016 electron", "2016 muon"],
)

draw isr plot, do_linear_fit [True, True]
do log-linear fit!
Beta: [  3.82077362 -16.22672819]
Beta Std Error: [0.05904497 0.53329512]
Beta Covariance: [[ 0.01945216 -0.17556584]
 [-0.17556584  1.58685518]]
Residual Variance: 0.1792247226567992
Inverse Condition #: 0.00041547390615081066
Reason(s) for Halting:
  Sum of squares convergence
do log-linear fit!
Beta: [  3.90686848 -17.05118485]
Beta Std Error: [0.10818581 0.96166514]
Beta Covariance: [[ 0.00961845 -0.08538872]
 [-0.08538872  0.7599975 ]]
Residual Variance: 1.2168459050285587
Inverse Condition #: 0.0005639525840743485
Reason(s) for Halting:
  Sum of squares convergence


In [11]:
# 2017, FSRUNFOLD_Acceptance level: electron against muon "Data", each with its log-linear fit.
isr_analysis["2017"]["electron"]["FSRUNFOLD_Acceptance"].drawISRPlots(
    isr_analysis["2017"]["muon"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["Data"] * 2,
    do_linear_fit=[True] * 2,
    labels=["2017 electron", "2017 muon"],
)

draw isr plot, do_linear_fit [True, True]
do log-linear fit!
Beta: [  3.54650278 -13.54925808]
Beta Std Error: [0.25279301 2.27868077]
Beta Covariance: [[ 0.02088745 -0.188216  ]
 [-0.188216    1.69715774]]
Residual Variance: 3.0594598999889904
Inverse Condition #: 0.0002857500112914843
Reason(s) for Halting:
  Sum of squares convergence
do log-linear fit!
Beta: [  3.81431888 -16.13806824]
Beta Std Error: [0.08717936 0.77534958]
Beta Covariance: [[ 0.00635815 -0.05648892]
 [-0.05648892  0.50291949]]
Residual Variance: 1.1953542920718054
Inverse Condition #: 0.0005069007373317109
Reason(s) for Halting:
  Sum of squares convergence


In [12]:
# 2018, FSRUNFOLD_Acceptance level: electron against muon "Data", each with its log-linear fit.
isr_analysis["2018"]["electron"]["FSRUNFOLD_Acceptance"].drawISRPlots(
    isr_analysis["2018"]["muon"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["Data"] * 2,
    do_linear_fit=[True] * 2,
    labels=["2018 electron", "2018 muon"],
)

draw isr plot, do_linear_fit [True, True]
do log-linear fit!
Beta: [  3.99216973 -17.57871002]
Beta Std Error: [0.18045572 1.63235404]
Beta Covariance: [[ 0.01927061 -0.17419117]
 [-0.17419117  1.57682261]]
Residual Variance: 1.689841143602175
Inverse Condition #: 0.00041514351659667774
Reason(s) for Halting:
  Sum of squares convergence
do log-linear fit!
Beta: [  3.80876805 -16.08250452]
Beta Std Error: [0.11587096 1.03272813]
Beta Covariance: [[ 0.00563842 -0.05020364]
 [-0.05020364  0.44789879]]
Residual Variance: 2.381179465507276
Inverse Condition #: 0.0004956050111644462
Reason(s) for Halting:
  Sum of squares convergence


In [13]:
# 2016 electron channel: the measurement at each successive level
# (detector, DetUNFOLD, DetUNFOLD_Acceptance, FSRUNFOLD, FSRUNFOLD_Acceptance) on one canvas.
# Legend label typo fixed: "Detector unfoled" -> "Detector unfolded".
isr_analysis["2016"]["electron"]["detector"].drawISRPlots(
    isr_analysis["2016"]["electron"]["DetUNFOLD"],
    isr_analysis["2016"]["electron"]["DetUNFOLD_Acceptance"],
    isr_analysis["2016"]["electron"]["FSRUNFOLD"],
    isr_analysis["2016"]["electron"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["DataBkgSub","Data","Data","Data","Data"],
    do_linear_fit=[False, False, False, False, False],
    labels=["Detector", "Detector unfolded", "+ Efficiency", "FSR unfolded", "+ Acceptance"],
    marker='o-',
)

draw isr plot, do_linear_fit [False, False, False, False, False]


In [14]:
# 2016 muon channel: the measurement at each successive level
# (detector, DetUNFOLD, DetUNFOLD_Acceptance, FSRUNFOLD, FSRUNFOLD_Acceptance) on one canvas.
# Legend label typo fixed: "Detector unfoled" -> "Detector unfolded".
isr_analysis["2016"]["muon"]["detector"].drawISRPlots(
    isr_analysis["2016"]["muon"]["DetUNFOLD"],
    isr_analysis["2016"]["muon"]["DetUNFOLD_Acceptance"],
    isr_analysis["2016"]["muon"]["FSRUNFOLD"],
    isr_analysis["2016"]["muon"]["FSRUNFOLD_Acceptance"],
    names_in_objects=["DataBkgSub","Data","Data","Data","Data"],
    do_linear_fit=[False, False, False, False, False],
    labels=["Detector", "Detector unfolded", "+ Efficiency", "FSR unfolded", "+ Acceptance"],
    marker='o-',
)

draw isr plot, do_linear_fit [False, False, False, False, False]


In [15]:
# "Data" and "Signal" dictionaries of the muon channel at the FSRUNFOLD_Acceptance level.
# The un-suffixed names refer to 2018.
data_dict, mc_dict = (
    isr_analysis["2018"]["muon"]["FSRUNFOLD_Acceptance"].getDict(sample) for sample in ("Data", "Signal")
)

data_dict_2016, mc_dict_2016 = (
    isr_analysis["2016"]["muon"]["FSRUNFOLD_Acceptance"].getDict(sample) for sample in ("Data", "Signal")
)

data_dict_2017, mc_dict_2017 = (
    isr_analysis["2017"]["muon"]["FSRUNFOLD_Acceptance"].getDict(sample) for sample in ("Data", "Signal")
)

In [16]:
# Display the 'refinedUnc' table (bin edges, content and the *_Up / *_Down columns)
# stored for "Pt_4" / "Data_Measured" in data_dict.
data_dict["Pt_4"]["Data_Measured"]['refinedUnc']

Unnamed: 0_level_0,bin_width,low_bin_edge,high_bin_edge,content,IdSF_Up,IdSF_Down,trgSF_Up,trgSF_Down,PU_Up,PU_Down,...,Norm_Up,Norm_Down,Scale_Up,Scale_Down,total_Up,total_Down,theory_Up,theory_Down,measurement_Up,measurement_Down
bin_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,14229.864227,29.223858,-29.135018,38.776061,-38.617293,41.038366,-25.709555,...,4.630016,-4.611588,68.842636,-100.433328,4837.797503,4837.883149,68.842636,100.433328,4837.307657,4836.84055
2,4.0,4.0,8.0,21939.299505,47.195847,-47.049498,62.443501,-62.181658,239.835481,-289.433829,...,16.024584,-16.053644,381.982553,-261.247466,1739.686133,1720.459361,381.982553,261.247466,1697.232208,1700.508799
3,4.0,8.0,12.0,19019.739969,42.502465,-42.364267,55.366293,-55.12958,196.775559,-130.854641,...,24.641551,-24.603834,-5.888591,-37.633027,387.769194,456.794768,5.888591,37.633027,387.72448,455.24193
4,6.0,12.0,18.0,17887.570799,39.974318,-39.847393,52.999317,-52.773486,2.19954,-47.136552,...,44.553539,-44.563708,102.428072,-148.85632,2088.246394,2075.7673,102.428072,148.85632,2085.732842,2070.423068
5,10.0,18.0,28.0,22468.162217,53.503671,-53.333807,69.864748,-69.566189,283.881237,-275.642747,...,104.94611,-104.93119,306.150456,-331.745448,2195.92984,2202.902312,306.150456,331.745448,2174.483792,2177.779501
6,12.0,28.0,40.0,16172.820429,39.184876,-39.059252,51.384297,-51.169939,16.666248,-32.641358,...,126.186362,-126.23022,75.466163,-49.699114,1437.472283,1435.032429,75.466163,49.699114,1435.489959,1434.171562
7,15.0,40.0,55.0,14024.573872,38.727937,-38.601464,50.488983,-50.277069,102.895479,-89.612165,...,211.998338,-211.941372,107.488684,-120.601641,3086.982983,3087.248252,107.488684,120.601641,3085.111038,3084.891735
8,20.0,55.0,75.0,10507.739657,31.928416,-31.824467,41.781925,-41.608567,182.150936,-166.683835,...,244.751786,-244.761765,480.868301,-524.103448,865.04035,886.448905,480.868301,524.103448,719.069179,714.917644
9,25.0,75.0,100.0,5406.755452,18.46644,-18.405599,24.489281,-24.382013,65.150295,-71.47485,...,168.416163,-168.415245,547.778301,-620.83532,627.896442,692.560886,547.778301,620.83532,306.908576,306.926841


In [17]:
# 'unfold_Down' shift of the mean mass relative to the mean, in percent (data_dict_2016, one row per entry of the table).
data_dict_2016["Mass"]["Data_Measured"]['refinedUnc_meanValue']['unfold_Down']/data_dict_2016["Mass"]["Data_Measured"]['refinedUnc_meanValue']['mean'] * 100

0   -0.000880
1    0.000000
2    0.000000
3   -0.004185
4   -0.000130
dtype: float64

In [18]:
# 'IdSF_Down' shift of the "Pt_1" mean relative to the mean, in percent (data_dict_2016).
data_dict_2016["Pt_1"]["Data_Measured"]['refinedUnc_meanValue']['IdSF_Down']/data_dict_2016["Pt_1"]["Data_Measured"]['refinedUnc_meanValue']['mean'] * 100

0   -0.11523
dtype: float64

In [19]:
# 'fsr_Down' shift of the "Pt_4" mean relative to the mean, in percent (data_dict_2016).
data_dict_2016["Pt_4"]["Data_Measured"]['refinedUnc_meanValue']["fsr_Down"]/data_dict_2016["Pt_4"]["Data_Measured"]['refinedUnc_meanValue']['mean'] * 100

0   -0.72927
dtype: float64

In [20]:
# Manual cross-check of a relative uncertainty in percent; the result has the same
# magnitude as the fsr_Down / mean percentage shown for "Pt_4" of data_dict_2016.
# NOTE(review): the two literals were presumably typed in from that table - confirm.
0.18282/25.068887*100

0.7292705096959431

In [21]:
# "Data" dictionary of the 2016 electron channel at the FSRUNFOLD_Acceptance level.
ee_data_dict_2016=isr_analysis["2016"]["electron"]["FSRUNFOLD_Acceptance"].getDict("Data")

In [22]:
# Display the 'refinedUnc_meanValue' table (mass cuts, mean and the *_Up / *_Down columns)
# stored for "Pt_0" / "Data_Measured" in ee_data_dict_2016.
ee_data_dict_2016["Pt_0"]["Data_Measured"]['refinedUnc_meanValue']

Unnamed: 0,low mass cut,high mass cut,mean,IdSF_Up,IdSF_Down,trgSF_Up,trgSF_Down,PU_Up,PU_Down,unfold_Up,...,Scale_Up,Scale_Down,PDF_Up,PDF_Down,total_Up,total_Down,theory_Up,theory_Down,measurement_Up,measurement_Down
0,50.0,64.0,14.527543,0.047467,-0.049304,0.027327,-0.027254,0.003511,0.001292,0.01111,...,0.099712,-0.117128,0.02851,-0.02851,0.219571,0.228227,0.103708,0.120548,0.193535,0.193793


In [23]:
# Manual ratio expressed in percent.
# NOTE(review): the origin of the two literals is not shown in the notebook - presumably
# an uncertainty and a mean value copied from one of the tables; confirm and name them.
0.23867/25.535478 * 100

0.9346603968016577

In [24]:
# Manual relative uncertainty in percent; the literals equal the IdSF_Up and mean
# entries of the "Pt_0" table displayed above for ee_data_dict_2016.
0.047467/14.527543 * 100

0.32673797627031637