# Generate a template of the weighted average of two samples.

In [3]:
import ROOT as r
from ROOT import gStyle
import numpy as np
import ctypes
import os
import pandas as pd

def model_uncern(covariance_matrix,x):
    var_a = covariance_matrix[0][0]
    var_b = covariance_matrix[1][1]
    var_c = covariance_matrix[2][2]
    
    cov_ab = covariance_matrix[0][1]
    cov_ac = covariance_matrix[0][2]
    cov_bc = covariance_matrix[1][2]
    
    diagonal_term = var_a*(x**4) + var_b*(x**2) + var_c
    no_diagonal_term = 2*cov_ab*(x**3) + 2*cov_ac*(x**2) + 2*cov_bc*x
    
    return np.sqrt(diagonal_term+no_diagonal_term)

def parabolic_shape(mjj,parameters):
    a = parameters[0]
    b = parameters[1]
    c = parameters[2]
    return a*mjj*mjj+b*mjj+c

model_uncer = np.vectorize(model_uncern)
parabolic_shape = np.vectorize(parabolic_shape,excluded=['parameters'])

fitParams = {"Sherpa": [ 1.31867345e-07, -6.91194368e-04,  1.49568233e+00], 
              "MG" : [ 1.35194973e-07, -5.44814667e-04,  9.89623841e-01] }

covarianceMatrix =  {"Sherpa" : [[ 1.19177067e-17, -2.94876918e-14,  1.41446066e-11],
                               [-2.94876918e-14,  8.14817316e-11, -4.30728335e-08],
                               [ 1.41446066e-11, -4.30728335e-08,  2.66545658e-05]],
                    "MG" : [[ 6.10103774e-18, -1.39152060e-14,  6.28353598e-12],
                           [-1.39152060e-14,  3.55524563e-11, -1.77055103e-08],
                           [ 6.28353598e-12, -1.77055103e-08,  1.02381778e-05]]}

def scaleBinUncertainty(histogram,sampleName):
    
    sampleType = "MG"
    if "Sherpa" in sampleName:
        sampleType = "Sherpa"
    
    for i in range(1,histogram.GetNbinsX()+1):
        x = histogram.GetBinCenter(i)
        error = histogram.GetBinError(i)
        rw = parabolic_shape(x,parameters=fitParams[sampleType])
        rw_error = model_uncern(covarianceMatrix[sampleType],x)
        newError = np.sqrt(error**2 + ((rw_error/rw)**2)*(error**2))
        histogram.SetBinError(i,newError)

def biner(edges,bin_widths,histogram):
    if (len(edges)+1!=len(bin_widths)):
        print("Check edges and bin widths array sizes!")
        return
    
    bins=[]
    first_bin = histogram.GetXaxis().GetBinLowEdge(1)
    last_bin = histogram.GetXaxis().GetBinUpEdge(histogram.GetNbinsX())
    for i in range(0,len(edges)):
        n_spaces = int((edges[i] - first_bin)/bin_widths[i])
        bins = np.concatenate((bins,np.linspace(first_bin,edges[i],n_spaces,endpoint=False)))
        first_bin = edges[i]
        if edges[i]==edges[-1]:
            n_spaces = int((last_bin - edges[i])/bin_widths[i+1])
            bins = np.concatenate((bins,np.linspace(edges[i],last_bin,n_spaces,endpoint=False)))
            bins = np.concatenate((bins,[last_bin]))
    return bins

def normalization(hist_list,norm_bin):
    for hist in hist_list:
        for i in range(1,hist.GetNbinsX()+1):
            value=hist.GetBinContent(i)
            error=hist.GetBinError(i)
            sf=hist.GetBinWidth(i)/norm_bin
            hist.SetBinContent(i,value/sf)
            hist.SetBinError(i,error/sf)


def Unpack_Histo(histogram_file):
    bin_content = []
    bin_content_uncer =[]
    for i in range(1,histogram_file.GetNbinsX()+1):
        bin_content.append(histogram_file.GetBinContent(i))
        bin_content_uncer.append(histogram_file.GetBinError(i))
        
    return np.array(bin_content),np.array(bin_content_uncer)

############# CONFIGURING THE JOB ###########################
samples_path = "/Users/diegomac/Documents/HEP/VBF-Analysis/MuMu/Plots/AnalysisCrossCheck/SR/"
sample1_name = "Zmumu_SherpaRW"
sample2_name = "Zmumu_MGRW"

qcd_1=0.961
qcd_2=0.939

fs_name = "Zmumu"
EWSampleUsedForExtraction = "PoPy"
EWSampleUsedForExtraction = "_EW-"+EWSampleUsedForExtraction

############# Z->ll HISTOS ##########################
if 1:
    histos_no_rebin={
    "n_bjets":[],
    "lepiso":[],
    "n_jets_interval":[],
    "delta_R_leplep_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl":[],
    "delta_R_lep1jet_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl":[],
    "delta_R_lep2jet_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl":[],    
    }
    histos_rebin={
    "delta_phi":[[2.0],[0.2,0.8],0.2],
    "lep1_eta_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl":[[0.1],[0.2,0.199],0.2],
    "lep2_eta_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl":[[0.1],[0.2,0.199],0.2],
    "ljet0_eta_basic_cuts_ptl":[[-3.0,3.0],[0.5,0.2,0.5],0.2],
    "ljet1_eta_basic_cuts_ptl":[[-3.0,3.0],[0.5,0.2,0.5],0.2],
    "lep1_pt":[[300],[20,50],20],
    "lep2_pt":[[300],[20,50],20],
    "ljet0_pt":[[75,460],[15,35,54],15],
    "ljet1_pt":[[70,440],[10,37,56],10],
    "ljet2_pt_basic_cuts_ptl":[[100],[20,50],20],
    "pt_bal":[[0.15,0.3],[0.03,0.05,0.7],0.15],
    "Z_centrality":[[0.5],[0.1,0.5],0.1],
    "delta_y":[[2.0,6.0],[1.0,0.5,1.0],1.0],
    "inv_mass":[[70,110,140],[10,5,10,20],5],
    "mass_jj":[[1500,3000],[250,500,1000],250],
    "Z_pt_reco_basic_cuts_ptl":[[300,600],[20,50,200],20],
    "vec_sum_pt_jets_basic_cuts_ptl":[[300],[20,50],20],
    "ratio_zpt_sumjetpt_basic_cuts_ptl":[[0.75,1.25],[0.25,0.1,0.25],0.1],
    "met_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl":[[50],[10,50],10],
    }


##################### Z->tau tau #################
if 0:
    histos_no_rebin={
    "n_bjets":[],
    "lepiso":[],
    "n_jets_interval":[],
    }

    histos_rebin = {
    "tau_pt":[[80.0,160.0],[20.0,40.0,170.0],20.0],
    "lep_pt":[[80,160],[20,40,170],20],
    "delta_phi":[[3.0],[0.3,0.2],0.3],
    "lep_eta_basic_dphi_drap_btag_iso_rnn_ptl_j1pt_j2pt_ptbal_mjj_nji_zcen_omega_mreco_tpt":[[0.5],[0.5,0.5],0.5],
    "tau_eta_basic_dphi_drap_btag_iso_rnn_ptl_j1pt_j2pt_ptbal_mjj_nji_zcen_omega_mreco_tpt":[[0.5],[0.5,0.5],0.5],
    "delta_R_taulep_basic_dphi_drap_btag_iso_rnn_ptl_j1pt_j2pt_ptbal_mjj_nji_zcen_omega_mreco_tpt":[[0.5],[0.5,0.5],0.5],
    "delta_R_lepjet_basic_dphi_drap_btag_iso_rnn_ptl_j1pt_j2pt_ptbal_mjj_nji_zcen_omega_mreco_tpt":[[0.5],[0.5,0.5],0.5],
    "delta_R_taujet_basic_dphi_drap_btag_iso_rnn_ptl_j1pt_j2pt_ptbal_mjj_nji_zcen_omega_mreco_tpt":[[0.5],[0.5,0.5],0.5],
    "met_basic_dphi":[[100],[20,100],20],
    "delta_y":[[6.0],[1.0,4.0],1.0],
    "omega":[[-0.2,1.6],[2.8,0.3,1.4],0.3],
    "rnn_score_1p":[[0.25],[0.25,0.25],0.25],
    "rnn_score_3p":[[0.4],[0.2,0.1999],0.2],
    "ljet0_pt":[[400,600],[50,100,400],50],
    "ljet1_pt":[[150,350],[50,100,650],50],
    "pt_bal":[[0.15],[0.03,0.75],0.03],
    "Z_centrality":[[0.5],[0.1,0.5],0.1],
    "mass_jj":[[1500,3000],[250,500,1000],250],
    "reco_mass_i":[[40,65,115,175],[40,25,10,15,65],10],
    "reco_mass_o":[[40,65,115,175],[40,25,10,15,65],10],
    "reco_mass_":[[40,65,115,175],[40,25,10,15,65],10],
    "Z_pt_reco_i_basic_cuts_tpt":[[300],[50,100],50]
    }


total_histos ={**histos_no_rebin,**histos_rebin}

In [4]:
r.TH1.AddDirectory(r.kFALSE)
file1 = r.TFile.Open(samples_path+sample1_name+'.root')
file2 = r.TFile.Open(samples_path+sample2_name+'.root')

myFile =r.TFile.Open(fs_name+"_Average"+EWSampleUsedForExtraction+".root", "RECREATE")
r.TH1.AddDirectory(r.kFALSE)    

for histo in total_histos:

    h1 = file1.Get(histo)
    h2 = file2.Get(histo)
    
    h1.Scale(qcd_1)
    h2.Scale(qcd_2)
    
    if histo in histos_rebin:
    
        rebining=biner(total_histos[histo][0],total_histos[histo][1],h1)
        nb=len(rebining)-1
        h1=h1.Rebin(nb,histo,rebining)
        h2=h2.Rebin(nb,histo,rebining)
        
        if "mass_jj" in histo:
            scaleBinUncertainty(h1,sample1_name)
            scaleBinUncertainty(h2,sample2_name)
        
        #h1.Scale(1.0/h1.Integral(1,-1))
        #h2.Scale(1.0/h2.Integral(1,-1))

        #hist_list=[h1,h2]
        #normalization(hist_list,total_histos[histo][2])
    
    h1.SetBit(r.TH1.kIsAverage)
    h2.SetBit(r.TH1.kIsAverage)
    
    final_hist = h1.Clone()
    final_hist.Add(h2)

    myFile.WriteObject(final_hist,histo)

myFile.Close()

os.system("mv "+fs_name+"_Average"+EWSampleUsedForExtraction+".root "+samples_path+"/"+fs_name+"_Average"+EWSampleUsedForExtraction+".root")

0

