In [1]:
from yahist import Hist1D,Hist2D
from yahist.utils import plot_stack
import numpy as np
import json

%pylab inline
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

import mplhep
plt.style.use(mplhep.style.CMS)

Populating the interactive namespace from numpy and matplotlib


## Skim Parameters

In [2]:
# Skim version tag; it is embedded in the metadata and histogram file names.
skim_v = 'v4'  # currently available x3 or v4
# Data-taking years handled by this notebook (used as dictionary keys and in file names).
years = ['2016', '2017', '2018']
# Per-year lookup tables, keyed first by year and then by process name.
# Both start empty and are filled from the JSON files under ./metadata/.
xs = {}
tot_weights = {}

In [3]:
# Load the per-process cross-sections of every year into xs[year].
# Loading is best-effort: a missing or malformed file is reported and that
# year is simply left out of `xs`.
for year in years:
    xs_file = './metadata/xsection_'+year+'_'+skim_v+'.json'
    try:
        with open(xs_file, "r") as f:
            xs[year] = json.load(f)
    except (OSError, ValueError) as err:
        # OSError: file missing/unreadable; ValueError: invalid JSON
        # (json.JSONDecodeError derives from ValueError).
        print ( ' year: ' , year , ' failed to load cross-sections.', err)
xs

{'2016': {'DYJets': 5941.0,
  'ttbar': 831.76,
  'ZG': 55.6,
  'WG': 191.4,
  'GJets_HT40To100': 23100.0,
  'GJets_HT100To200': 8631.0,
  'GJets_HT200To400': 2280.0,
  'GJets_HT400To600': 273.0,
  'GJets_HT600ToInf': 94.5,
  'QCD_pT30To40': 24810.0,
  'QCD_pT40ToInf': 113400.0,
  'Diphoton': 84.4,
  'ZH': 0.002006453,
  'VH': 0.002257,
  'signal': 0.0098},
 '2017': {'DYJets': 5941.0,
  'ttbar': 831.76,
  'ZG': 55.6,
  'WG': 191.4,
  'GJets_HT40To100': 23100.0,
  'GJets_HT100To200': 8631.0,
  'GJets_HT200To400': 2280.0,
  'GJets_HT400To600': 273.0,
  'GJets_HT600ToInf': 94.5,
  'QCD_pT30To40': 24810.0,
  'QCD_pT40ToInf': 113400.0,
  'Diphoton': 84.4,
  'ZH': 0.002006453,
  'VH': 0.002257,
  'signal': 0.0098},
 '2018': {'DYJets': 5941.0,
  'ttbar': 831.76,
  'ZG': 55.6,
  'WG': 191.4,
  'GJets_HT40To100': 23100.0,
  'GJets_HT100To200': 8631.0,
  'GJets_HT200To400': 2280.0,
  'GJets_HT400To600': 273.0,
  'GJets_HT600ToInf': 94.5,
  'QCD_pT30To40': 24810.0,
  'QCD_pT40ToInf': 113400.0,
  '

In [4]:
# Load the per-process total weights of every year into tot_weights[year].
# Loading is best-effort: a missing or malformed file is reported and that
# year is simply left out of `tot_weights`.
for year in years:
    weights_file = './metadata/totalWeights_'+year+'_'+skim_v+'.json'
    try:
        with open(weights_file, "r") as f:
            tot_weights[year] = json.load(f)
    except (OSError, ValueError) as err:
        # OSError: file missing/unreadable; ValueError: invalid JSON
        # (json.JSONDecodeError derives from ValueError).
        print ( ' year: ' , year , ' failed to load weights.', err)
tot_weights

{'2016': {'DYJets': 1953464196801.9966,
  'ZG': 3159980133.1105003,
  'WG': 3417983.0,
  'GJets_HT40To100': 4858154.0,
  'GJets_HT100To200': 4972282.0,
  'GJets_HT200To400': 10404907.0,
  'GJets_HT400To600': 2529729.0,
  'GJets_HT600ToInf': 2463946.0,
  'QCD_pT30To40': 17881707.54097681,
  'QCD_pT40ToInf': 12971481.0,
  'ZH': 1855.5990000000006,
  'VH': 1862202.4355189996,
  'signal': 877908.0},
 '2017': {'DYJets': 2991330898924.88,
  'ZG': 3159980133.1105003,
  'WG': 6282853.0,
  'GJets_HT40To100': 4080543.0,
  'GJets_HT100To200': 9957110.0,
  'GJets_HT200To400': 17253565.0,
  'GJets_HT400To600': 4640128.0,
  'GJets_HT600ToInf': 3278039.0,
  'QCD_pT30To40': 14597800.0,
  'QCD_pT40ToInf': 17374076.0,
  'ZH': 1855.5990000000006,
  'VH': 4100171.4112320007,
  'signal': 877908.0},
 '2018': {'DYJets': 17799598587.564648,
  'ZG': 1514115614.2427292,
  'WG': 6107452.571461947,
  'GJets_HT40To100': 6455649.950711295,
  'GJets_HT100To200': 8521411.571883801,
  'GJets_HT200To400': 12935475.0860

## For each category (e.g. 1lep_1tau), assemble the histograms of the different processes (e.g. signal, diphoton, data, ...)

In [5]:
# Selection tag = name of the sub-directory of ./hists to read from.
#tag = "basic"
tag = 'basic_dR_mll_cut'
from glob import glob

# Every entry directly under ./hists/<tag>/ is one event category;
# keep only the last path component as its key.
cat_keys = [category_path.split("/")[-1] for category_path in glob("./hists/" + tag + "/*")]
cat_keys

['dipho', '0lep_1tau', '0lep_2tau', '1lep_1tau', '2lep_0tau', '1lep_0tau']

In [6]:
# Process names = entries of the "dipho" category directory (last path component only).
process_keys = [process_path.split("/")[-1] for process_path in glob("./hists/" + tag + "/dipho/*")]
process_keys

['DoubleEG_Run2017B',
 'DoubleEG_Run2017C',
 'DoubleEG_Run2017D',
 'DoubleEG_Run2017E',
 'DoubleEG_Run2017F',
 'DYJets',
 'ZG',
 'WG',
 'GJets_HT40To100',
 'GJets_HT100To200',
 'GJets_HT200To400',
 'GJets_HT400To600',
 'GJets_HT600ToInf',
 'QCD_pT30To40',
 'QCD_pT40ToInf',
 'ZH',
 'VH',
 'signal',
 'EGamma_2018A',
 'EGamma_2018B',
 'EGamma_2018C',
 'EGamma_2018D',
 'DoubleEG_Run2016B',
 'DoubleEG_Run2016C',
 'DoubleEG_Run2016D',
 'DoubleEG_Run2016E',
 'DoubleEG_Run2016F']

In [7]:
# Histogram files of the signal sample in the 1lep_0tau category that belong
# to the current skim version (file name only, directory stripped).
hist_keys = [hist_path.split("/")[-1]
             for hist_path in glob("./hists/" + tag + '/1lep_0tau/signal/*'+skim_v+'.json')]
hist_keys

['pho_pT1_2017_v4.json',
 'pho_pT2_2017_v4.json',
 'pho_pTom1_2017_v4.json',
 'pho_pTom2_2017_v4.json',
 'pho_eta1_2017_v4.json',
 'pho_eta2_2017_v4.json',
 'pho_phi1_2017_v4.json',
 'pho_phi2_2017_v4.json',
 'pho_id1_2017_v4.json',
 'pho_id2_2017_v4.json',
 'muon_pT1_2017_v4.json',
 'muon_eta1_2017_v4.json',
 'muon_phi1_2017_v4.json',
 'muon_iso1_2017_v4.json',
 'n_muon_2017_v4.json',
 'electron_pT1_2017_v4.json',
 'electron_eta1_2017_v4.json',
 'electron_phi1_2017_v4.json',
 'electron_iso1_2017_v4.json',
 'n_electron_2017_v4.json',
 'pho_pT1_2018_v4.json',
 'pho_pT2_2018_v4.json',
 'pho_pTom1_2018_v4.json',
 'pho_pTom2_2018_v4.json',
 'pho_eta1_2018_v4.json',
 'pho_eta2_2018_v4.json',
 'pho_phi1_2018_v4.json',
 'pho_phi2_2018_v4.json',
 'pho_id1_2018_v4.json',
 'pho_id2_2018_v4.json',
 'muon_pT1_2018_v4.json',
 'muon_eta1_2018_v4.json',
 'muon_phi1_2018_v4.json',
 'muon_iso1_2018_v4.json',
 'n_muon_2018_v4.json',
 'electron_pT1_2018_v4.json',
 'electron_eta1_2018_v4.json',
 'electron

## Merge 2016, 2017 and 2018

In [8]:
import os

def merge_data_mc_plot(tag, cat, hist_name, savetag):
    """Draw and save the data/MC comparison of one histogram, summed over 2016-2018.

    For every process directory under ``./hists/<tag>/<cat>/`` the 2016, 2017
    and 2018 versions of the histogram are loaded; years whose file does not
    exist are skipped. Processes whose name contains ``EGamma`` or
    ``DoubleEG`` are treated as data and left unscaled; every other process is
    scaled by ``lumi[year] * 1000 * xs[year][process] / tot_weights[year][process]``.

    The figure (data, signal and stacked backgrounds on top, data/background
    ratio below) is written as PDF and PNG, and the integrals and their errors
    are written as ``<name>_norm.json`` / ``<name>_normerror.json``, all under
    ``/home/users/fsetti/public_html/HH2ggtautau/data_mc/<today>_<savetag>/<cat>/``.

    Parameters
    ----------
    tag : str
        Sub-directory of ``./hists`` to read from.
    cat : str
        Category sub-directory, e.g. ``"1lep_1tau"``.
    hist_name : str
        Histogram name carrying the 2017 year tag (e.g. ``"pho_pT1_2017"``);
        the 2016 and 2018 file names are obtained by replacing ``"2017"``.
    savetag : str
        Suffix of the dated output directory.

    Raises
    ------
    KeyError
        If no ``signal`` histogram was found, or a scaled process is missing
        from ``tot_weights``, ``xs`` or ``colors``.

    Notes
    -----
    Relies on the notebook-level names ``skim_v``, ``tot_weights``, ``xs``,
    ``lumi``, ``colors``, ``xlabel`` and ``today``.
    """
    import shutil  # local import: only needed to copy index.php next to the plots

    process_names = [path.split("/")[-1] for path in glob("./hists/" + tag + "/" + cat + "/*")]

    hists = {}
    norms = {}
    norm_errors = {}
    ## gather all hists from different processes and years
    for process in process_names:
        process_dir = "./hists/" + tag + "/" + cat + "/" + process + "/"
        is_data = 'EGamma' in process or 'DoubleEG' in process

        yearly_hists = []
        for year in ('2016', '2017', '2018'):
            # hist_name carries the 2017 tag; swap it for the year being read.
            hist_file = process_dir + hist_name.replace('2017', year) + '_' + skim_v + '.json'
            if not os.path.isfile(hist_file):
                continue
            hist = Hist1D.from_json(hist_file)
            if not is_data:
                hist = hist/tot_weights[year][process]*lumi[year]*1000*xs[year][process]
            yearly_hists.append(hist)

        if yearly_hists:
            hists[process] = np.sum(yearly_hists)
            hists[process].metadata["label"] = process

    # data
    hist_data = np.sum( [hists[key] for key in hists.keys() if ( 'EGamma' in key or 'DoubleEG' in key ) ] )
    norms["data"] = hist_data.integral
    norm_errors["data"] = hist_data.integral_error
    hist_MC = [] #[hist_GJets, hist_QCD]

    # GJets (np.sum of an empty list is not a Hist1D, hence the isinstance guard)
    hist_GJets = np.sum( [hists[key] for key in hists.keys() if "GJet" in key] )
    if isinstance(hist_GJets, Hist1D):
        hist_GJets.metadata["label"] = "GJets"
        hist_GJets.metadata["color"] = colors["GJets"]
        # In the 0lep_2tau category GJets is kept out of the stack,
        # but its yield is still recorded below.
        if cat != "0lep_2tau":
            hist_MC.append(hist_GJets)
        norms["GJet"] = hist_GJets.integral
        norm_errors["GJet"] = hist_GJets.integral_error

    # QCD
    hist_QCD = np.sum( [hists[key] for key in hists.keys() if "QCD" in key] )
    if isinstance(hist_QCD, Hist1D):
        hist_QCD.metadata["label"] = "QCD"
        hist_QCD.metadata["color"] = colors["QCD"]
        hist_MC.append(hist_QCD)
        norms["QCD"] = hist_QCD.integral
        norm_errors["QCD"] = hist_QCD.integral_error

    # other MC: everything that is not data, already grouped above, or excluded from the stack
    others_blacklist = ['EGamma', 'DoubleEG', 'GJet', 'QCD', 'signal', 'ZH', 'VH', 'DYJets']
    for key in hists.keys():
        if any(forbid_key in key for forbid_key in others_blacklist):
            continue
        hist_MC.append(hists[key])
        hists[key].metadata["color"] = colors[key]
        norms[key] = hists[key].integral
        norm_errors[key] = hists[key].integral_error

    # sum all bkg, for ratio plot
    hist_bkg = np.sum(hist_MC)
    norms["bkg"] = hist_bkg.integral
    norm_errors["bkg"] = hist_bkg.integral_error

    fig,(ax1,ax2) = plt.subplots(2,sharex=True,figsize=(12,9),gridspec_kw=dict(height_ratios=[3, 1]))
    hist_data.plot(ax=ax1,histtype="step", label="data", show_errors=True, color="black")
    hists["signal"].plot(ax=ax1,histtype="step", fill = False, label="signal", color="red")
    norms["signal"] = hists["signal"].integral
    norm_errors["signal"] = hists["signal"].integral_error
    plot_stack(hist_MC,ax=ax1)
    ax1.set_ylim(0)

    (hist_data/hist_bkg).plot(ax=ax2,show_errors=True,label="data/MC")
    ax2.set_ylim(0.5,1.5)

    # guided horizontal line
    xmin, xmax = ax1.get_xlim()
    ax2.hlines(y=1, xmin = xmin, xmax = xmax, linewidth=2, color='r')

    basepath = "/home/users/fsetti/public_html/HH2ggtautau/data_mc/" + str(today) + "_" + savetag + "/"
    savepath = basepath + cat + "/"
    # Create the output directory and copy the web index without going through
    # a shell, so that unusual characters in savetag/cat cannot alter the command.
    os.makedirs(savepath, exist_ok=True)
    index_php = "/home/users/fsetti/scripts/index.php"
    if os.path.isfile(index_php):  # the copy stays best-effort, as with the former `cp`
        shutil.copy(index_php, savepath)
    ax2.set_xlabel(xlabel[hist_name.split('.')[0].split('_201')[0]])

    # e.g. "pho_pT1_2017" -> "pho_pT1_v4": the merged plot carries no year tag
    save_name = hist_name.split(".")[0].replace('_2017', '_') + skim_v
    fig.savefig(savepath + save_name + ".pdf")
    fig.savefig(savepath + save_name + ".png")
    plt.close(fig)

    with open(savepath + save_name + "_norm.json", "w") as f:
        json.dump(norms, f)
    with open(savepath + save_name  + "_normerror.json", "w") as f:
        json.dump(norm_errors, f)
    #return norms, norm_errors

## Merge the muon and electron histograms into lepton histograms

In [9]:
%%bash
# List the signal histogram files of the 1lep_1tau category.
ls ./hists/basic_dR_mll_cut/1lep_1tau/signal/

dR_tau_e_2016_v4
dR_tau_e_2016_v4.json
dR_tau_e_2017_v4
dR_tau_e_2017_v4.json
dR_tau_e_2018_v4
dR_tau_e_2018_v4.json
dR_tau_lep_2016_v4.json
dR_tau_lep_2017_v4.json
dR_tau_lep_2018_v4.json
dR_tau_mu_2016_v4
dR_tau_mu_2016_v4.json
dR_tau_mu_2017_v4
dR_tau_mu_2017_v4.json
dR_tau_mu_2018_v4
dR_tau_mu_2018_v4.json
electron_eta1_2016_v4
electron_eta1_2016_v4.json
electron_eta1_2017_v4
electron_eta1_2017_v4.json
electron_eta1_2018_v4
electron_eta1_2018_v4.json
electron_iso1_2016_v4
electron_iso1_2016_v4.json
electron_iso1_2017_v4
electron_iso1_2017_v4.json
electron_iso1_2018_v4
electron_iso1_2018_v4.json
electron_pT1_2016_v4
electron_pT1_2016_v4.json
electron_pT1_2017_v4
electron_pT1_2017_v4.json
electron_pT1_2018_v4
electron_pT1_2018_v4.json
electron_phi1_2016_v4
electron_phi1_2016_v4.json
electron_phi1_2017_v4
electron_phi1_2017_v4.json
electron_phi1_2018_v4
electron_phi1_2018_v4.json
lepton_eta1_2016_v4.json
lepton_eta1_2017_v4.json
lepton_eta1_2018_v4.json
lepton_iso1_2016_v4.json
lepton

In [10]:
import os
def merge_lep(tag, cat):
    """Add each muon histogram to its electron counterpart and save the sum as a "lepton" histogram.

    For every process directory under ``./hists/<tag>/<cat>/`` and every year
    in ``years``, the histograms ``muon_pT1``, ``muon_eta1``, ``muon_phi1``,
    ``muon_iso1`` and ``n_muon`` are summed with the matching ``electron``
    histogram and written next to them with ``muon`` replaced by ``lepton``
    in the file name. A process/year is skipped when either input is missing.

    Parameters
    ----------
    tag : str
        Sub-directory of ``./hists`` to work in.
    cat : str
        Category sub-directory, e.g. ``"1lep_0tau"``.

    Notes
    -----
    Relies on the notebook-level names ``years`` and ``skim_v``.
    """
    ## 0tau_1lep
    hists_to_merge = ["muon_pT1", "muon_eta1", "muon_phi1", "muon_iso1", "n_muon"]
    cat_dir = "./hists/" + tag + "/" + cat + "/"
    # The list of process directories does not depend on the histogram name: build it once.
    process_names = [path.split("/")[-1] for path in glob(cat_dir + "*")]

    for name in hists_to_merge:
        # Substitute the flavour in the histogram name only, never in the
        # directory part, so a tag/category/process containing "muon" stays intact.
        electron_name = name.replace("muon", "electron")
        lepton_name = name.replace("muon", "lepton")
        for process in process_names:
            process_dir = cat_dir + process + "/"
            for year in years:
                suffix = '_' + year + '_' + skim_v + ".json"
                muon_file = process_dir + name + suffix
                electron_file = process_dir + electron_name + suffix
                if not os.path.isfile(muon_file): continue
                if not os.path.isfile(electron_file): continue

                hist_lep = Hist1D.from_json(muon_file) + Hist1D.from_json(electron_file)
                hist_lep.to_json(process_dir + lepton_name + suffix)
    
## 1tau_1lep
tag = "basic_dR_mll_cut"
merge_lep(tag, "1lep_0tau")
merge_lep(tag, "1lep_1tau")

# In the 1lep_1tau category, also combine the tau-electron and tau-muon
# histograms into tau-lepton ones: (electron name, muon name, merged name).
tau_lep_merges = [
    ("dR_tau_e", "dR_tau_mu", "dR_tau_lep"),
    ("mtaue", "mtaumu", "mtaulep"),
]
# process_keys was listed from the dipho category; a process/year without the
# needed files in 1lep_1tau is skipped by the isfile checks below.
for process in process_keys:
    process_dir = "./hists/" + tag + "/1lep_1tau/" + process + "/"
    for year in years:
        suffix = '_' + year + '_' + skim_v + ".json"
        # Each pair is merged on its own, so a missing mass histogram no longer
        # prevents the dR histogram from being merged (and vice versa).
        for electron_name, muon_name, lepton_name in tau_lep_merges:
            electron_file = process_dir + electron_name + suffix
            muon_file = process_dir + muon_name + suffix
            if not os.path.isfile(electron_file): continue
            if not os.path.isfile(muon_file): continue

            hist_lep = Hist1D.from_json(electron_file) + Hist1D.from_json(muon_file)
            # Build the output path from its parts instead of str.replace on the
            # full path, so the directory names can never be rewritten.
            hist_lep.to_json(process_dir + lepton_name + suffix)


In [11]:
# x-axis label for every histogram name (name without year / skim-version suffix).
# Labels containing TeX commands are raw strings: sequences such as "\g", "\e",
# "\p" or "\m" are invalid escapes in a normal string literal and only survive
# with a Deprecation/SyntaxWarning. The resulting text is unchanged.
xlabel = {
    "pho_pT1": r"$p_T^{\gamma 1} (GeV)$",
    "pho_pT2": r"$p_T^{\gamma 2} (GeV)$",
    "pho_pTom1": r"$(p_T/m_{\gamma\gamma})^{\gamma 1} (GeV)$",
    "pho_pTom2": r"$(p_T/m_{\gamma\gamma})^{\gamma 2} (GeV)$",
    "pho_eta1": r"$\eta^{\gamma 1}$",
    "pho_eta2": r"$\eta^{\gamma 2}$",
    "pho_phi1": r"$\phi^{\gamma 1}$",
    "pho_phi2": r"$\phi^{\gamma 2}$",
    "pho_id1": r"$ID^{\gamma 1}$",
    "pho_id2": r"$ID^{\gamma 2}$",
    "tau_pT1": "$p_T^{tau 1} (GeV)$",
    "tau_pT2": "$p_T^{tau 2} (GeV)$",
    "tau_eta1": r"$\eta^{tau 1}$",
    "tau_eta2": r"$\eta^{tau 2}$",
    "tau_phi1": r"$\phi^{tau 1}$",
    "tau_phi2": r"$\phi^{tau 2}$",
    "tau_deeptau_vs_j_1": "deepTau vs $j^{tau 1}$",
    "tau_deeptau_vs_j_2": "deepTau vs $j^{tau 2}$",
    "tau_deeptau_vs_m_1": "deepTau vs $m^{tau 1}$",
    "tau_deeptau_vs_m_2": "deepTau vs $m^{tau 2}$",
    "tau_deeptau_vs_e_1": "deepTau vs $e^{tau 1}$",
    "tau_deeptau_vs_e_2": "deepTau vs $e^{tau 2}$",
    "electron_pT1": "$p_T^{electron 1} (GeV)$",
    "electron_pT2": "$p_T^{electron 2} (GeV)$",
    "electron_eta1": r"$\eta^{electron 1}$",
    "electron_eta2": r"$\eta^{electron 2}$",
    "electron_phi1": r"$\phi^{electron 1}$",
    "electron_phi2": r"$\phi^{electron 2}$",
    "electron_iso1": "$iso^{electron 1}$",
    "electron_iso2": "$iso^{electron 2}$",
    "muon_pT1": r"$p_T^{\mu 1} (GeV)$",
    "muon_pT2": r"$p_T^{\mu 2} (GeV)$",
    "muon_eta1": r"$\eta^{\mu 1}$",
    "muon_eta2": r"$\eta^{\mu 2}$",
    "muon_phi1": r"$\phi^{\mu 1}$",
    "muon_phi2": r"$\phi^{\mu 2}$",
    "muon_iso1": r"$iso^{\mu 1}$",
    "muon_iso2": r"$iso^{\mu 2}$",
    "lepton_pT1": "$p_T^{lepton 1} (GeV)$",
    "lepton_pT2": "$p_T^{lepton 2} (GeV)$",
    "lepton_eta1": r"$\eta^{lepton 1}$",
    "lepton_eta2": r"$\eta^{lepton 2}$",
    "lepton_phi1": r"$\phi^{lepton 1}$",
    "lepton_phi2": r"$\phi^{lepton 2}$",
    "lepton_iso1": "$iso^{lepton 1}$",
    "lepton_iso2": "$iso^{lepton 2}$",
    "n_tau": "$n_{tau}$",
    "n_muon": r"$n_{\mu}$",
    "n_electron": "$n_{electron}$",
    "n_lepton": "$n_{lepton}$",
    "dR_tau_e": "$dR(tau,e)$",
    "dR_tau_mu": r"$dR(tau,\mu)$",
    "dR_tau_lep": "$dR(tau,lepton)$",
    "dR_tautau": "$dR(tau,tau)$",
    "dR_ee": "$dR(e,e)$",
    "dR_mumu": r"$dR(\mu,\mu)$",
    "mtaue": "$m_{tau e}$ (GeV)",
    "mtaumu": r"$m_{tau \mu} (GeV)$",
    "mee": "$m_{ee} (GeV)$",
    "mmumu": r"$m_{\mu\mu} (GeV)$",
    "mtaulep": "$m_{tau lep} (GeV)$",
    "mtautau": "$m_{tau tau} (GeV)$"
}

In [12]:
# Fill colour (hex RGB) of each background in the stacked plots, keyed by the
# process name or, for the grouped samples, by the group name ("QCD", "GJets").
colors = {
    "ZG":       "#E4892F",
    "QCD":      "#5757E6",
    "GJets":    "#57E661",
    "ttbar":    "#6F57E6",
    "Diphoton": "#E6576F",
    "WG":       "#9A57E6",
}

In [13]:
import os.path
from glob import glob
from datetime import date
from subprocess import call

#lumi = 59.0
# Integrated luminosity per data-taking year, from
# http://www.t2.ucsd.edu/tastwiki/bin/view/CMS/HHGgTauTauSamples#List_of_data
# NOTE(review): presumably in fb^-1, since the plotting code multiplies by 1000
# before applying the cross-section -- confirm against the wiki.
lumi = {'2016': 42.3, '2017': 41.6, '2018': 59.9}

# Date stamp used as prefix of the output directory name.
today = date.today()

def get_data_mc_plot(tag, cat, hist_name, year, savetag):
    """Draw and save the data/MC comparison of one histogram for a single year.

    For every process directory under ``./hists/<tag>/<cat>/`` the file
    ``<hist_name>_<year>_<skim_v>.json`` is loaded if it exists. Processes
    whose name contains ``EGamma`` or ``DoubleEG`` are treated as data and left
    unscaled; every other process is scaled by
    ``lumi[year] * 1000 * xs[year][process] / tot_weights[year][process]``.

    The figure (data, signal and stacked backgrounds on top, data/background
    ratio below) is written as PDF and PNG, and the integrals and their errors
    are written as ``<name>_norm.json`` / ``<name>_normerror.json``, all under
    ``/home/users/fsetti/public_html/HH2ggtautau/data_mc/<today>_<savetag>/<cat>/``.

    Parameters
    ----------
    tag : str
        Sub-directory of ``./hists`` to read from.
    cat : str
        Category sub-directory, e.g. ``"1lep_1tau"``.
    hist_name : str
        Histogram name without year or skim version, e.g. ``"pho_pT1"``;
        it is also the key looked up in ``xlabel``.
    year : str
        Year key, e.g. ``"2017"``; used in the file name and to index
        ``tot_weights``, ``lumi`` and ``xs``.
    savetag : str
        Suffix of the dated output directory.

    Raises
    ------
    KeyError
        If no ``signal`` histogram was found, or a scaled process is missing
        from ``tot_weights``, ``xs`` or ``colors``.

    Notes
    -----
    Relies on the notebook-level names ``skim_v``, ``tot_weights``, ``xs``,
    ``lumi``, ``colors``, ``xlabel`` and ``today``.
    """
    import shutil  # local import: only needed to copy index.php next to the plots

    process_names = [path.split("/")[-1] for path in glob("./hists/" + tag + "/" + cat + "/*")]

    hists = {}
    norms = {}
    norm_errors = {}
    ## gather the hists of all processes for the requested year
    for process in process_names:
        hist_file = "./hists/" + tag + "/" + cat + "/" + process + "/" + hist_name + '_' + year + '_' + skim_v + '.json'
        if not os.path.isfile(hist_file): continue
        hists[process] = Hist1D.from_json(hist_file)
        if ( 'EGamma' not in process and 'DoubleEG' not in process ):
            hists[process] = hists[process]/tot_weights[year][process]*lumi[year]*1000*xs[year][process]
            hists[process].metadata["label"] = process

    # data
    hist_data = np.sum( [hists[key] for key in hists.keys() if ( 'EGamma' in key or 'DoubleEG' in key ) ] )
    norms["data"] = hist_data.integral
    norm_errors["data"] = hist_data.integral_error
    hist_MC = [] #[hist_GJets, hist_QCD]

    # GJets (np.sum of an empty list is not a Hist1D, hence the isinstance guard)
    hist_GJets = np.sum( [hists[key] for key in hists.keys() if "GJet" in key] )
    if isinstance(hist_GJets, Hist1D):
        hist_GJets.metadata["label"] = "GJets"
        hist_GJets.metadata["color"] = colors["GJets"]
        # In the 0lep_2tau category GJets is kept out of the stack,
        # but its yield is still recorded below.
        if cat != "0lep_2tau":
            hist_MC.append(hist_GJets)
        norms["GJet"] = hist_GJets.integral
        norm_errors["GJet"] = hist_GJets.integral_error

    # QCD
    hist_QCD = np.sum( [hists[key] for key in hists.keys() if "QCD" in key] )
    if isinstance(hist_QCD, Hist1D):
        hist_QCD.metadata["label"] = "QCD"
        hist_QCD.metadata["color"] = colors["QCD"]
        hist_MC.append(hist_QCD)
        norms["QCD"] = hist_QCD.integral
        norm_errors["QCD"] = hist_QCD.integral_error

    # other MC: everything that is not data, already grouped above, or excluded from the stack
    others_blacklist = ['EGamma', 'DoubleEG', 'GJet', 'QCD', 'signal', 'ZH', 'VH', 'DYJets']
    for key in hists.keys():
        if any(forbid_key in key for forbid_key in others_blacklist):
            continue
        hist_MC.append(hists[key])
        hists[key].metadata["color"] = colors[key]
        norms[key] = hists[key].integral
        norm_errors[key] = hists[key].integral_error

    # sum all bkg, for ratio plot
    hist_bkg = np.sum(hist_MC)
    norms["bkg"] = hist_bkg.integral
    norm_errors["bkg"] = hist_bkg.integral_error

    fig,(ax1,ax2) = plt.subplots(2,sharex=True,figsize=(12,9),gridspec_kw=dict(height_ratios=[3, 1]))
    hist_data.plot(ax=ax1,histtype="step", label="data", show_errors=True, color="black")
    hists["signal"].plot(ax=ax1,histtype="step", fill = False, label="signal", color="red")
    norms["signal"] = hists["signal"].integral
    norm_errors["signal"] = hists["signal"].integral_error
    plot_stack(hist_MC,ax=ax1)
    ax1.set_ylim(0)

    (hist_data/hist_bkg).plot(ax=ax2,show_errors=True,label="data/MC")
    ax2.set_ylim(0.5,1.5)

    # guided horizontal line
    xmin, xmax = ax1.get_xlim()
    ax2.hlines(y=1, xmin = xmin, xmax = xmax, linewidth=2, color='r')

    basepath = "/home/users/fsetti/public_html/HH2ggtautau/data_mc/" + str(today) + "_" + savetag + "/"
    savepath = basepath + cat + "/"
    # Create the output directory and copy the web index without going through
    # a shell, so that unusual characters in savetag/cat cannot alter the command.
    os.makedirs(savepath, exist_ok=True)
    index_php = "/home/users/fsetti/scripts/index.php"
    if os.path.isfile(index_php):  # the copy stays best-effort, as with the former `cp`
        shutil.copy(index_php, savepath)
    ax2.set_xlabel(xlabel[hist_name.split('.')[0]])

    save_name = hist_name.split(".")[0] + '_' + year + '_' + skim_v
    fig.savefig(savepath + save_name + ".pdf")
    fig.savefig(savepath + save_name + ".png")
    plt.close(fig)

    with open(savepath + save_name + "_norm.json", "w") as f:
        json.dump(norms, f)
    with open(savepath + save_name  + "_normerror.json", "w") as f:
        json.dump(norm_errors, f)
    #return norms, norm_errors
    
#get_data_mc_plot('basic_dR_mll_cut', '1lep_1tau', 'mtaulep', '2017' , 'test')

In [None]:
%%time
def gather_hists(tag, savetag):
    """Produce every per-year and merged data/MC plot for all categories of `tag`.

    Categories are the entries of ``./hists/<tag>/``. Within each category the
    histogram names are taken from the signal files of the current skim
    version; only names containing ``2017`` are used as seeds. For each seed,
    `get_data_mc_plot` is called once per year in ``years`` and
    `merge_data_mc_plot` once for the combination.
    A timestamp is printed when a category starts.
    """
    from datetime import datetime

    version_suffix = '_' + skim_v
    categories = [path.split("/")[-1] for path in glob("./hists/" + tag + "/*")]

    for cat in categories:
        print (cat, datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

        signal_files = glob('./hists/' + tag + '/' + cat + '/signal/*' + version_suffix + '.json')
        # file name with the "_<skim_v>.json" tail cut off, duplicates removed
        unique_names = list(set([path.split("/")[-1].split(version_suffix)[0] for path in signal_files]))
        for hist_name in unique_names:
            if '2017' not in hist_name:
                continue
            base_name = hist_name.split('_201')[0]
            for year in years:
                get_data_mc_plot(tag, cat, base_name, year, savetag)
            merge_data_mc_plot( tag , cat , hist_name, savetag )
# Produce all plots for the 'basic_dR_mll_cut' selection; outputs go to the "<date>_test" directory.
gather_hists('basic_dR_mll_cut', 'test')

dipho 31/01/2021 22:20:45


findfont: Font family ['cursive'] not found. Falling back to DejaVu Sans.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output

'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The 

'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The 

0lep_1tau 31/01/2021 22:31:11


'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The 

0lep_2tau 31/01/2021 22:34:45


'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The 

'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The 

1lep_1tau 31/01/2021 22:40:08


'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The entire font will be embedded in the output.
'texgyreheros-regular.otf' can not be subsetted into a Type 3 font. The 

In [66]:
%%bash
# Recursively set mode 755 on the published plot area (note: this also marks
# regular files as executable), then list its entries oldest-first and keep the
# last four lines, i.e. the most recently modified ones.
chmod -R 755 /home/users/fsetti/public_html/HH2ggtautau/data_mc/
ls -lrht /home/users/fsetti/public_html/HH2ggtautau/data_mc/ | tail -n 4

total 24K
drwxr-xr-x+ 8 fsetti fsetti 4.0K Jan 26 17:11 2021-01-26_test
drwxr-xr-x+ 8 fsetti fsetti 4.0K Jan 28 01:28 2021-01-28_test
drwxr-xr-x+ 8 fsetti fsetti 4.0K Jan 31 19:52 2021-01-31_test


In [20]:
def make_table(yielddir, cat, histname):
    r"""Print the yields of one category as ``<process>: <yield> \pm <error>`` lines.

    Reads ``<yielddir>/<cat>/<histname>_norm.json`` and the matching
    ``_normerror.json`` file and prints one line per key of the norm file, in
    file order, with two decimals.

    Parameters
    ----------
    yielddir : str
        Directory holding one sub-directory per category; a trailing slash is
        optional.
    cat : str
        Category sub-directory, e.g. ``"dipho"``.
    histname : str
        Stem of the yield files, e.g. ``"pho_pT1"``.

    Raises
    ------
    OSError
        If one of the two files cannot be opened.
    KeyError
        If a key of the norm file is absent from the error file.
    """
    file_stem = os.path.join(yielddir, cat, histname)

    with open(file_stem + "_norm.json", "r") as f:
        norms = json.load(f)
    with open(file_stem + "_normerror.json", "r") as f:
        normerrors = json.load(f)

    for key in norms.keys():
        # raw string: "\p" is not a valid escape sequence in a normal literal
        print (r"{}: {:.2f} \pm {:.2f}".format(key, norms[key], normerrors[key]))
    
def make_table_all(yielddir, histname):
    r"""Print a ``|``-separated yield table: one row per process, one column per category.

    For each of the six hard-coded categories the files
    ``<yielddir>/<cat>/<histname>_norm.json`` and ``..._normerror.json`` are
    read. Every cell is formatted as ``<yield> $\pm$ <error>`` with two
    decimals; a process that is absent from a category's files is shown as
    ``-`` instead of aborting the whole table. Separator lines (``|-``) frame
    the header and set off the ``bkg`` and ``signal`` rows.

    Parameters
    ----------
    yielddir : str
        Directory holding one sub-directory per category; a trailing slash is
        optional.
    histname : str
        Stem of the yield files, e.g. ``"pho_pT1"``.

    Raises
    ------
    OSError
        If a yield file of any category cannot be opened.
    """
    cat_keys = ["dipho", "1lep_0tau", "0lep_1tau", "1lep_1tau", "0lep_2tau", "2lep_0tau"]
    process_keys = ["GJet","QCD","ttbar","ZG","WG","Diphoton","bkg","data","signal"]

    norms_allcats = {}
    normerrors_allcats = {}
    for cat in cat_keys:
        file_stem = os.path.join(yielddir, cat, histname)
        with open(file_stem + "_norm.json", "r") as f:
            norms_allcats[cat] = json.load(f)
        with open(file_stem + "_normerror.json", "r") as f:
            normerrors_allcats[cat] = json.load(f)

    print ("|-")
    print ("| |", "|".join(cat_keys))
    print ("|-")
    print ("|-")
    for process in process_keys:
        row_content = []
        for cat in cat_keys:
            norm = norms_allcats[cat].get(process)
            normerror = normerrors_allcats[cat].get(process)
            if norm is None or normerror is None:
                # e.g. no QCD histogram survived in this category
                row_content.append("-")
            else:
                # raw string: "\p" is not a valid escape sequence in a normal literal
                row_content.append(r"{:.2f} $\pm$ {:.2f}".format(norm, normerror))
        if process == "bkg" or process == "signal":
            print ("|-")
        print ("| {} | {}".format(process, "|".join(row_content)))
    print ("|-")

#make_table("/home/users/hmei/public_html/2021/2021-01-18_test/", "dipho", "pho_pT1")
# Yield table of all categories, read from the pho_pT1 yield files of the 2021-01-26 output directory.
make_table_all("/home/users/fsetti/public_html/HH2ggtautau/data_mc/2021-01-26_test/", "pho_pT1")

|-
| | dipho|1lep_0tau|0lep_1tau|1lep_1tau|0lep_2tau|2lep_0tau
|-
|-
| GJet | 1336682.45 $\pm$ 11395.01|2941.66 $\pm$ 475.38|19051.50 $\pm$ 1214.49|0.00 $\pm$ 0.00|6.99 $\pm$ 6.99|65.31 $\pm$ 52.39
| QCD | 728856.95 $\pm$ 15090.55|749.65 $\pm$ 469.76|11333.25 $\pm$ 1899.82|0.00 $\pm$ 0.00|0.00 $\pm$ 0.00|0.00 $\pm$ 0.00
| ttbar | 8602.45 $\pm$ 147.41|1526.04 $\pm$ 61.19|288.76 $\pm$ 26.88|1.91 $\pm$ 2.99|0.94 $\pm$ 0.94|12.28 $\pm$ 6.35
| ZG | 17642.09 $\pm$ 102.47|9015.91 $\pm$ 73.49|616.76 $\pm$ 19.10|17.39 $\pm$ 3.08|6.28 $\pm$ 2.06|246.22 $\pm$ 12.02
| WG | 6936.98 $\pm$ 113.16|819.97 $\pm$ 38.83|126.97 $\pm$ 15.29|0.00 $\pm$ 0.00|0.00 $\pm$ 0.00|0.00 $\pm$ 0.00
| Diphoton | 249266.17 $\pm$ 335.27|949.82 $\pm$ 18.34|6306.63 $\pm$ 44.21|1.47 $\pm$ 0.39|8.30 $\pm$ 1.53|15.81 $\pm$ 1.98
|-
| bkg | 2347987.10 $\pm$ 18913.71|16003.06 $\pm$ 676.50|37723.87 $\pm$ 2255.57|20.77 $\pm$ 4.31|15.51 $\pm$ 2.73|339.63 $\pm$ 54.16
| data | 3064836.00 $\pm$ 1750.67|25040.00 $\pm$ 158.24|39500.00 $

In [33]:
# Print the pho_pT1 yields of every category found on disk (cat_keys), one
# block per category; "###" marks the end of each block.
yield_dir = "/home/users/fsetti/public_html/HH2ggtautau/data_mc/2021-01-26_test/"
for cat in cat_keys:
    print (cat)
    make_table(yield_dir, cat, "pho_pT1")
    print ("###")

dipho
data: 3064836.00 \pm 1750.67
GJet: 1336682.45 \pm 11395.01
QCD: 728856.95 \pm 15090.55
ttbar: 8602.45 \pm 147.41
ZG: 17642.09 \pm 102.47
WG: 6936.98 \pm 113.16
Diphoton: 249266.17 \pm 335.27
bkg: 2347987.10 \pm 18913.71
signal: 236.14 \pm 0.37
###
0lep_1tau
data: 39500.00 \pm 198.75
GJet: 19051.50 \pm 1214.49
QCD: 11333.25 \pm 1899.82
ttbar: 288.76 \pm 26.88
ZG: 616.76 \pm 19.10
WG: 126.97 \pm 15.29
Diphoton: 6306.63 \pm 44.21
bkg: 37723.87 \pm 2255.57
signal: 74.06 \pm 0.21
###
0lep_2tau
data: 23.00 \pm 4.80
GJet: 6.99 \pm 6.99
QCD: 0.00 \pm 0.00
ttbar: 0.94 \pm 0.94
ZG: 6.28 \pm 2.06
WG: 0.00 \pm 0.00
Diphoton: 8.30 \pm 1.53
bkg: 15.51 \pm 2.73
signal: 11.89 \pm 0.08
###
1lep_1tau
data: 31.00 \pm 5.57
GJet: 0.00 \pm 0.00
QCD: 0.00 \pm 0.00
ttbar: 1.91 \pm 2.99
ZG: 17.39 \pm 3.08
WG: 0.00 \pm 0.00
Diphoton: 1.47 \pm 0.39
bkg: 20.77 \pm 4.31
signal: 9.33 \pm 0.07
###
2lep_0tau
data: 227.00 \pm 15.07
GJet: 65.31 \pm 52.39
QCD: 0.00 \pm 0.00
ttbar: 12.28 \pm 6.35
ZG: 246.22 \pm 12.

In [142]:
# 1lep_0tau yields taken from the n_lepton histogram of the 2021-01-19 output directory.
make_table("/home/users/hmei/public_html/2021/2021-01-19_test/", "1lep_0tau", "n_lepton")

data: 25040.00 \pm 158.24
GJet: 2957.11 \pm 477.49
ttbar: 0.72 \pm 0.01
ZG: 84.61 \pm 0.43
WG: 824.65 \pm 39.05
Diphoton: 4307.99 \pm 57.79
bkg: 8175.07 \pm 482.56
signal: 47.58 \pm 0.17


In [141]:
# 1lep_0tau yields taken from the lepton_pT1 histogram of the 2021-01-19 output directory.
make_table("/home/users/hmei/public_html/2021/2021-01-19_test/", "1lep_0tau", "lepton_pT1")

data: 25040.00 \pm 158.24
GJet: 2941.66 \pm 475.38
ttbar: 1526.04 \pm 61.19
ZG: 9015.91 \pm 73.49
WG: 819.97 \pm 38.83
Diphoton: 949.82 \pm 18.34
bkg: 15253.41 \pm 486.80
signal: 47.57 \pm 0.17


In [140]:
# 1lep_0tau yields taken from the pho_pT1 histogram of the 2021-01-19 output directory.
make_table("/home/users/hmei/public_html/2021/2021-01-19_test/", "1lep_0tau", "pho_pT1")

data: 25040.00 \pm 158.24
GJet: 2941.66 \pm 475.38
QCD: 749.65 \pm 469.76
ttbar: 1526.04 \pm 61.19
ZG: 9015.91 \pm 73.49
WG: 819.97 \pm 38.83
Diphoton: 949.82 \pm 18.34
bkg: 16003.06 \pm 676.50
signal: 47.57 \pm 0.17


In [66]:
%%bash
# List the signal histogram JSON files of the 1lep_1tau category under the "basic" tag.
ls hists/basic/1lep_1tau/signal/*json

hists/basic/1lep_1tau/signal/dR_tau_e.json
hists/basic/1lep_1tau/signal/dR_tau_mu.json
hists/basic/1lep_1tau/signal/electron_eta1.json
hists/basic/1lep_1tau/signal/electron_iso1.json
hists/basic/1lep_1tau/signal/electron_phi1.json
hists/basic/1lep_1tau/signal/electron_pT1.json
hists/basic/1lep_1tau/signal/mtaue.json
hists/basic/1lep_1tau/signal/mtaumu.json
hists/basic/1lep_1tau/signal/muon_eta1.json
hists/basic/1lep_1tau/signal/muon_iso1.json
hists/basic/1lep_1tau/signal/muon_phi1.json
hists/basic/1lep_1tau/signal/muon_pT1.json
hists/basic/1lep_1tau/signal/n_electron.json
hists/basic/1lep_1tau/signal/n_muon.json
hists/basic/1lep_1tau/signal/n_tau.json
hists/basic/1lep_1tau/signal/pho_eta1.json
hists/basic/1lep_1tau/signal/pho_eta2.json
hists/basic/1lep_1tau/signal/pho_id1.json
hists/basic/1lep_1tau/signal/pho_id2.json
hists/basic/1lep_1tau/signal/pho_phi1.json
hists/basic/1lep_1tau/signal/pho_phi2.json
hists/basic/1lep_1tau/signal/pho_pT1.json
hists/basic/1lep_1tau/signal/pho_pT2.json
