In [1]:
import numpy as np
import matplotlib.pyplot as plt 
import pickle
import sklearn.metrics as metrics
import os

In [2]:
filelist = os.listdir("../../outputs/cr_investigations")
filelist.remove(".ipynb_checkpoints")

In [3]:
filelist

['pt200_akt01_sdmass_argmin.pkl',
 'pt200_akt02_sdmass_argmin.pkl',
 'pt200_akt03_sdmass_argmin.pkl',
 'pt200_akt04_sdmass_argmin.pkl',
 'pt300_akt01_sdmass_argmin.pkl',
 'pt300_akt02_sdmass_argmin.pkl',
 'pt300_akt03_sdmass_argmin.pkl',
 'pt300_akt04_sdmass_argmin.pkl',
 'pt400_akt01_sdmass_argmin.pkl',
 'pt400_akt02_sdmass_argmin.pkl',
 'pt400_akt03_sdmass_argmin.pkl',
 'pt400_akt04_sdmass_argmin.pkl',
 'pt500_akt01_sdmass_argmin.pkl',
 'pt500_akt02_sdmass_argmin.pkl',
 'pt500_akt03_sdmass_argmin.pkl',
 'pt500_akt04_sdmass_argmin.pkl',
 'pt600_akt01_sdmass_argmin.pkl',
 'pt600_akt02_sdmass_argmin.pkl',
 'pt600_akt03_sdmass_argmin.pkl',
 'pt600_akt04_sdmass_argmin.pkl',
 'pt200_akt01_regmass_argmin.pkl',
 'pt200_akt02_regmass_argmin.pkl',
 'pt200_akt03_regmass_argmin.pkl',
 'pt200_akt04_regmass_argmin.pkl',
 'pt300_akt01_regmass_argmin.pkl',
 'pt300_akt02_regmass_argmin.pkl',
 'pt300_akt03_regmass_argmin.pkl',
 'pt300_akt04_regmass_argmin.pkl',
 'pt400_akt01_regmass_argmin.pkl',
 'pt4

In [4]:
#2017 integrated luminosity and QCD cross sections
IL = 44.99
xs_170to300 = 103700
xs_300to470 = 6835
xs_470to600 = 549.5
xs_600to800 = 156.5
xs_800to1000 = 26.22
xs_1000to1400 = 7.475
xs_1400to1800 = 0.6482
xs_1800to2400 = 0.08742
xs_2400to3200 = 0.005237
xs_3200toInf = 0.0001353

In [5]:
master_dict = {}

In [6]:
for k in filelist:
    file = "../../outputs/cr_investigations/" + str(k)
    with open(file, "rb") as f:
        vars = pickle.load(f)

    hgg = vars[0]['Hgg']['Hgg']
    hbb = vars[0]['Hbb']['Hbb']
    #signal scale factors
    scalesHJ = ((44.99*(0.471*1000)*0.0817)/(hgg['entries']))
    scalesHbb = ((44.99*(0.274*1000)*0.581)/(hbb['entries']))
    #do the scaling 
    hgg_entries = list(hgg.keys())
    for i in range(1, len(hgg_entries)):
        hgg[hgg_entries[i]].view(flow=True)[:] *= scalesHJ
    
    hbb_entries = list(hgg.keys())
    for i in range(1, len(hbb_entries)):
        hbb[hbb_entries[i]].view(flow=True)[:] *= scalesHbb

    #get the totals for each histogram
    hgg_totals_dict = {}
    for i in range(1, len(hgg_entries)):
        hgg_totals_dict[hgg_entries[i]] = hgg[hgg_entries[i]][0:len(hgg[hgg_entries[i]].view()):sum]
    
    hbb_totals_dict = {}
    for i in range(1, len(hbb_entries)):
        hbb_totals_dict[hbb_entries[i]] = hbb[hbb_entries[i]][0:len(hbb[hbb_entries[i]].view()):sum]

    #get the true positive fractions
    hgg_truth_dict = {}
    for i in range(1, len(hgg_entries)):
        temp_list = []
        for j in range(1, len(hgg[hgg_entries[i]].view())+1):
            temp_list.append(hgg[hgg_entries[i]][0:j:sum].value/hgg_totals_dict[hgg_entries[i]].value)
        hgg_truth_dict[hgg_entries[i]] = temp_list
    
    hbb_truth_dict = {}
    for i in range(1, len(hbb_entries)):
        temp_list = []
        for j in range(1, len(hbb[hbb_entries[i]].view())+1):
            temp_list.append(hbb[hbb_entries[i]][0:j:sum].value/hbb_totals_dict[hbb_entries[i]].value)
        hbb_truth_dict[hbb_entries[i]] = temp_list

    #combine the qcds into a dictionary
    qcd_dict = {}
    qcd_dict['q173'] = vars[0]['QCD_Pt_170to300_TuneCP5_13TeV_pythia8']['QCD_Pt_170to300']
    qcd_dict['q347'] = vars[0]['QCD_Pt_300to470_TuneCP5_13TeV_pythia8']['QCD_Pt_300to470']
    qcd_dict['q476'] = vars[0]['QCD_Pt_470to600_TuneCP5_13TeV_pythia8']['QCD_Pt_470to600']
    qcd_dict['q68'] = vars[0]['QCD_Pt_600to800_TuneCP5_13TeV_pythia8']['QCD_Pt_600to800']
    qcd_dict['q810'] = vars[0]['QCD_Pt_800to1000_TuneCP5_13TeV_pythia8']['QCD_Pt_800to1000']
    qcd_dict['q1014'] = vars[0]['QCD_Pt_1000to1400_TuneCP5_13TeV_pythia8']['QCD_Pt_1000to1400']
    qcd_dict['q1418'] = vars[0]['QCD_Pt_1400to1800_TuneCP5_13TeV_pythia8']['QCD_Pt_1400to1800']
    qcd_dict['q1824'] = vars[0]['QCD_Pt_1800to2400_TuneCP5_13TeV_pythia8']['QCD_Pt_1800to2400']
    qcd_dict['q2432'] = vars[0]['QCD_Pt_2400to3200_TuneCP5_13TeV_pythia8']['QCD_Pt_2400to3200']
    qcd_dict['q32inf'] = vars[0]['QCD_Pt_3200toInf_TuneCP5_13TeV_pythia8']['QCD_Pt_3200toInf']

    #QCD scale factors dictionary
    qcd_sf_dict = {}
    qcd_sf_dict['scales170to300'] = (((xs_170to300*1000)*IL)/(qcd_dict['q173']['entries']))
    qcd_sf_dict['scales300to470'] = (((xs_300to470*1000)*IL)/(qcd_dict['q347']['entries']))
    qcd_sf_dict['scales470to600'] = (((xs_470to600*1000)*IL)/(qcd_dict['q476']['entries']))
    qcd_sf_dict['scales600to800'] = (((xs_600to800*1000)*IL)/(qcd_dict['q68']['entries']))
    qcd_sf_dict['scales800to1000'] = (((xs_800to1000*1000)*IL)/(qcd_dict['q810']['entries']))
    qcd_sf_dict['scales1000to1400'] = (((xs_1000to1400*1000)*IL)/(qcd_dict['q1014']['entries']))
    qcd_sf_dict['scales1400to1800'] = (((xs_1400to1800*1000)*IL)/(qcd_dict['q1418']['entries']))
    qcd_sf_dict['scales1800to2400'] = (((xs_1800to2400*1000)*IL)/(qcd_dict['q1824']['entries']))
    qcd_sf_dict['scales2400to3200'] = (((xs_2400to3200*1000)*IL)/(qcd_dict['q2432']['entries']))
    qcd_sf_dict['scales3200toInf'] = (((xs_3200toInf*1000)*IL)/(qcd_dict['q32inf']['entries']))

    #scale all the qcd values
    entries = list(qcd_dict['q173'].keys())
    for i in range(0, len(qcd_dict)):
        qcd_range = list(qcd_dict.keys())[i]
        qcd_scales = list(qcd_sf_dict.keys())[i]
        for j in range(1, len(entries)):
            qcd_dict[qcd_range][entries[j]].view(flow=True)[:] *= qcd_sf_dict[qcd_scales]

    #combine the qcds into individual variable fields
    qcd_vars = {}
    for i in range(1, len(entries)):
        temp_hist = qcd_dict['q173'][entries[i]]
        for j in range(1, len(qcd_dict)):
            temp_hist += qcd_dict[list(qcd_dict.keys())[j]][entries[i]]
        qcd_vars[entries[i]] = temp_hist

    #totals for each qcd hist
    qcd_totals_dict = {}
    for i in range(1, len(entries)):
        qcd_totals_dict[entries[i]] = qcd_vars[entries[i]][0:len(qcd_vars[entries[i]].view()):sum]

    #false positive fractions for each qcd variable
    qcd_false_positive_dict = {}
    for i in range(1, len(entries)):
        temp_list = []
        for j in range(1, len(qcd_vars[entries[i]].view())+1):
            temp_list.append(qcd_vars[entries[i]][0:j:sum].value/qcd_totals_dict[entries[i]].value)
        qcd_false_positive_dict[entries[i]] = temp_list

    hgg_auc_dict = {}
    for i in range(1, len(hgg_entries)):
        hgg_auc_dict[hgg_entries[i]] = metrics.auc(
                                            hgg_truth_dict[hgg_entries[i]],
                                            qcd_false_positive_dict[hgg_entries[i]]
                                        )
    for i in range(1, len(hgg_entries)):
        if hgg_auc_dict[hgg_entries[i]] >= 0.5:
            hgg_auc_dict[hgg_entries[i]] = 1 - hgg_auc_dict[hgg_entries[i]]

    hbb_auc_dict = {}
    for i in range(1, len(hbb_entries)):
        hbb_auc_dict[hgg_entries[i]] = metrics.auc(
                                            hbb_truth_dict[hbb_entries[i]],
                                            qcd_false_positive_dict[hbb_entries[i]]
                                        )
    for i in range(1, len(hbb_entries)):
        if hbb_auc_dict[hbb_entries[i]] >= 0.5:
            hbb_auc_dict[hbb_entries[i]] = 1 - hbb_auc_dict[hbb_entries[i]]

    category = str(k)[:-4]

    master_dict[category] = {}
    master_dict[category]['Hgg'] = hgg_auc_dict
    master_dict[category]['Hbb'] = hbb_auc_dict

In [7]:
hgg_list = {}
for i in master_dict:
    hgg_list[i] = master_dict[i]['Hgg']['Color_Ring']

In [8]:
min(hgg_list, key=hgg_list.get)

'pt400to500_akt02_regmass_argmax'

In [9]:
hgg_list[min(hgg_list, key=hgg_list.get)]

0.41868115229110436

In [10]:
hbb_list = {}
for i in master_dict:
    hbb_list[i] = master_dict[i]['Hbb']['Color_Ring']

In [11]:
min(hbb_list, key=hbb_list.get)

'pt400to500_akt02_regmass_argmax'

In [12]:
hbb_list[min(hbb_list, key=hbb_list.get)]

0.39117847339000755