In [1]:
import numpy as np
import matplotlib.pyplot as plt 
import pickle
import sklearn.metrics as metrics
import os
import numba 
import copy

In [2]:
# List the files in the multi-variable histogram output directory, ignoring
# Jupyter's checkpoint directory. Filtering (instead of list.remove) avoids a
# ValueError when ".ipynb_checkpoints" is not present, e.g. on a fresh checkout.
filelist = [
    name
    for name in os.listdir("../../../outputs/cr_investigations/multi_var_hists")
    if name != ".ipynb_checkpoints"
]

In [3]:
# Read the pickled histogram collection stored in cambridge_argmax.pkl.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
path = "../../../outputs/cr_investigations/multi_var_hists/cambridge_argmax.pkl"
with open(path, mode='rb') as pkl_file:
    cring = pickle.load(pkl_file)

In [4]:
# 2017 integrated luminosity and QCD cross sections, one per pT-binned sample
# (the suffix of each name matches the pT range in the QCD dataset names).
# NOTE(review): units are not stated in this notebook. The QCD scale factors
# multiply each cross section by 1000 before multiplying by IL, which suggests
# IL is in fb^-1 and the cross sections in pb -- confirm.
IL = 44.99
xs_170to300 = 103700
xs_300to470 = 6835
xs_470to600 = 549.5
xs_600to800 = 156.5
xs_800to1000 = 26.22
xs_1000to1400 = 7.475
xs_1400to1800 = 0.6482
xs_1800to2400 = 0.08742
xs_2400to3200 = 0.005237
xs_3200toInf = 0.0001353

In [5]:
# Container for the window-scan results: filled with one entry per
# 'mass_window_<start>_<stop>' category, each holding the 'Hgg' and 'Hbb'
# dictionaries of AUC values.
master_dict = {}

In [6]:
# Build scaled copies of the two signal samples.
# Deep copies are scaled so the histograms stored inside `cring` stay
# untouched; re-running this cell therefore cannot apply the scale factors
# a second time.
signal_dict = {}
signal_dict['Hgg'] = copy.deepcopy(cring[0]['Hgg']['Hgg'])
signal_dict['Hbb'] = copy.deepcopy(cring[0]['Hbb']['Hbb'])

hgg_scaled = signal_dict['Hgg']
hbb_scaled = signal_dict['Hbb']

#signal scale factors: IL * (constant * 1000) * constant / number of entries
# NOTE(review): 0.471 / 0.274 look like cross sections and 0.0817 / 0.581 like
# branching fractions -- confirm and give them names.
scalesHJ = ((IL*(0.471*1000)*0.0817)/(hgg_scaled['entries']))
scalesHbb = ((IL*(0.274*1000)*0.581)/(hbb_scaled['entries']))

#do the scaling
# 'entries' is the raw event counter, not a histogram, so it is skipped by
# name rather than by its position in the dictionary.
hgg_entries = list(hgg_scaled.keys())
for name in hgg_entries:
    if name == 'entries':
        continue
    hgg_scaled[name].view(flow=True)[:] *= scalesHJ

hbb_entries = list(hbb_scaled.keys())
for name in hbb_entries:
    if name == 'entries':
        continue
    hbb_scaled[name].view(flow=True)[:] *= scalesHbb

In [7]:
#combine the qcds into a dictionary
# One row per pT-binned QCD sample: (short key, pT range label, cross section).
# Keeping the key, dataset name and cross section together guarantees that each
# sample is scaled with its own factor instead of relying on two dictionaries
# happening to share the same insertion order.
qcd_samples = [
    ('q173', '170to300', xs_170to300),
    ('q347', '300to470', xs_300to470),
    ('q476', '470to600', xs_470to600),
    ('q68', '600to800', xs_600to800),
    ('q810', '800to1000', xs_800to1000),
    ('q1014', '1000to1400', xs_1000to1400),
    ('q1418', '1400to1800', xs_1400to1800),
    ('q1824', '1800to2400', xs_1800to2400),
    ('q2432', '2400to3200', xs_2400to3200),
    ('q32inf', '3200toInf', xs_3200toInf),
]

qcd_dict = {}
#QCD scale factors dictionary
qcd_sf_dict = {}
for short_key, pt_range, cross_section in qcd_samples:
    dataset = 'QCD_Pt_' + pt_range + '_TuneCP5_13TeV_pythia8'
    # Deep copy so the histograms inside `cring` are never modified;
    # re-running this cell cannot scale a sample twice.
    sample = copy.deepcopy(cring[0][dataset]['QCD_Pt_' + pt_range])
    scale = (((cross_section*1000)*IL)/(sample['entries']))

    qcd_dict[short_key] = sample
    qcd_sf_dict['scales' + pt_range] = scale

    #scale all the qcd values ('entries' is the raw counter, not a histogram)
    for name, hist in sample.items():
        if name == 'entries':
            continue
        hist.view(flow=True)[:] *= scale

#combine the qcds into individual variable fields
entries = list(qcd_dict['q173'].keys())
qcd_sample_list = list(qcd_dict.values())
qcd_vars_scaled = {}
for name in entries:
    if name == 'entries':
        continue
    # Start from a copy of the first sample: `+=` works in place and would
    # otherwise turn qcd_dict['q173'] into the sum of all samples.
    combined = copy.deepcopy(qcd_sample_list[0][name])
    for sample in qcd_sample_list[1:]:
        combined += sample[name]
    qcd_vars_scaled[name] = combined

In [8]:
def cumulative_fractions(hist_dict):
    """Return the cumulative bin fractions of every variable in ``hist_dict``.

    Parameters
    ----------
    hist_dict : dict
        Maps names to histograms. A key called 'entries' is skipped.
        One-axis histograms are used directly and stored under their
        dictionary key; histograms with several axes are projected onto each
        axis and stored under the axis name.

    Returns
    -------
    dict
        ``{name: [f_1, ..., f_n]}`` where ``f_k`` is the summed value of bins
        ``0..k-1`` divided by the summed value of all ``n`` bins of that same
        one-dimensional histogram.
    """
    fractions = {}
    for name, hist in hist_dict.items():
        if name == 'entries':
            continue
        if len(hist.axes) == 1:
            projections = {name: hist}
        else:
            # Project once per axis instead of once per bin.
            projections = {
                axis_name: hist.project(axis_name)
                for axis_name in hist.axes.name
            }
        for key, projection in projections.items():
            n_bins = len(projection.view())
            # Normalise each variable by the total of its own projection; the
            # lookup is by the same key the result is stored under, so it does
            # not depend on the histogram's dictionary key matching an axis name.
            total = projection[0:n_bins:sum].value
            fractions[key] = [
                projection[0:k:sum].value/total
                for k in range(1, n_bins+1)
            ]
    return fractions


def auc_per_variable(signal_fractions, qcd_fractions):
    """Return ``{variable: metrics.auc(signal, qcd)}`` for every signal variable.

    NOTE(review): ``metrics.auc(x, y)`` integrates y over x, so the signal
    fractions are the x coordinates and the QCD fractions the y coordinates
    here. The argument order is kept as is because later cells select the
    minimum of these values -- confirm this is the intended convention.
    """
    return {
        variable: metrics.auc(signal_curve, qcd_fractions[variable])
        for variable, signal_curve in signal_fractions.items()
    }


# Scan windows [lo:hi) of bin indices on the fourth axis of 'Color_Ring'
# (SDMass in the histogram displayed at the end of this notebook) and store
# the per-variable AUC values of both signals against QCD.
# NOTE(review): hi runs up to 51 while the displayed axis has 50 bins --
# confirm the upper bound is intended.
for lo in range(45, 48):
    for hi in range(lo+2, 52):

        # Shallow copies are enough: only the 'Color_Ring' key is rebound to a
        # new sliced histogram, the scaled originals are never modified.
        hgg = copy.copy(hgg_scaled)
        hbb = copy.copy(hbb_scaled)

        hgg['Color_Ring'] = hgg['Color_Ring'][:,:,:,lo:hi]
        hbb['Color_Ring'] = hbb['Color_Ring'][:,:,:,lo:hi]

        # An empty signal window cannot be normalised, so skip it.
        if (hgg['Color_Ring'].sum().value == 0) or (hbb['Color_Ring'].sum().value == 0):
            continue

        qcd_vars = copy.copy(qcd_vars_scaled)
        qcd_vars['Color_Ring'] = qcd_vars['Color_Ring'][:,:,:,lo:hi]

        #get the true positive fractions
        hgg_truth_dict = cumulative_fractions(hgg)
        hbb_truth_dict = cumulative_fractions(hbb)

        #false positive fractions for each qcd variable
        qcd_false_positive_dict = cumulative_fractions(qcd_vars)

        hgg_auc_dict = auc_per_variable(hgg_truth_dict, qcd_false_positive_dict)
        hbb_auc_dict = auc_per_variable(hbb_truth_dict, qcd_false_positive_dict)

        category = 'mass_window_' + str(lo) + '_' + str(hi)
        master_dict[category] = {}
        master_dict[category]['Hgg'] = hgg_auc_dict
        master_dict[category]['Hbb'] = hbb_auc_dict

In [9]:
# Display the AUC values stored for every scanned window.
master_dict

{'mass_window_45_47': {'Hgg': {'Color_Ring': 0.4932425231810516,
   'PT': 0.5333368576609064,
   'Mass': 0.5097597051123297,
   'SDMass': 0.331916207818804},
  'Hbb': {'Color_Ring': 0.5038238849175458,
   'PT': 0.5743970433803549,
   'Mass': 0.5080681625282684,
   'SDMass': 0.34961810477022043}},
 'mass_window_45_48': {'Hgg': {'Color_Ring': 0.4944132955020004,
   'PT': 0.5295599672257226,
   'Mass': 0.49436321114130755,
   'SDMass': 0.38383039203451846},
  'Hbb': {'Color_Ring': 0.5071649673459365,
   'PT': 0.5711261646449066,
   'Mass': 0.48695858814439374,
   'SDMass': 0.3905803349078424}},
 'mass_window_45_49': {'Hgg': {'Color_Ring': 0.49505422191157045,
   'PT': 0.5253610155001185,
   'Mass': 0.4806704649747207,
   'SDMass': 0.3938333873590792},
  'Hbb': {'Color_Ring': 0.5081478481099421,
   'PT': 0.5672397413328656,
   'Mass': 0.4726216792740605,
   'SDMass': 0.39833994201825507}},
 'mass_window_45_50': {'Hgg': {'Color_Ring': 0.49528402922677395,
   'PT': 0.523474308517898,
   'Mas

In [10]:
# Hgg 'Color_Ring' AUC of every scanned window, keyed by window name.
hgg_list = {
    window: aucs['Hgg']['Color_Ring']
    for window, aucs in master_dict.items()
}

In [11]:
# Name of the window with the smallest Hgg 'Color_Ring' AUC value.
min(hgg_list, key=hgg_list.get)

'mass_window_45_47'

In [12]:
# Smallest Hgg 'Color_Ring' AUC value over all scanned windows.
min(hgg_list.values())

0.4932425231810516

In [13]:
# Hbb 'Color_Ring' AUC of every scanned window, keyed by window name.
hbb_list = {
    window: aucs['Hbb']['Color_Ring']
    for window, aucs in master_dict.items()
}

In [14]:
# Name of the window with the smallest Hbb 'Color_Ring' AUC value.
min(hbb_list, key=hbb_list.get)

'mass_window_45_47'

In [15]:
# Smallest Hbb 'Color_Ring' AUC value over all scanned windows.
min(hbb_list.values())

0.5038238849175458

In [16]:
# Inspect the scaled Hgg sample: the raw 'entries' counter plus its histograms.
hgg_scaled

{'entries': 2392373,
 'Color_Ring': Hist(
   Regular(50, 0, 10, underflow=False, overflow=False, name='Color_Ring'),
   Regular(50, 150, 2500, underflow=False, overflow=False, name='PT'),
   Regular(50, 50, 150, underflow=False, overflow=False, name='Mass'),
   Regular(50, 50, 150, underflow=False, overflow=False, name='SDMass'),
   storage=Weight()) # Sum: WeightedSum(value=522.391, variance=0.37803)}