In [1]:
import argparse
import numpy as np
import sys, traceback
import uproot 
import awkward as ak
from pathlib import Path

from typing import Dict, List 
import re
import pickle
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, roc_auc_score


In [4]:
# Load the pre-filled histogram inputs produced upstream.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open('./dijet_pythia_mc16A.pkl', 'rb') as pickle_file:
    HistMap = pickle.load(pickle_file)

In [5]:
###### define functions
def GetHistBin(histogram_name: str):
    """Return the bin edges to use for a histogram, chosen from its name.

    The name is searched for a known variable keyword; the first keyword
    that occurs anywhere in the name (checked in the order listed below)
    selects the binning. Because this is a substring match, a name
    containing 'newbdt' is binned with the 'bdt' edges.

    Parameters
    ----------
    histogram_name : str
        Histogram name, e.g. '500_LeadingJet_Forward_Gluon_pt'.

    Returns
    -------
    numpy.ndarray
        1-D array of bin edges (number of bins + 1 entries).

    Raises
    ------
    ValueError
        If no known keyword occurs in ``histogram_name``. Previously the
        function fell through and returned None, which only failed later
        when the edges were indexed.
    """
    # (keyword, (first edge, last edge, number of edges)).
    # Order matters: it reproduces the precedence of the original if/elif chain.
    binning_by_keyword = (
        ('pt', (0, 2000, 61)),
        ('eta', (-2.5, 2.5, 51)),
        ('ntrk', (0, 60, 61)),
        ('bdt', (-0.8, 0.7, 61)),
        ('width', (0, 0.4, 61)),
        ('c1', (0, 0.4, 61)),
    )
    for keyword, (first_edge, last_edge, n_edges) in binning_by_keyword:
        if keyword in histogram_name:
            return np.linspace(first_edge, last_edge, n_edges)

    known_keywords = ", ".join(keyword for keyword, _ in binning_by_keyword)
    raise ValueError(
        f"No binning defined for histogram '{histogram_name}'; "
        f"expected the name to contain one of: {known_keywords}"
    )

In [21]:
# Variables that have a "<selection>_<variable>" entry in HistMap["values"].
label_var = ["pt", "eta", "ntrk", "width", "c1", "bdt", "newbdt"]

# A single selection and variable are hard-coded for now; these two lines
# are placeholders for loops over HistMap["weights"].keys() and label_var.
weights_hist_name = '500_LeadingJet_Forward_Gluon'
l_var = 'pt'

values_hist_name = f"{weights_hist_name}_{l_var}"
bin_edges = GetHistBin(histogram_name=values_hist_name)

# Weighted histogram of the selected variable: (bin contents, bin edges).
histogram_contents = np.histogram(
    a=HistMap["values"][values_hist_name],
    bins=bin_edges,
    range=(bin_edges[0], bin_edges[-1]),
    weights=HistMap["weights"][weights_hist_name],
)



In [34]:
# Sanity check: show the first stored value of the selected variable.
HistMap["values"][values_hist_name][0]

503.8865966796875

In [35]:
# histogram_contents[1] holds the bin edges returned by np.histogram;
# show the edge at index 15 (the lower edge of bin 15).
histogram_contents[1][15]

500.00000000000006

In [37]:
nbins = len(bin_edges) - 1

# Per-bin accumulator for the sum of squared weights. float64 is used so
# that squaring small weights does not lose precision or underflow the way
# a float32 accumulator can.
sum_w2_at_var = np.zeros((nbins,), dtype=np.float64)

values_at_var = np.asarray(HistMap["values"][values_hist_name])

# np.digitize returns 1-based bin numbers (0 = below the first edge,
# len(bin_edges) = at/above the last edge); shift to zero-based indices so
# that valid bins are 0 .. nbins-1, underflow is -1 and overflow is nbins.
inds = np.digitize(x=values_at_var, bins=bin_edges) - 1

# np.histogram treats the last bin as closed on the right, whereas
# np.digitize sends values equal to the last edge to the overflow slot.
# Move those entries into the last bin so both binnings agree.
inds[values_at_var == bin_edges[-1]] = nbins - 1

In [25]:
# Number of entries that were assigned a bin index.
len(inds)

8428

In [36]:
# `inds` was already shifted to zero-based bin indices where it was computed,
# so display it as-is. Subtracting 1 again here would show indices that are
# off by one when the notebook is run top to bottom on a fresh kernel.
inds

array([15, 17, 17, ..., 16, 16, 16])

In [38]:
# Fill sum_w2_at_var with the sum of squared weights of the entries that
# fall in each bin. Entries whose index is outside 0 .. nbins-1
# (underflow / overflow) match no bin and are skipped.
event_weights = HistMap["weights"][weights_hist_name]
for bin_index in range(nbins):
    in_this_bin = inds == bin_index
    squared_weights = np.power(event_weights[in_this_bin], 2)
    sum_w2_at_var[bin_index] = np.sum(squared_weights)


In [40]:
# Bins that received a non-zero sum of squared weights.
np.nonzero(sum_w2_at_var > 0)

(array([15, 16, 17]),)

In [39]:
# Bins with non-zero weighted content; expected to match the bins that have
# a non-zero sum of squared weights.
np.nonzero(histogram_contents[0] > 0)

(array([15, 16, 17]),)

In [None]:
# Per-bin sum of squared weights, returned as (sum_w2 per bin, bin edges).
#
# The squared weights are histogrammed directly with the same values and bin
# edges as histogram_contents, so both use exactly the same bin assignment
# (including np.histogram's closed last bin). Re-histogramming the bin lower
# edges with the per-bin sums as weights gives the same quantity, but sends
# the sums through np.histogram's cumulative-sum/difference step, which adds
# avoidable floating-point rounding.
#
# NOTE(review): this is the sum of w^2 (a variance-like quantity); if a
# per-bin uncertainty is wanted, take np.sqrt(histogram_err[0]) - confirm
# against how histogram_err is used downstream.
histogram_err = np.histogram(
    a=HistMap["values"][values_hist_name],
    weights=np.square(np.asarray(HistMap["weights"][weights_hist_name])),
    bins=bin_edges,
)