In [None]:
import awkward as ak
import numpy as np

import json

import matplotlib.pyplot as plt
from yahist import Hist1D, Hist2D
#import useful packages

In [None]:
# Load the merged, scored events from parquet and keep only the "Data" events,
# i.e. the rows whose process_id equals 0.
# NOTE: `data` is narrowed further by the mass-window cell below, so after a kernel
# restart the notebook has to be re-run starting from this cell.
df = ak.from_parquet('Data_04Oct23/merged_nominal_scored.parquet')
data = df[df.process_id==0]

In [None]:
# Keep only events inside the diphoton-mass window 55 <= m_gg <= 1000.
# A comparison on an awkward array already yields a boolean mask, so the masks
# are built directly from the comparisons.
diphoton_mass = data.Diphoton_mass
min_mass_mask = diphoton_mass >= 55
low_mass_mask = diphoton_mass <= 1000
data = data[low_mass_mask & min_mass_mask]
print(len(data))

In [None]:
# Collect, in sorted order, every field of `data` whose name contains 'MY_80'
# and print one per line.
# NOTE(review): the earlier comment on this cell said MY=90 while the filter
# string is 'MY_80' — confirm which mass point is intended.
res_points = sorted(name for name in data.fields if 'MY_80' in name)
for point in res_points:
    print(point)

In [None]:
# Read the optimisation results (JSON) into `optim_log`.
optim_results_path = 'Data_04Oct23/optim_results.json'
with open(optim_results_path) as results_file:
    optim_log = json.load(results_file)

In [None]:
# Build a lookup from score name to its category boundaries.
# The boundary list of every entry is stored in reversed order.
cats = {
    entry['score']: entry['category_boundaries'][::-1]
    for entry in optim_log
}

In [None]:
# Bin edges for the diphoton-mass histograms: 23 evenly spaced edges from 65 to 180,
# i.e. 22 bins of width 115/22 (about 5.23).
# NOTE(review): 24 edges would give an exact step of 5 — confirm the intended binning.
mgg_bins = np.linspace(65,180,23)

In [None]:
# Diphoton-mass spectrum of the events whose PNN score exceeds 0.99999,
# one histogram per MX hypothesis in `peak_list` (MY fixed at 90).
peak_list = ['240']#,'280','300','320']

for peak in peak_list:
    tag = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_90'.format(peak)
    # Use the boolean comparison itself as the mask. Calling ak.where() with only a
    # condition returns index positions instead, and counting the non-zero entries
    # of those indices does not give the number of selected events.
    peak_mask = data[tag] > 0.99999
    peak_count = ak.count_nonzero(peak_mask)
    h1 = Hist1D(data.Diphoton_mass[peak_mask], bins=mgg_bins, label=peak+': '+str(peak_count), overflow=False)
    h1.plot()

plt.yscale('log')
plt.legend(loc='upper right')
plt.ylabel('Count')
plt.xlabel('Diphoton mass')

In [None]:
def cat_mask(df, score, low, high):
    """Return a boolean mask selecting entries with ``low < df[score] <= high``.

    Args:
        df: Container indexable by field name (``df[score]``) whose values
            support element-wise comparison.
        score: Name of the field holding the score values.
        low: Exclusive lower edge of the category.
        high: Inclusive upper edge of the category.

    Returns:
        Element-wise boolean mask, true where the score lies in ``(low, high]``.
    """
    values = df[score]
    at_or_below_high = values <= high
    above_low = values > low
    return at_or_below_high & above_low

In [None]:
# One PNN-score histogram per MX hypothesis (MY fixed at 90), each written to its own PNG.
peaks = ['240','280','300','320','360','400','450','500','550','600','650','700','750','800','850','900','950','1000']
# The bin edges do not depend on the mass point, so build them once.
score_bins = np.linspace(0.999,1.0001,100)
for peak in peaks:
    score='intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_90'.format(peak)
    text='PNN Score MX{}'.format(peak)
    h1 = Hist1D(data[score],bins=score_bins,label=text,color='black',overflow=False)
    h1.plot()

    # Disabled overlay of the category boundaries, kept for reference as comments
    # (it used to live in a bare string literal that was evaluated on every iteration):
    # for cat in range(10):
    #     print(cat)
    #     print(cats[score][cat])
    #     print('----------------')
    #     if cat==0:
    #         count=ak.count_nonzero(cat_mask(data, score, cats[score][cat], 1))
    #     else:
    #         count=ak.count_nonzero(cat_mask(data, score, cats[score][cat], cats[score][cat-1]))
    #     plt.axvline(x=cats[score][cat], color='C%d'%cat, label='SR {}: {} events'.format(cat,count))

    plt.title('PNN Score Distribution for MX {} MY 90'.format(peak))
    plt.legend(loc='upper left')
    plt.ylabel('Data Count')
    plt.xlabel('PNN Score')

    # NOTE(review): the output directory is a hard-coded absolute path; it must already exist.
    plt.savefig('/home/users/iareed/public_html/XtoYH_plots/ABCD_plots/input_distributions/MX_{}_score.png'.format(peak))
    # Clear the current figure so the next mass point starts from an empty canvas.
    plt.clf()

In [None]:
# Diphoton-mass spectrum of the data in each of the first ten score categories
# for the MX = 240, MY = 90 hypothesis.
peak = '240'
tag = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_90'.format(peak)

# Sideband mask (m_gg <= 82 or m_gg >= 98). It does not depend on the category, so it
# is computed once here instead of on every loop iteration.
# NOTE(review): this mask is never applied to the histograms or counts below —
# confirm whether the 82-98 window was meant to be blinded in this plot.
blind_mask = (data.Diphoton_mass <= 82) | (data.Diphoton_mass >= 98)

for cat in range(10):
    # Category `cat` holds the events with cats[tag][cat+1] < score <= cats[tag][cat].
    tmp_mask = cat_mask(data, tag, cats[tag][cat+1], cats[tag][cat])
    print(cats[tag][cat+1])
    cat_count = ak.count_nonzero(tmp_mask)
    h1 = Hist1D(data.Diphoton_mass[tmp_mask], bins=mgg_bins, label='Cat {}: {}'.format(str(cat),str(cat_count)), overflow=False)

    h1.plot()

plt.yscale('log')
plt.legend(loc='upper right')
plt.title('Data Count for Top Catagories MX_{} MY_90'.format(peak))
plt.ylabel('Data Count')
plt.xlabel('Diphoton mass (GeV)')
#plt.savefig('/home/users/iareed/public_html/XtoYH_plots/cat_plots/MX_{}.png'.format(peak))

In [None]:
# Sort the MX_1000 / MY_800 score values and store them in descending order.
score_field = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_1000_MY_800'
scores = np.sort(data[score_field])[::-1]
print(scores)

In [None]:
# Print each distinct value found among the first 100000 entries of `scores`,
# skipping consecutive repeats (so `scores` is expected to be sorted).
# NOTE: the output depends on the current order of `scores`, which other cells change.
old_val = 10  # sentinel that the first score is compared against
# Bound the scan by the array length so a sample with fewer than 100000 events
# does not raise IndexError.
n_scan = min(len(scores), 100000)
for idx in range(n_scan):
    new_val=scores[idx]
    if new_val!=old_val:
        print(new_val)
        old_val=new_val

In [None]:
# Flip the order of `scores` and show it.
# NOTE: `scores` is reassigned from itself, so every re-run of this cell flips the order again.
scores=scores[::-1]
print(scores)

In [None]:
# Number of entries in the `event` field of the currently selected `data`.
len(data.event)

In [None]:
# Number of distinct `event` values among the rows whose MX_240 / MY_90 score exceeds the threshold.
# NOTE(review): 0.9999868869781494 looks like a hand-copied category boundary — confirm its origin.
len(np.unique(data.event[data.intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_240_MY_90>0.9999868869781494]))

In [None]:
# Display the `year` field of `data`.
data.year

In [None]:
# LaTeX table rows for MY = 90: for each MX hypothesis, count the score categories
# that contain no data events and list their indices.
peaks = ['240','280','300','320','360','400','450','500','550','600','650','700','750','800','850','900','950','1000']
for peak in peaks:
    tag = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_90'.format(peak)
    empty_cat_list = []
    # NOTE(review): 57 categories are hard-coded, so cats[tag] needs at least 58 boundaries — confirm.
    for cat in range(57):
        # Category `cat` holds the events with cats[tag][cat+1] < score <= cats[tag][cat].
        tmp_mask = cat_mask(data, tag, cats[tag][cat+1], cats[tag][cat])
        cat_count = ak.count_nonzero(tmp_mask)
        if cat_count==0:
            empty_cat_list.append(cat)
    # Raw string: the row must end with the LaTeX row terminator `\\` followed by `\hline`.
    # The non-raw '\\\hline' contained the invalid escape `\h` and printed `\\hline`.
    print(r'{} & {} & {} \\\hline'.format(peak, len(empty_cat_list), empty_cat_list))

In [None]:
# LaTeX table rows for MY = 70: for each MX hypothesis, count the score categories
# that contain no data events and list their indices.
peaks = ['240','280','300','320','360','400','450','500','550','600','650','700','750','800','850','900','950','1000']
for peak in peaks:
    tag = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_70'.format(peak)
    empty_cat_list = []
    # NOTE(review): 57 categories are hard-coded, so cats[tag] needs at least 58 boundaries — confirm.
    for cat in range(57):
        # Category `cat` holds the events with cats[tag][cat+1] < score <= cats[tag][cat].
        tmp_mask = cat_mask(data, tag, cats[tag][cat+1], cats[tag][cat])
        cat_count = ak.count_nonzero(tmp_mask)
        if cat_count==0:
            empty_cat_list.append(cat)
    # Raw string: the row must end with the LaTeX row terminator `\\` followed by `\hline`.
    # The non-raw '\\\hline' contained the invalid escape `\h` and printed `\\hline`.
    print(r'{} & {} & {} \\\hline'.format(peak, len(empty_cat_list), empty_cat_list))

In [None]:
# LaTeX table rows for MY = 80: for each MX hypothesis, count the score categories
# that contain no data events and list their indices.
peaks = ['240','280','300','320','360','400','450','500','550','600','650','700','750','800','850','900','950','1000']
for peak in peaks:
    tag = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_80'.format(peak)
    empty_cat_list = []
    # NOTE(review): 57 categories are hard-coded, so cats[tag] needs at least 58 boundaries — confirm.
    for cat in range(57):
        # Category `cat` holds the events with cats[tag][cat+1] < score <= cats[tag][cat].
        tmp_mask = cat_mask(data, tag, cats[tag][cat+1], cats[tag][cat])
        cat_count = ak.count_nonzero(tmp_mask)
        if cat_count==0:
            empty_cat_list.append(cat)
    # Raw string: the row must end with the LaTeX row terminator `\\` followed by `\hline`.
    # The non-raw '\\\hline' contained the invalid escape `\h` and printed `\\hline`.
    print(r'{} & {} & {} \\\hline'.format(peak, len(empty_cat_list), empty_cat_list))

In [None]:
# LaTeX table rows for MY = 100: for each MX hypothesis, count the score categories
# that contain no data events and list their indices.
peaks = ['240','280','300','320','360','400','450','500','550','600','650','700','750','800','850','900','950','1000']
for peak in peaks:
    tag = 'intermediate_transformed_score_NMSSM_XYH_Y_gg_H_bb_MX_{}_MY_100'.format(peak)
    empty_cat_list = []
    # NOTE(review): 57 categories are hard-coded, so cats[tag] needs at least 58 boundaries — confirm.
    for cat in range(57):
        # Category `cat` holds the events with cats[tag][cat+1] < score <= cats[tag][cat].
        tmp_mask = cat_mask(data, tag, cats[tag][cat+1], cats[tag][cat])
        cat_count = ak.count_nonzero(tmp_mask)
        if cat_count==0:
            empty_cat_list.append(cat)
    # Raw string: the row must end with the LaTeX row terminator `\\` followed by `\hline`.
    # The non-raw '\\\hline' contained the invalid escape `\h` and printed `\\hline`.
    print(r'{} & {} & {} \\\hline'.format(peak, len(empty_cat_list), empty_cat_list))

In [None]:
# Look at the normal DY and signal PNN score distributions