In [None]:
# Notebook setup: lower the process priority and make the local helper
# scripts importable.
# Add the relevant scripts from LArMachineLearningData
# Nice the process so it can run with lots of cores on low priority
import os
os.nice(20)  # adds 20 to the process niceness (Unix only)

# Add local paths
import sys
# The '_' variable is only exported by some shells; indexing os.environ
# directly raised KeyError (and aborted this first cell) whenever the kernel
# was launched without it. Fall back to an empty string instead.
hnlDIR = os.environ.get('_', '')
sys.path.append('../pyscript')

# From pyscript Library
# NOTE(review): these star imports presumably supply the helper names used
# further down (e.g. col_dict, plot_tick, plot_title); which module defines
# each one cannot be told from this notebook.
from Plotting import *
from Dictionary import *
from HelperFunctions import *
from SystematicsHelpers import *

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

import pyhf
from pyhf.contrib.viz import brazil

# Run pyhf on its NumPy tensor backend.
pyhf.set_backend("numpy")
# Seed NumPy's global RNG so repeated runs start from the same state
# (presumably what makes the toy-based cells reproducible -- TODO confirm).
np.random.seed(42)

In [None]:
# Output directory for the figures written by the plotting cells below.
savePath = "../plot_files/toy_distributions/"
# plt.savefig does not create missing directories, so make sure the target
# exists before any figure is written (no-op when it is already there).
os.makedirs(savePath, exist_ok=True)

In [None]:
def find_nearest(array, value):
    """Return the index of the entry of ``array`` that lies closest to ``value``.

    ``array`` is converted with ``np.asarray`` first. When several entries are
    equally close, the first one is returned (``argmin`` semantics).
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

In [None]:
# Colours shared by both figures: signal+background hypothesis first,
# background-only hypothesis second.
sb_col = col_dict['MintGreen']
b_col = "gold"  # alternative: col_dict['Peach']

<h1> Simple Models </h1>

In [None]:
# Two-bin model built with pyhf's uncorrelated-background helper: per-bin
# signal yields, background yields and background uncertainties.
model = pyhf.simplemodels.uncorrelated_background(
    signal=[12.0, 11.0],
    bkg=[50.0, 52.0],
    bkg_uncertainty=[3.0, 7.0],
)

# Observed counts per bin (identical to the background yields above).
observations = [50.0, 52.0]
# observations = [51, 48]

# Full dataset handed to pyhf: observed counts followed by the model's
# auxiliary data.
data = [*observations, *model.config.auxdata]

print(
    f"Channels: {model.config.channels}",
    f"Samples: {model.config.samples}",
    f"Parameters: {model.config.parameters}",
    sep="\n",
)

<h1> Real Models </h1>

<h1> Upper Limits using Asymptotic Calculator</h1>

In [None]:
# Grid of signal-strength values scanned for the upper limit.
poi_vals = np.linspace(0, 1, 100)

# level=0.1 is the CLs threshold at which the limit is read off.
# NOTE(review): the extra keywords (return_calculator, test_stat) are
# presumably forwarded to pyhf.infer.hypotest for each scan point -- confirm
# against the pyhf version in use.
obs_limit_single, exp_limits_single, (scan, results) = (
    pyhf.infer.intervals.upper_limits.upper_limit(
        data,
        model,
        poi_vals,
        level=0.1,
        return_results=True,
        return_calculator=True,
        # return_expected_set=True,
        test_stat='qtilde',
    )
)

# Entry 2 of the expected limits is the one reported as "the" expected limit
# throughout this notebook.
print(f"Upper limit (obs): μ = {obs_limit_single:.4f}")
print(f"Upper limit (exp): μ = {exp_limits_single[2]:.4f}" + "\n")

In [None]:
# Test the signal strength that the scan above reported as the expected limit.
mu_test = exp_limits_single[2]
print(f'mu test = {mu_test}')

# With all three return_* flags set, the result unpacks into the observed CLs,
# the tail probabilities, the expected CLs band and the calculator object.
CLs_obs, CL, CLs_exp_band, asymp_calc = pyhf.infer.hypotest(
    mu_test,
    data,
    model,
    test_stat="qtilde",
    return_expected_set=True,
    return_tail_probs=True,
    return_calculator=True,
)

print(
    f'CL_sb = {CL[0]}',
    f'CL_b = {CL[1]}',
    f'CL_s obs = {CLs_obs}',
    f'CL_s exp = {CLs_exp_band[2]}',
    sep="\n",
)

In [None]:
# Repeat the hypothesis test step by step with the calculator returned by
# pyhf.infer.hypotest, so the intermediate objects can be plotted.
mu_test = exp_limits_single[2]
# mu_test = 0
# mu_test = exp_limits_single[2]*2

# Observed test statistic at mu_test.
teststat_asymp = asymp_calc.teststatistic(mu_test)

# Test-statistic distributions under signal+background and background-only.
sb_dist_asymp, b_dist_asymp = asymp_calc.distributions(mu_test)

# p-values in the order (CL_sb, CL_b, CL_s), matching the labels printed below.
p_sb_asymp, p_b_asymp, p_s_asymp = asymp_calc.pvalues(
    teststat_asymp, sb_dist_asymp, b_dist_asymp
)

print(
    f'qtilde = {teststat_asymp}',
    f'CL_sb = {p_sb_asymp}',
    f'CL_b = {p_b_asymp}',
    f'CL_s obs = CL_sb / CL_b = {p_s_asymp}',
    sep="\n",
)

In [None]:
# Bin edges over which the asymptotic distributions are discretised.
bins = np.arange(-6, 6, 0.1)

# Probability content of each bin = CDF(upper edge) - CDF(lower edge), for the
# signal+background and the background-only distribution respectively.
sb_dist_step = np.array([
    sb_dist_asymp.cdf(upper) - sb_dist_asymp.cdf(lower)
    for lower, upper in zip(bins, bins[1:])
])
b_dist_step = np.array([
    b_dist_asymp.cdf(upper) - b_dist_asymp.cdf(lower)
    for lower, upper in zip(bins, bins[1:])
])

In [None]:
# Figure: binned asymptotic test-statistic distributions with the observed
# value and the two hatched tail regions.
fig, ax = plt.subplots(1, 1, figsize=(6, 4), sharey=False)

# One value per bin edge: prepend a zero so the curves line up with `bins`.
sb_curve = np.insert(sb_dist_step, 0, 0)
b_curve = np.insert(b_dist_step, 0, 0)

#---------------------------------------------------------------------#
ax.step(bins, sb_curve, label=r'f($\tilde{q}_{\mu}|s+b$)', c=sb_col)
ax.step(bins, b_curve, label=r'f($\tilde{q}_{\mu}|b$)', c=b_col)

ax.axvline(x=teststat_asymp, c='r', lw=2, ls='--', label=r'$\hat{q}_{expected}$')

# Bin edge closest to the observed test statistic splits the hatched regions.
bound_idx = find_nearest(bins, value=teststat_asymp)

# Hatch the s+b curve from that edge upwards ...
ax.fill_between(
    bins[bound_idx:],
    sb_curve[bound_idx:],
    step="pre",
    alpha=1,
    color="none",
    hatch="xxx",
    edgecolor=sb_col,
    linewidth=0.0,
    label=r'$p_{s+b}$',
)

# ... and the b-only curve from the lowest edge up to it.
ax.fill_between(
    bins[:bound_idx+1],
    b_curve[:bound_idx+1],
    step="pre",
    alpha=1,
    color="none",
    hatch="xxx",
    edgecolor=b_col,
    linewidth=0.0,
    label=r'$p_{b}$',
)
#---------------------------------------------------------------------#

ax.legend(loc='best', fontsize=16)

#---------------------------------------------------------------------#
ax.set_xlim(-6, 10)
ax.set_ylim(0, 0.05)

plot_tick(ax, 16)

# title = r"Test statistics distribution for $\mu = {:.2f}$".format(mu_test)
title = ""
plot_title(ax, title, r"$\tilde{q}_{\mu}$", r"f($\tilde{q}_{\mu}|\mu^{\prime}$)", 16)
#---------------------------------------------------------------------#
fig.tight_layout()

plt.savefig(savePath + "asymtotic.png", dpi=200)
plt.show()

<h1> Toys Time </h1>

In [None]:
# Number of pseudo-experiments handed to the toy calculator (ntoys).
nsamples = 4000
print(f"n Samples: {nsamples}")

# Toy-based counterpart of the asymptotic calculator, same data/model/test
# statistic as above.
toy_calc = pyhf.infer.calculators.ToyCalculator(
    data,
    model,
    test_stat='qtilde',
    ntoys=nsamples,
)

In [None]:
# Display the value reported earlier as the expected upper limit; the toy
# scan below is centred on it.
exp_limits_single[2]

In [None]:
# Signal strengths to test with toys: one grid step below the expected limit
# and the first nine grid offsets (starting at zero) above it.
expected_limit = exp_limits_single[2]
toy_mu_vals_lb = np.array([expected_limit - poi_vals[1:2]])
toy_mu_vals_ub = np.array([expected_limit + poi_vals[:9]])

# Both pieces are single-row 2-D arrays: join them column-wise, take the row
# and sort it in ascending order.
toy_mu_vals = np.sort(np.hstack((toy_mu_vals_lb, toy_mu_vals_ub))[0])

print(toy_mu_vals)

In [None]:
# Evaluate the toy-based CLs at every candidate signal strength.
# The call order inside the loop is kept as-is: distributions() draws the
# toys, so reordering would change which random numbers each point receives.
cls_per_mu = []
for mu_test in toy_mu_vals:
    teststat_toy = toy_calc.teststatistic(mu_test)
    sb_dist_toy, b_dist_toy = toy_calc.distributions(mu_test)
    p_sb_toy, p_b_toy, p_s_toy = toy_calc.pvalues(
        teststat_toy, sb_dist_toy, b_dist_toy
    )
    # Only the CLs value (third p-value) is kept for the scan.
    cls_per_mu.append(p_s_toy)

cls_toy_vals = np.array(cls_per_mu)



In [None]:
# Pick the scanned point whose toy CLs is closest to 0.1 (the same level used
# for the asymptotic upper limit) and show its index, CLs and mu.
nearest_idx = find_nearest(cls_toy_vals, 0.1)

for shown in (nearest_idx, cls_toy_vals[nearest_idx], toy_mu_vals[nearest_idx]):
    print(shown)

In [None]:
# Re-run the toy calculation at the selected signal strength so that the
# distributions plotted below correspond to that point.
# NOTE(review): this draws a fresh set of toys, so the CLs printed here need
# not equal the scan value that selected this point.
mu_test = toy_mu_vals[nearest_idx]
print("nearest mu = {}".format(mu_test))
print(f"mu test: {mu_test}")

teststat_toy = toy_calc.teststatistic(mu_test)

sb_dist_toy, b_dist_toy = toy_calc.distributions(mu_test)

p_sb_toy, p_b_toy, p_s_toy = toy_calc.pvalues(
    teststat_toy, sb_dist_toy, b_dist_toy
)

print(
    f'CL_sb = {p_sb_toy}',
    f'CL_b = {p_b_toy}',
    f'CL_s obs = CL_sb / CL_b = {p_s_toy}',
    sep="\n",
)

<h1> Plot </h1>

In [None]:
# Figure: toy-based test-statistic distributions with the observed value and
# the two hatched tail regions (log-scale y axis).
fig, ax1 = plt.subplots(1, 1, figsize=(6, 4), sharey=False)

bins = np.arange(0, 10, 0.15)
#---------------------------------------------------------------------#
# Normalised histograms of the toy test statistics under each hypothesis;
# the bin heights are kept for the hatched fills below.
sb_hist, _, _ = ax1.hist(
    sb_dist_toy.samples,
    bins=bins,
    histtype="step",
    density=True,
    label=r'f($\tilde{q}_{\mu}|s+b$)',
    linewidth=2,
    color=sb_col,
)

b_hist, _, _ = ax1.hist(
    b_dist_toy.samples,
    bins=bins,
    histtype="step",
    density=True,
    label=r'f($\tilde{q}_{\mu}|b$)',
    linewidth=2,
    color=b_col,
)

ax1.axvline(x=teststat_toy, c='r', lw=2, ls='--', label=r'$\hat{q}_{expected}$')

# Bin edge closest to the observed test statistic splits the hatched regions.
bound_idx = find_nearest(bins, value=teststat_toy)

# One value per bin edge: prepend a zero so the heights line up with `bins`.
sb_heights = np.insert(sb_hist, 0, 0)
b_heights = np.insert(b_hist, 0, 0)

ax1.fill_between(
    bins[bound_idx:],
    sb_heights[bound_idx:],
    step="pre",
    alpha=1,
    color="none",
    hatch="xxx",
    edgecolor=sb_col,
    linewidth=0.0,
    label=r'$p_{s+b}$',
)

ax1.fill_between(
    bins[:bound_idx+1],
    b_heights[:bound_idx+1],
    step="pre",
    alpha=1,
    color="none",
    hatch="xxx",
    edgecolor=b_col,
    linewidth=0.0,
    label=r'$p_{b}$',
)
#---------------------------------------------------------------------#
# title = r"Test statistics distribution for $\mu = {:.2f}$".format(mu_test)
title = ""

plot_title(ax1, title, r"$\tilde{q}_{\mu}$", r"f($\tilde{q}_{\mu}|\mu^{\prime}$)", 16)
plot_tick(ax1, 16)
ax1.legend(fontsize=16)

ax1.set_yscale('log')
ax1.set_ylim(0.01, 100)
ax1.set_xlim(0, 10)
#---------------------------------------------------------------------#
fig.tight_layout()

plt.savefig(savePath + "toy.png", dpi=200)
plt.show()

In [None]:
# Expected limit from the asymptotic scan, then the toy-selected signal
# strength, one per line.
print(exp_limits_single[2], toy_mu_vals[nearest_idx], sep="\n")

In [None]:
# Difference between the asymptotic expected limit and the toy-selected mu,
# as a percentage of the asymptotic value.
asymptotic_limit = exp_limits_single[2]
(asymptotic_limit - toy_mu_vals[nearest_idx]) / asymptotic_limit * 100

In [None]:
# Same difference, this time as a percentage of the toy-selected mu.
toy_limit = toy_mu_vals[nearest_idx]
(exp_limits_single[2] - toy_limit) / toy_limit * 100