In [1]:
import os
import glob
import numpy  as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib widget

from iminuit import Minuit

from scipy.stats       import rv_continuous
from scipy.interpolate import interp1d

plt.rcParams["font.size"]      = 15
plt.rcParams["font.family"]    = "sans-serif"
plt.rcParams["font.monospace"] = "Computer Modern Sans Serif"
plt.rcParams["figure.max_open_warning"] = 50

In [2]:
indir = os.path.expandvars("$LUSTRE/NEXT100/{background}/{component}/detsim/prod/esmeralda/")

backgrounds = ["214Bi", "208Tl"]
components  = ["LIGHT_TUBE", "SAPPHIRE_WINDOW", "SIPM_BOARD"]

In [3]:
filtered_summaries = []

for background in backgrounds:
    
    print("Background:", background)
    print("-----------")
    
    for component in components:
        
        try:
            filename = glob.glob(indir.format(background=background, component=component) + "/esmeralda_combined*")[0]
        except IndexError:
            continue
            
        summary_ = pd.read_hdf(filename, "Summary/Events")
        Ntot = len(summary_)
        summary_ = summary_[~summary_["evt_out_of_map"]]
        print(component, "In map selection:", int(len(summary_)/Ntot*100), "%")
        
        summary_["background"] = background
        summary_["component"]  = component
        
        filtered_summaries.append(summary_)
    print()

summary = pd.concat(filtered_summaries)

Background: 214Bi
-----------
LIGHT_TUBE In map selection: 78 %
SAPPHIRE_WINDOW In map selection: 98 %
SIPM_BOARD In map selection: 97 %

Background: 208Tl
-----------
LIGHT_TUBE In map selection: 95 %
SAPPHIRE_WINDOW In map selection: 98 %
SIPM_BOARD In map selection: 97 %



In [4]:
indir = os.path.expandvars("$LUSTRE/NEXT100/0nubb/detsim/prod/esmeralda/")

filename = glob.glob(indir + "/esmeralda_combined*")[0]

summary_ = pd.read_hdf(filename, "Summary/Events")
Ntot = len(summary_)
summary_ = summary_[~summary_["evt_out_of_map"]]

print("Signal:")
print("-------")
print("In map selection:", int(len(summary_)/Ntot*100), "%")

summary_["background"] = "0nubb"
summary_["component"]  = "0nubb"

summary = pd.concat([summary, summary_])

Signal:
-------
In map selection: 96 %


In [5]:
sel = summary_["evt_ntrks"] == 1
print("1 track selection:", int(np.sum(sel)/len(sel)*100))

1 track selection: 49


In [6]:
emin, emax, de = 2.0, 2.7, 0.01
ebins = np.arange(emin, emax, de)

In [7]:
fig, ax = plt.subplots(1, 1, figsize=[8, 5])

for background in summary["background"].unique():
    
    sel = summary["background"] == background
#     sel = sel & (summary["evt_ntrks"]>=1)
    energy = summary[sel]["evt_energy"].values
    
    #plot
#     fig, ax = plt.subplots(1, 1, figsize=[8, 5])
#     ax.set_title(background)
    h, _ = np.histogram(energy, bins=ebins)
    
    ax.bar(ebins[:-1], h, width=de, alpha=0.5, label=background)
    ax.set_yscale("linear")
    ax.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
# for background in backgrounds:
#     for component in components:
        
#         sel = (summary["background"] == background) & (summary["component"]==component)
#         energy = summary[sel]["evt_energy"].values
        
#         #plot
#         fig, ax = plt.subplots(1, 1, figsize=[8, 5])
#         ax.set_title(background + " " + component)
#         h, _ = np.histogram(energy, bins=ebins)
#         ax.bar(ebins[:-1], h, width=de, alpha=0.5, label=background+component)
#         ax.set_yscale("log")

# Create and test PDFs

In [9]:
emin, emax, de = 2.0, 2.7, 0.01
ebins = np.arange(emin, emax, de)

pdf_collection = {}

for background in summary["background"].unique():
    
    sel = summary["background"] == background
    energy = summary[sel]["evt_energy"].values
    
    h, _ = np.histogram(energy, bins=ebins)
    pdf = h/(np.sum(h)*de)
    
    pdf_collection[background] = pdf

In [10]:
plt.figure()
plt.plot(ebins[:-1], pdf_collection["214Bi"])
plt.yscale("linear")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [13]:
pdf = pdf_collection["208Tl"]

bincenters = (ebins[:-1] + ebins[1:])/2.
f = interp1d(bincenters, pdf, kind="cubic")

In [14]:
plt.figure(figsize=[8, 5])
plt.plot(bincenters, pdf)

es = np.arange(f.x[0], f.x[-1], 0.001)
plt.plot(es, f(es))

plt.yscale("linear")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# PDF

In [29]:
class pdf(rv_continuous):
    """
    Params:
    -------
    a, b   : lower and upper limits
    numpdf : tuple (x, y) of numerical pdf
    funcpdf: function defining pdf
    n      : number of partitions of to numerically compute the cdf
    kind   : kind of interpolation (see interp1d function)
    
    Methods:
    -------
    see scipy.stats.rv_continuos
    """
    def __init__(self, name=None, a=None, b=None, numpdf=None, funcpdf=None, n=10000, kind="cubic"):
        super().__init__(momtype=0, name=name, a=a, b=b)
        
        if numpdf and funcpdf:
            raise Exception("Both numerical and functional pdf introduced")
        if (numpdf is None) and (funcpdf is None):
            raise Exception("Introduce pdf")
        
        if numpdf:
            func = self.create_pdf_function(numpdf, kind)
            self.set_pdf(func)
            
            func = self.create_cdf_function(n, kind)
            self.set_cdf(func)
            
        if funcpdf:
            self.set_pdf(funcpdf)
    
    ## PDF
    def create_pdf_function(self, numpdf, kind):
        x, y = numpdf
        self.get_norm(x, y)
        self.a, self.b = min(x), max(x)
        f = interp1d(x, y/self.norm, kind=kind, bounds_error=False, fill_value=0)
        return f
        
    def set_pdf(self, func):
        self._pdf = func
        
    def get_norm(self, x, y):
        ym = (y[1:] + y[:-1])/2.
        norm = np.sum(np.diff(x)*ym)
        self.norm = norm
        return norm
        
    ## CDF (this greatly speeds up the computation of rvs)
    def create_cdf_function(self, n, kind):
        dx = (self.b - self.a)/n
        x = np.arange(self.a-dx, self.b + dx, dx)
        y = self.pdf(x)
        ym = (y[1:] + y[:-1])/2.
        cdf = np.cumsum(np.diff(x)*ym)
        f = interp1d(x[1:], cdf, kind=kind, bounds_error=False, fill_value=0)
        return f
    
    def set_cdf(self, func):
        self._cdf = func

In [16]:
x = bincenters
y = pdf_collection["214Bi"]

In [17]:
p = pdf(numpdf=(x, y))

In [18]:
x = np.arange(2, 2.7, 0.0001)
y = p.pdf(x)

In [19]:
r = p.rvs(size=20000)

In [20]:
plt.figure()
plt.plot(x, y)
plt.hist(r, bins=200, density=True);
# plt.yscale("log")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Ext PDF

In [296]:
class Extpdf:
    """
    defines an extended pdf for a pdf collection such that:
    
    ext-pdf = sum(w_i*pdf_i) where wi represents the psf_i weight
    
    Params:
    -------
    pdf collection: dictionary of (name, pdf instances)
    
    Methods:
    -------
    pdf, rvs (same as pdf class)
    eval_logL: returns the -log likelihood evaluated at given input
    """
    
    def __init__(self, collection):
        self.collection = collection
        self.n      = len(collection)
        self.names  = list(collection.keys())
        self.args_order = dict([(i, name) for i, name in enumerate(self.names)])
        
        
    def pdf(self, x, *params):
        N = np.sum(params)
        products = []
        for i in self.args_order:
            n    = params[i]
            name = self.args_order[i]
            p    = self.collection[name]
            products.append((n/N)*p.pdf(x))
        return np.sum(products, axis=0)
    
    
    def rvs(self, *params, size=1):
        r = []
        for i in self.args_order:
            n    = params[i]
            name = self.args_order[i]
            p    = self.collection[name]
            r.append(p.rvs(size=size*n))
        return np.concatenate(r)
    
    
    def eval_logL(self, x):
        def logL(*params):
            N = np.sum(params)
            p = self.pdf(x, *params)
            p = p[p>0]
            ll = -N + np.sum(np.log(N*p))
            return -ll
        return logL

In [297]:
collection = dict()

x = bincenters

y = pdf_collection["208Tl"]
p = pdf(numpdf=(x, y), name="208Tl")
collection["208Tl"] = p

y = pdf_collection["214Bi"]
p = pdf(numpdf=(x, y), name="214Bi")
collection["214Bi"] = p

y = pdf_collection["0nubb"]
p = pdf(numpdf=(x, y), name="0nubb")
collection["0nubb"] = p

In [298]:
# collection = {"208Tl": p}

i = Extpdf(collection)

In [299]:
i.args_order

{0: '208Tl', 1: '214Bi', 2: '0nubb'}

In [303]:
x = np.arange(2.0, 2.7, 0.001)

ns = [0, 1, 1]
params = dict(zip(i.names, ns))

params = ns

y = i.pdf(x, *params);

In [304]:
r = i.rvs(*params, size=100)

In [305]:
plt.figure()
plt.plot(x, y)

# y = []
# for name in params:
#     n = params[name]
#     p = pdf_collection[name]
#     y.append((n/np.sum(ns))*p)
# y = np.sum(y, axis=0)
    
# plt.scatter(bincenters, y, s=10, c="r")
plt.hist(r, bins=150, density=True);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Test Fits

In [332]:
i = Extpdf(collection)
print(i.args_order)

{0: '208Tl', 1: '214Bi', 2: '0nubb'}


In [333]:
# generate experiment
ns = [0, 100, 0]
experiment = i.rvs(*ns, size=1)

In [334]:
logL = i.eval_logL(experiment)

In [335]:
guess = [len(experiment)]*len(i.args_order)
m = Minuit(logL, *guess)
m.errordef = Minuit.LIKELIHOOD
m.limits = [(0, len(experiment))]*len(i.args_order)
m.migrad()
m.hesse()

0,1,2,3,4
FCN = -460.3,FCN = -460.3,Nfcn = 148,Nfcn = 148,Nfcn = 148
EDM = 2.84e-05 (Goal: 0.1),EDM = 2.84e-05 (Goal: 0.1),,,
Valid Minimum,Valid Parameters,SOME Parameters at limit,SOME Parameters at limit,SOME Parameters at limit
Below EDM threshold (goal x 10),Below EDM threshold (goal x 10),Below call limit,Below call limit,Below call limit
Covariance,Hesse ok,Accurate,Pos. def.,Not forced

0,1,2,3,4,5,6,7,8
,Name,Value,Hesse Error,Minos Error-,Minos Error+,Limit-,Limit+,Fixed
0.0,x0,0.0,2.6,,,0,100,
1.0,x1,96,10,,,0,100,
2.0,x2,5,4,,,0,100,

0,1,2,3
,x0,x1,x2
x0,3.75e-05,-6.72e-05 (-0.001),-8.12e-07
x1,-6.72e-05 (-0.001),104,-8.49 (-0.230)
x2,-8.12e-07,-8.49 (-0.230),13


In [336]:
# compute chi-square

binsize = 0.001
bins = np.arange(2, 2.7, binsize)
N = len(bins)-1
observed, _ = np.histogram(experiment, bins=bins)

# expected, integrate pdf
expected = []
dx = binsize/100
for b in range(len(bins)-1):
    a, b = bins[b], bins[b+1]
    xs = np.arange(a, b+dx, dx)
    expected.append(np.sum(i.pdf(xs, *m.values))*dx)
expected = np.array(expected)*np.sum(observed)
    
sel = expected>0
chi2_dof = np.sum((observed[sel]-expected[sel])**2/expected[sel])/(N-len(m.values))

print("Reduced chi-square:", round(chi2_dof, 3))

Reduced chi-square: 0.939


In [337]:
x = np.arange(2, 2.7, 0.001)
fit_result      = i.pdf(x, *m.values)
expected_result = i.pdf(x, *ns)

In [338]:
plt.figure()

plt.plot(x, fit_result     , c="r", label="fit")
plt.plot(x, expected_result, c="b", label="expected")
plt.hist(experiment, bins=200, density=True, alpha=0.5, label="experiment")
plt.legend()
plt.yscale("linear");

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …