#  Fitting Spectrum

This notebook shows the application of the fitting code to a toy 6-component background model, e.g. 3 isotopes times 2 volumes. The fit is done simultaneously on the E and Z 1D histograms, assuming the two to be uncorrelated. The fit is performed with iminuit.

In [None]:
import matplotlib.pylab as plt
#import scipy.optimize as sop
from iminuit import Minuit
import tables as tb
import pandas as pd
from Histogram import Histogram as hist
from PDF import PDF
import numpy as np
from Fits import Fit
from pprint import pprint # we use this to pretty print some stuff later
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Silence all Python warnings for the rest of the notebook.
# `np.warnings` was only an accidental re-export of the standard-library
# `warnings` module and is no longer available in recent NumPy releases, so
# the standard-library module is used directly (same behaviour).
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib defaults used by every figure below.
plt.rcParams["figure.figsize"]          = 32, 24   # (width, height) in inches
plt.rcParams["font.size"]               = 15
plt.rcParams["figure.max_open_warning"] = 100      # this notebook opens many figures

In [None]:
# Fix the seed of NumPy's global random generator so that a fresh
# "Restart Kernel -> Run All" reproduces exactly the same toy samples (and
# therefore the same fit result). Change SEED to study statistical fluctuations.
SEED = 12345
np.random.seed(SEED)

# Number of events per background component. First index is isotope type,
# second index is volume type. This will be used as the MC truth sample.
N_0_0 = 150000
N_0_1 = 350000
N_1_0 = 130000
N_1_1 = 270000
N_2_0 = 200000
N_2_1 = 300000

# Poisson smearing of the number of events. This will be used as the
# DATA-like sample. The draws are made in a fixed order so that, together
# with the seed above, each value is reproducible.
N_0_0p = np.random.poisson(N_0_0)
N_0_1p = np.random.poisson(N_0_1)
N_1_0p = np.random.poisson(N_1_0)
N_1_1p = np.random.poisson(N_1_1)
N_2_0p = np.random.poisson(N_2_0)
N_2_1p = np.random.poisson(N_2_1)

In [None]:
# MC truth samples: for every background component, draw Nsim values of E and
# of Z from an analytic distribution. These large samples are the templates
# from which the PDFs are built later.
#   E shape depends on the isotope (first index):
#       0 -> exponential, 1 -> Rayleigh, 2 -> chi-square
#   Z shape depends on the volume (second index):
#       0 -> Normal(30, 9), 1 -> Normal(90, 25)
Nsim = int(1e6)

# Isotope 0
D_0_0_e_pdf = np.random.exponential(scale=200, size=Nsim)
D_0_0_z_pdf = np.random.normal(loc=30, scale=9, size=Nsim)
D_0_1_e_pdf = np.random.exponential(scale=200, size=Nsim)
D_0_1_z_pdf = np.random.normal(loc=90, scale=25, size=Nsim)

# Isotope 1
D_1_0_e_pdf = np.random.rayleigh(scale=500, size=Nsim)
D_1_0_z_pdf = np.random.normal(loc=30, scale=9, size=Nsim)
D_1_1_e_pdf = np.random.rayleigh(scale=500, size=Nsim)
D_1_1_z_pdf = np.random.normal(loc=90, scale=25, size=Nsim)

# Isotope 2
D_2_0_e_pdf = np.random.chisquare(df=1500, size=Nsim)
D_2_0_z_pdf = np.random.normal(loc=30, scale=9, size=Nsim)
D_2_1_e_pdf = np.random.chisquare(df=1500, size=Nsim)
D_2_1_z_pdf = np.random.normal(loc=90, scale=25, size=Nsim)

In [None]:
# DATA-like samples: E and Z values for each component, drawn from the same
# distributions as the MC truth templates, but with the Poisson-smeared number
# of events of that component (N_i_jp).

# Isotope 0 (exponential energy spectrum)
D_0_0_e = np.random.exponential(scale=200, size=N_0_0p)
D_0_0_z = np.random.normal(loc=30, scale=9, size=N_0_0p)
D_0_1_e = np.random.exponential(scale=200, size=N_0_1p)
D_0_1_z = np.random.normal(loc=90, scale=25, size=N_0_1p)

# Isotope 1 (Rayleigh energy spectrum)
D_1_0_e = np.random.rayleigh(scale=500, size=N_1_0p)
D_1_0_z = np.random.normal(loc=30, scale=9, size=N_1_0p)
D_1_1_e = np.random.rayleigh(scale=500, size=N_1_1p)
D_1_1_z = np.random.normal(loc=90, scale=25, size=N_1_1p)

# Isotope 2 (chi-square energy spectrum)
D_2_0_e = np.random.chisquare(df=1500, size=N_2_0p)
D_2_0_z = np.random.normal(loc=30, scale=9, size=N_2_0p)
D_2_1_e = np.random.chisquare(df=1500, size=N_2_1p)
D_2_1_z = np.random.normal(loc=90, scale=25, size=N_2_1p)


In [None]:
# Histogram every MC and DATA-like sample in E and in Z on a common binning.
nbin = 150
minE, maxE = 0, 2000
minZ, maxZ = 0, 150


def _hist_e(sample):
    """Histogram `sample` on the common energy binning."""
    return hist(sample, nbin, minE, maxE)


def _hist_z(sample):
    """Histogram `sample` on the common Z binning."""
    return hist(sample, nbin, minZ, maxZ)


# DATA-like, one histogram per component (not observable in real data)
h_0_0_e, h_0_0_z = _hist_e(D_0_0_e), _hist_z(D_0_0_z)
h_0_1_e, h_0_1_z = _hist_e(D_0_1_e), _hist_z(D_0_1_z)
h_1_0_e, h_1_0_z = _hist_e(D_1_0_e), _hist_z(D_1_0_z)
h_1_1_e, h_1_1_z = _hist_e(D_1_1_e), _hist_z(D_1_1_z)
h_2_0_e, h_2_0_z = _hist_e(D_2_0_e), _hist_z(D_2_0_z)
h_2_1_e, h_2_1_z = _hist_e(D_2_1_e), _hist_z(D_2_1_z)

# MC truth templates
h_0_0_e_pdf, h_0_0_z_pdf = _hist_e(D_0_0_e_pdf), _hist_z(D_0_0_z_pdf)
h_0_1_e_pdf, h_0_1_z_pdf = _hist_e(D_0_1_e_pdf), _hist_z(D_0_1_z_pdf)
h_1_0_e_pdf, h_1_0_z_pdf = _hist_e(D_1_0_e_pdf), _hist_z(D_1_0_z_pdf)
h_1_1_e_pdf, h_1_1_z_pdf = _hist_e(D_1_1_e_pdf), _hist_z(D_1_1_z_pdf)
h_2_0_e_pdf, h_2_0_z_pdf = _hist_e(D_2_0_e_pdf), _hist_z(D_2_0_z_pdf)
h_2_1_e_pdf, h_2_1_z_pdf = _hist_e(D_2_1_e_pdf), _hist_z(D_2_1_z_pdf)

# In DATA-like, only the sum of all background components is known:
# concatenate the six samples and histogram the result.
all_data_e = np.concatenate([D_0_0_e, D_0_1_e, D_1_0_e, D_1_1_e, D_2_0_e, D_2_1_e])
all_data_z = np.concatenate([D_0_0_z, D_0_1_z, D_1_0_z, D_1_1_z, D_2_0_z, D_2_1_z])
h_total_e = _hist_e(all_data_e)
h_total_z = _hist_z(all_data_z)

In [None]:
# MC truth energy histograms: one curve per isotope, volume 0 only.
# Each MC histogram is drawn against its own bin centres, and every curve is
# labelled so the overlaid components can be told apart.
plt.figure()
plt.plot(h_0_0_e_pdf.bins, h_0_0_e_pdf.hist, label='MC 0_0')
plt.plot(h_1_0_e_pdf.bins, h_1_0_e_pdf.hist, label='MC 1_0')
plt.plot(h_2_0_e_pdf.bins, h_2_0_e_pdf.hist, label='MC 2_0')
plt.xlabel('E')
plt.ylabel('entries / bin')
plt.title('MC truth energy histograms')
plt.legend()

# DATA-like energy histograms: one set of points per isotope, volume 1 only,
# with sqrt(N) error bars on the bin contents.
plt.figure()
plt.errorbar(h_0_1_e.bins, h_0_1_e.hist, yerr=np.sqrt(h_0_1_e.hist),
             marker='.', elinewidth=1, linewidth=0, label='DATA-like 0_1')
plt.errorbar(h_1_1_e.bins, h_1_1_e.hist, yerr=np.sqrt(h_1_1_e.hist),
             marker='.', elinewidth=1, linewidth=0, label='DATA-like 1_1')
plt.errorbar(h_2_1_e.bins, h_2_1_e.hist, yerr=np.sqrt(h_2_1_e.hist),
             marker='.', elinewidth=1, linewidth=0, label='DATA-like 2_1')
plt.xlabel('E')
plt.ylabel('entries / bin')
plt.title('DATA-like energy histograms')
plt.legend();

In [None]:
# MC truth Z histograms for the two volumes of isotope 0.
# The second curve previously paired the bins of component 0_1 with the
# contents of component 1_1; it now uses component 0_1 for both, matching the
# DATA-like plot below.
plt.figure()
plt.plot(h_0_0_z_pdf.bins, h_0_0_z_pdf.hist, label='MC 0_0')
plt.plot(h_0_1_z_pdf.bins, h_0_1_z_pdf.hist, label='MC 0_1')
plt.xlabel('Z')
plt.ylabel('entries / bin')
plt.title('MC truth Z histograms')
plt.legend()

# DATA-like Z histograms for the same two components, with sqrt(N) error bars.
plt.figure()
plt.errorbar(h_0_0_z.bins, h_0_0_z.hist, yerr=np.sqrt(h_0_0_z.hist),
             marker='.', elinewidth=1, linewidth=0, label='DATA-like 0_0')
plt.errorbar(h_0_1_z.bins, h_0_1_z.hist, yerr=np.sqrt(h_0_1_z.hist),
             marker='.', elinewidth=1, linewidth=0, label='DATA-like 0_1')
plt.xlabel('Z')
plt.ylabel('entries / bin')
plt.title('DATA-like Z histograms')
plt.legend();


In [None]:
# Turn each MC truth histogram into a PDF object (normalized to unity, as no
# scale factor is passed), using cubic interpolation between bins.
interpol = 'cubic'

# Same construction order as the histograms: E then Z for each component.
(pdf_0_0_e, pdf_0_0_z,
 pdf_0_1_e, pdf_0_1_z,
 pdf_1_0_e, pdf_1_0_z,
 pdf_1_1_e, pdf_1_1_z,
 pdf_2_0_e, pdf_2_0_z,
 pdf_2_1_e, pdf_2_1_z) = [
    PDF(mc_hist, interpolation=interpol)
    for mc_hist in (h_0_0_e_pdf, h_0_0_z_pdf,
                    h_0_1_e_pdf, h_0_1_z_pdf,
                    h_1_0_e_pdf, h_1_0_z_pdf,
                    h_1_1_e_pdf, h_1_1_z_pdf,
                    h_2_0_e_pdf, h_2_0_z_pdf,
                    h_2_1_e_pdf, h_2_1_z_pdf)
]

# Per-observable lists; the component order (0_0, 0_1, 1_0, 1_1, 2_0, 2_1)
# is the same in both lists.
pdf_list_e = [pdf_0_0_e, pdf_0_1_e, pdf_1_0_e, pdf_1_1_e, pdf_2_0_e, pdf_2_1_e]
pdf_list_z = [pdf_0_0_z, pdf_0_1_z, pdf_1_0_z, pdf_1_1_z, pdf_2_0_z, pdf_2_1_z]

In [None]:
# Inputs of the fit, keyed by observable name ('E' and 'z').
x_dict = {
    'E': h_0_0_e.bins,   # bin positions of the energy histograms
    'z': h_0_0_z.bins,   # bin positions of the Z histograms
}
totals = {
    'E': h_total_e,      # summed DATA-like energy histogram
    'z': h_total_z,      # summed DATA-like Z histogram
}
pdfs = {
    'E': pdf_list_e,
    'z': pdf_list_z,
}
# MC truth yields, in the same component order as the PDF lists.
N_0s = np.array([N_0_0, N_0_1, N_1_0, N_1_1, N_2_0, N_2_1])

# Quick look at what goes into the fit.
for shown in (totals, list(x_dict.keys()), N_0s, N_0s.reshape(-1, 1)):
    pprint(shown)

## Fit initialization

In [None]:
# Fit initialization: build the fit object from the inputs and hand its chi2
# to Minuit as a function of one array of six parameters.
fit = Fit(x_dict, totals, pdfs, N_0s)

# All six parameters start at 1.
start_values = (1.,) * 6

m = Minuit.from_array_func(
    fit.GetIminuitChi2,
    start_values,
    error=0.1,         # initial step size, same for every parameter
    limit=(0.1, 10),   # allowed range, same for every parameter
    errordef=1,        # value conventionally used with a chi-square cost
)
m.print_param()

## Migrad minimization

In [None]:
# Run the MIGRAD minimizer on the Minuit object `m`. The call is unpacked into
# two values: `fmin` and `param` (presumably the function-minimum summary and
# the per-parameter results, as in iminuit 1.x -- confirm against the installed
# version). `param` is reassigned by the later MINOS and HESSE cells.
fmin, param = m.migrad()

In [None]:
# m.values behaves like a dictionary of {name: value} pairs for the fit
# parameters; only the values are needed here, collected into a NumPy array.
fitted_values = m.values.values()
result = np.array(fitted_values)
pprint(result)

In [None]:
# Scale factor of each component: fitted parameter times its MC truth yield.
factors = result * N_0s

# For drawing the fit result, rebuild the PDFs from the MC truth histograms,
# now with the scale factor applied and nearest-neighbour interpolation.
interpol = 'nearest'

mc_hists_e = (h_0_0_e_pdf, h_0_1_e_pdf, h_1_0_e_pdf,
              h_1_1_e_pdf, h_2_0_e_pdf, h_2_1_e_pdf)
mc_hists_z = (h_0_0_z_pdf, h_0_1_z_pdf, h_1_0_z_pdf,
              h_1_1_z_pdf, h_2_0_z_pdf, h_2_1_z_pdf)

# Energy PDFs first, then Z PDFs; component i always uses factors[i].
(pdf_0_0_ep, pdf_0_1_ep, pdf_1_0_ep,
 pdf_1_1_ep, pdf_2_0_ep, pdf_2_1_ep) = [
    PDF(mc_hist, factor=scale, interpolation=interpol)
    for mc_hist, scale in zip(mc_hists_e, factors)
]

(pdf_0_0_zp, pdf_0_1_zp, pdf_1_0_zp,
 pdf_1_1_zp, pdf_2_0_zp, pdf_2_1_zp) = [
    PDF(mc_hist, factor=scale, interpolation=interpol)
    for mc_hist, scale in zip(mc_hists_z, factors)
]

# Print one scaled PDF and the numbers that went into the scaling.
for shown in (pdf_0_0_ep, result, N_0s, factors):
    pprint(shown)

In [None]:
# Evaluation grids: 1000 evenly spaced points between the first and the last
# entry of the histogram `bins` arrays (the central values of the first and
# last bin), for E and for Z.
Ep = np.linspace(h_0_0_e.bins[0], h_0_0_e.bins[-1], num=1000)
Zp = np.linspace(h_0_0_z.bins[0], h_0_0_z.bins[-1], num=1000)

# Evaluate every scaled PDF on its grid.
(y_0_0_e, y_0_1_e, y_1_0_e,
 y_1_1_e, y_2_0_e, y_2_1_e) = [
    scaled.pdf(Ep)
    for scaled in (pdf_0_0_ep, pdf_0_1_ep, pdf_1_0_ep,
                   pdf_1_1_ep, pdf_2_0_ep, pdf_2_1_ep)
]

(y_0_0_z, y_0_1_z, y_1_0_z,
 y_1_1_z, y_2_0_z, y_2_1_z) = [
    scaled.pdf(Zp)
    for scaled in (pdf_0_0_zp, pdf_0_1_zp, pdf_1_0_zp,
                   pdf_1_1_zp, pdf_2_0_zp, pdf_2_1_zp)
]

# Total fitted spectrum in each observable: sum of all six components.
fittotal_e = (y_0_0_e + y_0_1_e + y_1_0_e
              + y_1_1_e + y_2_0_e + y_2_1_e)
fittotal_z = (y_0_0_z + y_0_1_z + y_1_0_z
              + y_1_1_z + y_2_0_z + y_2_1_z)

# Energy: per-isotope sums over both volumes ("a" = all volumes).
fit_0_a_e = y_0_0_e + y_0_1_e
fit_1_a_e = y_1_0_e + y_1_1_e
fit_2_a_e = y_2_0_e + y_2_1_e

# Z: per-volume sums over the three isotopes ("a" = all isotopes).
fit_a_0_z = y_0_0_z + y_1_0_z + y_2_0_z
fit_a_1_z = y_0_1_z + y_1_1_z + y_2_1_z


In [None]:
# Transparency of the filled component bands (also used by the cells below).
alfa = 0.5
# Uncomment for a logarithmic y axis:
#plt.semilogy()
#plt.ylim(1e-1,2e5)

# --- Energy: summed DATA-like histogram vs. fit, with per-isotope bands ---
plt.figure()
plt.errorbar(h_total_e.bins, h_total_e.hist, yerr=np.sqrt(h_total_e.hist),
             marker='.', elinewidth=1, linewidth=0, label='data', color='black')

plt.plot(Ep, fittotal_e, label='fit')

plt.fill_between(Ep, 0, fit_0_a_e, label='fit_e_0', alpha=alfa, color='r')
plt.fill_between(Ep, 0, fit_1_a_e, label='fit_e_1', alpha=alfa, color='b')
plt.fill_between(Ep, 0, fit_2_a_e, label='fit_e_2', alpha=alfa, color='g')

plt.xlabel('E')
plt.ylabel('entries / bin')
plt.legend().get_frame().set_facecolor('0.95')

# --- Z: summed DATA-like histogram vs. fit, with per-volume bands ---
plt.figure()
# The error bars must come from the Z histogram itself; the energy histogram
# was used here before, which silently drew the wrong uncertainties because
# both histograms have the same number of bins.
plt.errorbar(h_total_z.bins, h_total_z.hist, yerr=np.sqrt(h_total_z.hist),
             marker='.', elinewidth=1, linewidth=0, label='data', color='black')

plt.plot(Zp, fittotal_z, label='fit')

plt.fill_between(Zp, 0, fit_a_0_z, label='fit_z_0', alpha=alfa, color='r')
plt.fill_between(Zp, 0, fit_a_1_z, label='fit_z_1', alpha=alfa, color='b')

plt.xlabel('Z')
plt.ylabel('entries / bin')
plt.legend().get_frame().set_facecolor('0.95')


In [None]:
# One energy figure per isotope: the isotope total as a black line, with the
# contribution of each of its two volumes as a filled band.
energy_breakdown = (
    (fit_0_a_e, y_0_0_e, y_0_1_e),
    (fit_1_a_e, y_1_0_e, y_1_1_e),
    (fit_2_a_e, y_2_0_e, y_2_1_e),
)
for isotope_idx, (iso_total, iso_vol0, iso_vol1) in enumerate(energy_breakdown):
    plt.figure()
    plt.plot(Ep, iso_total,
             label='fit_e_{}_all'.format(isotope_idx), color='black')
    plt.fill_between(Ep, 0, iso_vol0,
                     label='fit_e_{}_0'.format(isotope_idx), alpha=alfa, color='r')
    plt.fill_between(Ep, 0, iso_vol1,
                     label='fit_e_{}_1'.format(isotope_idx), alpha=alfa, color='b')
    plt.legend().get_frame().set_facecolor('0.95')


In [None]:
# One Z figure per volume: the volume total as a black line, with the
# contribution of each of the three isotopes as a filled band.
z_breakdown = (
    (fit_a_0_z, (y_0_0_z, y_1_0_z, y_2_0_z)),
    (fit_a_1_z, (y_0_1_z, y_1_1_z, y_2_1_z)),
)
band_colors = ('r', 'b', 'g')
for volume_idx, (vol_total, iso_curves) in enumerate(z_breakdown):
    plt.figure()
    plt.plot(Zp, vol_total,
             label='fit_z_all_{}'.format(volume_idx), color='black')
    for isotope_idx, (iso_curve, band_color) in enumerate(zip(iso_curves, band_colors)):
        plt.fill_between(Zp, 0, iso_curve,
                         label='fit_z_{}_{}'.format(isotope_idx, volume_idx),
                         alpha=alfa, color=band_color)
    plt.legend().get_frame().set_facecolor('0.95')

## Minos error analysis

In [None]:
# Run MINOS on the Minuit object (per the iminuit documentation, MINOS
# computes asymmetric parameter errors). The return value replaces `param`,
# which previously held the MIGRAD parameter results.
param = m.minos()

## Parameter uncertainties, covariances, and confidence intervals

In [None]:
# Run HESSE on the Minuit object (per the iminuit documentation, HESSE
# estimates the parameter covariance matrix at the minimum). The return value
# replaces `param` again; the matrices are printed in the next cell.
param = m.hesse()

In [None]:
# Print the covariance matrix and then the correlation matrix of the fit
# parameters, each preceded by a header line.
matrix_reports = (
    ('Covariance mattrix as numpy array:', {}),
    ('Correlation matrix as numpy array:', {'correlation': True}),
)
for header, matrix_options in matrix_reports:
    pprint(header)
    pprint(m.np_matrix(**matrix_options))

In [None]:
# Parameter scan around the chi2 minimum: profile of parameter x0, with the
# minimum value subtracted (subtract_min=True).
x0, fx0 = m.profile('x0', subtract_min=True)

# Only `matplotlib.pylab as plt` is imported, so the plotting function must be
# reached through `plt`; a bare `plot(...)` raises NameError on a fresh kernel.
plt.figure()
plt.plot(x0, fx0)
plt.xlabel('x0')
plt.ylabel('chi2 - chi2_min');

In [None]:
# 2D scan of the chi2 (minimum subtracted) in the (x0, x1) plane, drawn as
# contour lines at levels 1, 2, 3 and 4.
# Alternative provided by iminuit: m.draw_contour('x0','x1');
x, y, z = m.contour('x0', 'x1', subtract_min=True)

# Only `matplotlib.pylab as plt` is imported, so `contour` and `clabel` must be
# reached through `plt`; the bare names raise NameError on a fresh kernel.
plt.figure()
cs = plt.contour(x, y, z, (1, 2, 3, 4))
plt.clabel(cs)
plt.xlabel('x0')
plt.ylabel('x1');

In [None]:
# MINOS contours for a selection of parameter pairs, one figure per pair.
# nsigma=1 says: draw one contour for sigma=1.
contour_pairs = [
    ('x0', 'x1'), ('x0', 'x2'),
    ('x1', 'x2'), ('x1', 'x3'),
    ('x2', 'x3'), ('x2', 'x4'),
    ('x3', 'x4'),
    ('x4', 'x5'),
]
for first_par, second_par in contour_pairs:
    plt.figure()
    m.draw_mncontour(first_par, second_par, nsigma=1)