# Fits

In [1]:
import numpy as np
from lmfit import Model, minimize, Parameters, report_fit

import nbimporter
import prepData as prep

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from scipy.stats import norm

from symfit import variables, parameters, Fit
import symfit as sm

## Functions
- For the background, the following parametric function will be used
$$exp \left( 2 \cdot \frac{|x - a+ib|^2}{|x - c+id|^2} + f \cdot (x - c) \right)$$
- For the signal, we will use either the Gaussian function
$$\mu \cdot exp \left( -\frac{1}{2} \frac{(x - x_0)^2}{\sigma^2} \right)$$
or a Maxwell-Boltzmann distribution
$$\mu \cdot \frac{x^2}{\sigma^3} \cdot exp \left( -\frac{1}{2} \frac{(x - x_0)^2}{\sigma^2} \right)$$
depending on the theoretical model we assume.

The total shape will be given by the sum of the two functions.

In [2]:
def bkg(x, a, b, c, d, e, f):                                                   
    return e**2*abs(x-a+1j*b)**2/abs(x-c+1j*d)**2+f*(x-c)

def gaussian(x, x0, s, mu):
    return mu * np.exp(-.5*((x-x0)/s)**2)
    
def maxwell(x, x0, s, mu):
    return mu * x**2/s**3 * np.exp(-.5*((x-x0)/s)**2)
    
    
def signal_gauss(x, a, b, c, d, e, f,
                    x0, s, mu):
    return bkg(x,a,b,c,d,e,f) + gaussian(x,x0,s,mu)
    
def signal_maxwell(x, a, b, c, d, e, f,
                      x0, s, mu):
    return bkg(x,a,b,c,d,e,f) + maxwell(x,x0,s,mu)

## Background

Parameters $a$ and $c$ need to be initialized to the cavity frequency.

In [3]:
def fit_bkg(x, y, w, center, ref):
    # set fit model
    bkg_model = Model(bkg)
    ps = bkg_model.make_params(a={'value':center, 'min':center*0.999, 'max':center*1.01},
                               b=2e4,
                               c={'value':center, 'min':center*0.999, 'max':center*1.01},
                               d=2.2e4,
                               e=1e-2*np.sqrt(ref),
                               f=1e-12*ref)
    # execute fit
    result = bkg_model.fit(y, x=x, params=ps, weights=1/w)
    
    return result

## Signal + Background

Background parameters will be initialized with the results of a previous fit, and will remain constant to facilitate convergence of the signal part. The $\sigma$ is set constant to 16 bins (default to 10.416 kHz), while the $x_0$ will be made to vary over the whole range of probed frequencies.

In [4]:
def fit_sig(x, y, w, x_0, init_params, signal, mu_init=1, mu_vary=True, par_vary=False):
    
    # set fit model
    sig_model = Model(signal)
    ps = sig_model.make_params(a ={'value':init_params['a'], 'vary':par_vary},
                               b ={'value':init_params['b'], 'vary':par_vary},
                               c ={'value':init_params['c'], 'vary':par_vary},
                               d ={'value':init_params['d'], 'vary':par_vary},
                               e ={'value':init_params['e'], 'vary':par_vary},
                               f ={'value':init_params['f'], 'vary':par_vary},
                               mu={'value':mu_init, 'min':0, 'vary':mu_vary},
                               x0={'value':x_0, 'vary':False},
                               s ={'value':16*651, 'vary':False}) # fixed value to 16 bins

    result = sig_model.fit(y, x=x, params=ps, weights=1/w)
    
    return result

## Plot Results

The following plots will be shown:
- Power vs. frequency, including the best fit function
- Residuals vs. frequency
- Distribution of normalized residual  $\frac{y_{fit} - y}{\sigma}$; ideally, it should be a Gaussian with $\mu = 0$ and $\sigma = 1$
- Residual vs. frequency, highlighting the $\pm \sigma$ belt

In [5]:
def plot_fit(x, y, w, fit_result):
    # prepare canvas
    fig = plt.figure(figsize=(15,10))
    gs  = GridSpec(2, 2)
    ax  = fig.add_subplot(gs[0,:])
    ax1 = fig.add_subplot(gs[1,0])
    ax2 = fig.add_subplot(gs[1,1])
    
    # plot data and best fit
    ax.plot(x, y, 'o', label='data')
    ax.plot(x, fit_result.best_fit, color='red', label='fit')
    
    fmin, fmax = min(x), max(x)
    ax.set_xlim([fmin,fmax])
    ax.legend()
    ax.set_xlabel('Frequence [Hz]')
    ax.set_ylabel('FFT')
    
    #residuals w.r.t. freq
    fit_result.plot_residuals(ax=ax1)
    
    
    # plot histogrm of residuals (with the fit)
    rangeMax = int(np.max(fit_result.residual))+1
    
    ax2.hist(fit_result.residual, bins=15, density=True, range=(-rangeMax,rangeMax))
    
    
    fit_res = norm.fit(fit_result.residual, loc=0, scale=1)
    ax2.plot(np.linspace(-rangeMax, rangeMax, 100),
             norm.pdf(np.linspace(-rangeMax, rangeMax, 100), fit_res[0], fit_res[1]), color="red")
    ax2.axvline(fit_res[0], color='black', linestyle='dashed', linewidth=1)
    
    summary_text = "mean: {}\n std: {}".format(np.round(fit_res[0],3), np.round(fit_res[1], 3))
    ax2.text(0.9, 0.9, summary_text, transform=fig.gca().transAxes, ha='right', va='top')
    ax2.set_xlabel('Residuals')


    # plot of residuals vs. freq and error band
    fig3, ax3 = plt.subplots(1, 1, figsize=(18,5))
    
    ax3.scatter(x, fit_result.residual*w, label="residuals")
    ax3.plot(x,  w, label="+$\sigma$")
    ax3.plot(x, -w, label="-$\sigma$")
    ax3.set_xlim([fmin,fmax])
    ax3.set_xlabel('Frequence [Hz]')
    ax3.set_ylabel('Residual')
    ax3.legend()
    
    plt.show()
    return fit_result

## Fit Multiple Runs Simultaneously

In [6]:
def multipleFitBKG(InfoDataset):
    
    fitResult = []
    # save fit results in dictionary
    for run in InfoDataset:
        # fit background for each individual run separately 
        bkg_result = fit_bkg(run["freq"], run["fft"],
                             run["weights"], run["center"], run["ref"])
            
        results = {"run":run["name"],
                   "background_bestParams":bkg_result.params.valuesdict(),
                   "background_bestfit"   :bkg_result.best_fit,
                   "background_residuals" :bkg_result.residual}
            
        fitResult.append(results)
    
    return(fitResult)

In [61]:
def signal_dataset(params, i, x):
    """calc sig+bkg from params for data set i
    using simple, hardwired naming convention"""
    a  = params["a_%i"  % (i+1)].value
    b  = params["b_%i"  % (i+1)].value
    c  = params["c_%i"  % (i+1)].value
    d  = params["d_%i"  % (i+1)].value
    e  = params["e_%i"  % (i+1)].value
    f  = params["f_%i"  % (i+1)].value
    mu = params["mu_%i" % (i+1)].value
    x0 = params["x0_%i" % (i+1)].value
    s  = params["s_%i"  % (i+1)].value
    return signal_gauss(x, a, b, c, d, e, f, x0, s, mu)


def objective(params, x, data):
    """ calculate total residual for fits to several data sets held
    in a 2-D array, and modeled by Gaussian functions"""
    ndata, _ = data.shape
    resid = 0.0*data[:]

    # make residual per data set
    for i in range(ndata):
        resid[i, :] = data[i, :] - signal_dataset(params, i, x[i, :])

    # now flatten this to a 1D array, as minimize() needs
    return resid.flatten()


def multipleFitSIG(InfoDataset, fitBkg, x_0, sigma=16*651, mu_init=1, mu_vary=True, par_vary=False):
    # create sets of parameters, one per data set
    fit_params = Parameters()
    # create 2-D arrays of freq and fft for objective function
    x    = np.zeros(shape=(len(InfoDataset), len(InfoDataset[0]["freq"])))
    data = np.zeros(shape=(len(InfoDataset), len(InfoDataset[0]["fft"])))
    
    for i in range(len(InfoDataset)):
        fit_params.add( "a_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["a"], vary=par_vary)
        fit_params.add( "b_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["b"], vary=par_vary)
        fit_params.add( "c_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["c"], vary=par_vary)
        fit_params.add( "d_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["d"], vary=par_vary)
        fit_params.add( "e_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["e"], vary=par_vary)
        fit_params.add( "f_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["f"], vary=par_vary)
        fit_params.add( "mu_%i" % (i+1), value=mu_init, min=0, vary=mu_vary)
        fit_params.add( "x0_%i" % (i+1), value=x_0, vary=False)
        fit_params.add( "s_%i"  % (i+1), value=sigma, vary=False)
        
        x[i]    = InfoDataset[i]["freq"]
        data[i] = InfoDataset[i]["fft"]

    # Now constrain all values of mu to have the same value
    for i in range(2, len(InfoDataset)):
        fit_params["mu_%i" % i].expr="mu_1"

    # run the global fit to all the data sets
    result = minimize(objective, fit_params, args=(x, data), method="emcee3")
    
    fitResult = []
    # save fit results in dictionary
    for run in InfoDataset:
        results = {"run":run["name"],
                   "signal_bestParams":bkg_result.params.valuesdict(),
                   "background_bestfit"   :bkg_result.best_fit,
                   "background_residuals" :bkg_result.residual}
            
        fitResult.append(results)
    
    return(fitResult)
    report_fit(result)

In [33]:
InfoDataset = prep.Load()

In [35]:
fitBkg = multipleFitBKG(InfoDataset)

In [36]:
mu_init=1
x_0=10353357599.0
sigma=16*651
par_vary=False
mu_vary=True
# create sets of parameters, one per data set
fit_params = Parameters()
# create 2-D arrays of freq and fft for objective function
x    = np.zeros(shape=(len(InfoDataset), len(InfoDataset[0]["freq"])))
data = np.zeros(shape=(len(InfoDataset), len(InfoDataset[0]["fft"])))
   
for i in range(len(InfoDataset)):
    fit_params.add( "a_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["a"], vary=par_vary)
    fit_params.add( "b_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["b"], vary=par_vary)
    fit_params.add( "c_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["c"], vary=par_vary)
    fit_params.add( "d_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["d"], vary=par_vary)
    fit_params.add( "e_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["e"], vary=par_vary)
    fit_params.add( "f_%i"  % (i+1), value=fitBkg[i]["background_bestParams"]["f"], vary=par_vary)
    fit_params.add( "mu_%i" % (i+1), value=mu_init, min=0, vary=mu_vary)
    fit_params.add( "x0_%i" % (i+1), value=x_0, vary=False)
    fit_params.add( "s_%i"  % (i+1), value=sigma, vary=False)
        
    x[i]    = InfoDataset[i]["freq"]
    data[i] = InfoDataset[i]["fft"]

In [40]:
for i in range(2, len(runsData)):
        fit_params["mu_%i" % i].expr="mu_1"

In [54]:
res = objective(fit_params, x, data)

In [45]:
ndata, _ = data.shape
resid = 0.0*data[:]

print(ndata, resid.shape)

13 (13, 200)


In [47]:
resid[i,:].shape

(200,)

In [51]:
a = signal_dataset(fit_params, i, x)
a.shape

(13, 200)

In [48]:
# make residual per data set
for i in range(ndata):
    resid[i, :] = data[i, :] - signal_dataset(params, i, x)

# now flatten this to a 1D array, as minimize() needs
return resid.flatten()

NameError: name 'params' is not defined

In [57]:
result = minimize(objective, fit_params, args=(x, data))

In [66]:
result.params

name,value,standard error,relative error,initial value,min,max,vary,expression
a_1,10354000000.0,0.0,(0.00%),10353522219.620493,-inf,inf,False,
b_1,20852.9578,0.0,(0.00%),20852.95781749073,-inf,inf,False,
c_1,10354000000.0,0.0,(0.00%),10353522435.553326,-inf,inf,False,
d_1,21974.3249,0.0,(0.00%),21974.32490149998,-inf,inf,False,
e_1,186.997387,0.0,(0.00%),186.9973868189828,-inf,inf,False,
f_1,-0.00589733,0.0,(0.00%),-0.0058973262847713,-inf,inf,False,
mu_1,2.6099e-06,1.92556005,(73778666.35%),1.0,0.0,inf,True,
x0_1,10353000000.0,0.0,(0.00%),10353357599.0,-inf,inf,False,
s_1,10416.0,0.0,(0.00%),10416.0,-inf,inf,False,
a_2,10353000000.0,0.0,(0.00%),10353494009.054926,-inf,inf,False,
