# Multiple fit code

In [59]:
import os
import scipy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from math import factorial
from scipy import optimize
from matplotlib.pyplot import cm
from lmfit import Model
from matplotlib.gridspec import GridSpec
from scipy.stats import poisson as pois
from scipy.stats import norm

import nbimporter
import fitFunc as fits

## Multiple loading (load + prep)

In [2]:
def multipleLoad(path='db/', bin_width=651, nbins=100):
    """Load every run file found under ``path`` and prepare it for fitting.

    Each file is read with ``pandas.read_excel`` as a three-sheet workbook:
    sheet 0 (no header) holds the run metadata, sheet 1 the frequencies and
    sheet 2 the power spectra. The first column of sheet 2 is skipped; every
    further column is one subrun and is stored as ``fft0``, ``fft1``, ...

    The spectra are cut to the window ``center +/- bin_width*nbins`` around
    the cavity frequency, and every subrun is rescaled so that its minimum
    power inside the window equals ``3.5 * 1.38e-23 * bin_width / 1e-24``.

    Parameters
    ----------
    path : str
        Directory that is walked recursively; every file found is loaded.
    bin_width : number
        Width of one frequency bin, in the units of the frequency column
        (presumably Hz -- confirm against the data files). It sets both the
        size of the frequency window and the bandwidth used in the power
        normalisation.
    nbins : int
        Number of bins kept on each side of the cavity frequency.

    Returns
    -------
    list of dict
        One dictionary per file with the keys ``"name"`` (file path),
        ``"subrun"`` (True when the file holds more than one subrun),
        ``"length"``, ``"center"``, ``"cavdata"`` (DataFrame with ``freq``
        and the rescaled ``fftN`` columns), ``"weights"`` (DataFrame with one
        ``fftN`` column per subrun) and ``"ref"`` (list of the scale factors
        applied to each subrun).
    """
    # Every file below `path`, at any depth.
    file_list = [os.path.join(root, name)
                 for root, _dirs, names in os.walk(path)
                 for name in names]

    InfoDataset = []

    for file in file_list:

        meta = pd.read_excel(file, sheet_name=0, header=None)
        freq = pd.read_excel(file, sheet_name=1)             # frequencies
        fft = pd.read_excel(file, sheet_name=2)              # power

        data = pd.DataFrame({'freq': freq[1]})

        # Load all the subruns: the first column of the power sheet is
        # skipped, the remaining ones are renamed fft0, fft1, ...
        subrun_columns = list(fft.columns[1:])
        for idx, col_name in enumerate(subrun_columns):
            data[f'fft{idx}'] = fft[col_name]
        n_subruns = len(subrun_columns)

        # Cavity frequency and number of files per run.
        center = meta[1][3]
        length = meta[1][8]
        N = length*2731  # N=1365500 if length=500

        half_window = bin_width*nbins
        mask = (data['freq'] > center-half_window) & (data['freq'] < center+half_window)
        cavdata = data[mask].reset_index(drop=True)

        weights = pd.DataFrame()
        ref = []
        # Scale data to yoctowatt (1e-24 W).
        for idx in range(n_subruns):
            key = f'fft{idx}'
            minW = np.min(cavdata[key])  # minimum power in the cavity window

            # In general, the average measured power should be known and equal
            # to the noise power of the system, so the data are rescaled such
            # that the minimum power is T_noise * k_b * B (W). The bandwidth B
            # is the bin width, so it follows the `bin_width` argument instead
            # of a hard-coded 651. An extra factor could be added here to make
            # the rescaled values integers.
            ref_ = minW**(-1) * 3.5*1.38e-23*bin_width/1e-24

            cavdata[key] = ref_ * cavdata[key]   # y'

            # Weights are computed from the already rescaled power y' = ref*y:
            # sqrt(ref)*sqrt(y')/sqrt(N) = ref*sqrt(y/N).
            weights[key] = np.sqrt(ref_)*np.sqrt(cavdata[key])/np.sqrt(N)

            ref.append(ref_)

        Info = {"name": file, "subrun": n_subruns > 1, "length": length,
                "center": center, "cavdata": cavdata, "weights": weights,
                "ref": ref}
        InfoDataset.append(Info)

    return InfoDataset

In [3]:
# Load and prepare every run with the default arguments of multipleLoad.
runsData = multipleLoad()
#runsData

## Multiple fit

In [None]:
if fitSig:
            sig_result=fit_sig(run["cavdata"]["freq"],run["cavdata"]["fft"],run["weights"],
                               fitInfo["signalFunc"],bkg_result,
                               fitInfo["x0"],fitInfo["mu_init"],fitInfo["mu_vary"])
                               
            
            results={"run":run["name"],
                     "background":{"background_bestFit":bkg_result.params.valuesdict(),"background_residuals":bkg_result.residual},
                     "signal":{"signal_bestFit":sig_result.params.valuesdict(),"signal_residuals":sig_result.residual}}
        
        fitResult.append(results)
    
    return(fitResult)

In [60]:
def multipleFitBKG(InfoDataset):
    """Fit the background of every subrun of every run.

    Parameters
    ----------
    InfoDataset : list of dict
        Run dictionaries providing the keys ``"name"``, ``"cavdata"``,
        ``"weights"``, ``"center"`` and ``"ref"``.

    Returns
    -------
    list of dict
        One entry per (run, subrun) with the keys ``"run"``, ``"subrun"``,
        ``"background_bestParams"``, ``"background_residuals"`` and
        ``"background_bestfit"``, taken from the result of ``fits.fit_bkg``.
    """
    fitResult = []

    for run in InfoDataset:
        # Every column of cavdata except one is counted as a subrun spectrum.
        n_subruns = len(run["cavdata"].columns) - 1

        for subrun in range(n_subruns):
            label = f'fft{subrun}'
            spectra = run["cavdata"]

            bkg_result = fits.fit_bkg(
                spectra["freq"],
                spectra[label],
                run['weights'][label],
                run["center"],
                run["ref"][subrun],
            )

            fitResult.append({
                "run": run["name"],
                "subrun": label,
                "background_bestParams": bkg_result.params.valuesdict(),
                "background_residuals": bkg_result.residual,
                "background_bestfit": bkg_result.best_fit,
            })

    return fitResult

In [62]:
# Background fit of every subrun of the loaded runs.
fits_bkg = multipleFitBKG(runsData)
#fits_bkg

In [61]:
def multipleFitSIG(InfoDataset,fitBkg,x0,mu,sigma):
    """Fit every subrun of every run (currently background only).

    NOTE(review): the body only calls ``fits.fit_bkg``; the arguments
    ``fitBkg``, ``x0``, ``mu`` and ``sigma`` are accepted but never used, so
    no signal fit is performed yet.

    Parameters
    ----------
    InfoDataset : list of dict
        Run dictionaries providing the keys ``"name"``, ``"cavdata"``,
        ``"weights"``, ``"center"`` and ``"ref"``.
    fitBkg, x0, mu, sigma
        Unused.

    Returns
    -------
    list of dict
        One entry per (run, subrun) with the keys ``"run"``, ``"subrun"``,
        ``"background_bestParams"``, ``"background_residuals"`` and
        ``"background_bestfit"``.
    """
    collected = []

    for run in InfoDataset:
        # Every column of cavdata except one is counted as a subrun spectrum.
        for subrun in range(len(run["cavdata"].columns) - 1):
            column = f'fft{subrun}'
            frequencies = run["cavdata"]["freq"]
            power = run["cavdata"][column]
            errors = run['weights'][column]

            bkg_result = fits.fit_bkg(frequencies, power, errors,
                                      run["center"], run["ref"][subrun])

            entry = {"run": run["name"], "subrun": column}
            entry["background_bestParams"] = bkg_result.params.valuesdict()
            entry["background_residuals"] = bkg_result.residual
            entry["background_bestfit"] = bkg_result.best_fit
            collected.append(entry)

    return collected

In [None]:
fits_sig=
#fits_sig

## CI

In [None]:
def CI(x, y, w, bkg_params, center, ref, x_0, signal, mu_fix, sig, toy_0, N=1365500, draw=False, verbose=False):
    """Scan the values in ``mu_fix`` and return the one whose ratio is closest to 0.05.

    For every tested ``mu`` the signal model is fitted with ``mu`` held fixed
    (``fits.fit_sig(..., mu_vary=False)``). The likelihood ratio of the
    observed data is compared with the likelihood-ratio distributions of two
    toy samples: one generated from the fixed-``mu`` fit and the supplied
    ``toy_0``. The two resulting p-values are combined as ``p_mu / p_b``.

    Parameters
    ----------
    x, y, w : array-like
        Data passed to ``fits.fit_sig``; ``y`` is also passed to ``lh_ratio``.
    bkg_params
        Background parameters forwarded to ``fits.fit_sig``.
    center, ref
        Forwarded to ``calc_qmu``.
    x_0
        Signal position under test.
    signal
        Signal function forwarded to ``fits.fit_sig`` and ``calc_qmu``.
    mu_fix : sequence
        Values of ``mu`` to scan.
    sig
        Forwarded to ``lh_ratio``.
    toy_0 : sequence
        Toy datasets used for the ``p_b`` distribution; its length sets the
        number of toys generated for every ``mu``.
    N : int
        Forwarded to ``gen_toydataset`` and ``calc_qmu``.
    draw : bool
        When true, ``plot_lhratio`` is called for the selected ``mu``.
    verbose : bool
        When true, the p-values of every scan point are printed.

    Returns
    -------
    The element of ``mu_fix`` with ``p_mu / p_b`` closest to 0.05, or ``0``
    when no scan point was selected (e.g. ``mu_fix`` is empty).
    """
    target = 0.05
    n_toy = len(toy_0)

    # State of the best scan point so far; the distributions are kept only so
    # that the optimal result can be plotted at the end.
    obs_at_best = 0
    qmu_at_best = np.empty(n_toy)
    q0_at_best = np.empty(n_toy)
    ratio_at_best = 1e10
    mu_95 = 0

    for idx in range(len(mu_fix)):
        mu = mu_fix[idx]

        model = fits.fit_sig(x, y, w, x_0, bkg_params, signal, mu_init=mu, mu_vary=False).best_fit

        # Likelihood ratio of the observed data.
        q_mu_obs = lh_ratio(y, model, sig)

        # Toy datasets generated from the fixed-mu fit.
        toys_mu = gen_toydataset(model, n_toy, N)

        # Distributions of the likelihood ratio for both toy samples.
        q_mu = calc_qmu(x, toys_mu, center, ref, x_0, mu, signal, N)
        q0 = calc_qmu(x, toy_0, center, ref, x_0, mu, signal, N)

        # p-values and their ratio.
        p_mu = p_value(q_mu_obs, q_mu)
        p_b = p_value(q_mu_obs, q0)
        r = p_mu/p_b

        if verbose:
            print("Mu: ", mu, "   q(mu)_obs =", q_mu_obs,
                  "\np_mu =", p_mu, "  p_b =", p_b, "  ratio =", r, "\n")

        # Keep this scan point when its ratio is strictly closer to the target.
        if np.abs(r - target) < np.abs(ratio_at_best - target):
            obs_at_best = q_mu_obs
            qmu_at_best = q_mu
            q0_at_best = q0
            ratio_at_best = r
            mu_95 = mu

    # Plot the likelihood-ratio distributions of the selected scan point.
    if draw:
        plot_lhratio(obs_at_best, q0_at_best, qmu_at_best, x_0, mu_95)

    return mu_95

In [None]:
def CI_mu(dataRuns,
          x_0=np.array([]), mu_fix=np.array([]), n_toy=500,
          signal=fits.signal_gauss):
    """Fit the background of every subrun, build background toys and scan ``x_0``.

    Parameters
    ----------
    dataRuns : list of dict
        Run dictionaries accepted by ``multipleFitBKG``.
    x_0 : array-like
        Signal positions to be tested.
    mu_fix : array-like
        Values of ``mu`` to be tested.
    n_toy : int
        Number of toy datasets generated for each background fit.
    signal : callable
        Signal function to be used.

    Returns
    -------
    numpy.ndarray
        One entry per element of ``x_0``.
    """
    # Fit background once.
    res_bkg = multipleFitBKG(dataRuns)

    # One record per (run, subrun). These must be lists: the original code
    # created dicts and then called .append on them, which raises
    # AttributeError on the first fit result.
    bkg = []
    bkg_params = []
    toy_0 = []
    for fit in res_bkg:
        tag = {"run": fit['run'], "subrun": fit["subrun"]}

        bkg.append({**tag, "bestfit": fit["background_bestfit"]})
        # The parameters go to their own container (they used to be appended
        # to `bkg`, leaving `bkg_params` empty).
        bkg_params.append({**tag, "bestParams": fit["background_bestParams"]})

        # NOTE(review): `stats` is not imported in the visible part of the
        # file -- confirm which module provides gen_toydataset.
        toy_bkg = stats.gen_toydataset(values=fit["background_bestfit"], n=n_toy)
        toy_0.append({**tag, "toy_0": toy_bkg})

    mu_CI = np.empty(len(x_0))

    for i_x0 in range(len(x_0)):
        # FIXME(review): CI takes eleven required positional arguments
        # (x, y, w, bkg_params, center, ref, x_0, signal, mu_fix, sig, toy_0),
        # so this call raises TypeError. How the per-(run, subrun) records
        # collected above should be passed, and combined into one value per
        # x_0, still has to be decided.
        mu_CI[i_x0] = CI()
    return mu_CI

## TEST