# Multiple Runs Analysis

Notebook to test the analysis functions on multiple runs simultaneously.

In [1]:
import nbimporter
import prepData as prep
import fitFunc as fits
import Statistics as stat

import matplotlib.pyplot as plt
import numpy as np 
import math

import multiprocessing

## Multiple Runs fit

In [2]:
# Signal position passed as x_0 to the multi-run signal fit further down.
x0 = 1.0353e+10+410000

In [3]:
# Load the runs; each element is indexed with "freq" and "fft" in the cells below.
runsData = prep.Load()

In [None]:
# Record the lowest and highest frequency of every run while overlaying
# all the spectra on a single plot.
n_runs = len(runsData)
mins = np.empty(n_runs)
maxs = np.empty(n_runs)

for i_run, run in enumerate(runsData):
    lowest, highest = min(run["freq"]), max(run["freq"])
    mins[i_run], maxs[i_run] = lowest, highest

    plt.plot(run["freq"], run["fft"])
    print(mins[i_run], maxs[i_run])

In [None]:
# Overall frequency span covered by all runs (shown as the cell output).
np.min(mins),np.max(maxs)

In [None]:
# Background fit over all runs; its result seeds the signal fit in the next cell.
fitBkg = fits.multipleFitBKG(runsData)

In [None]:
# Signal fit at position x0, starting from the background fit result.
fitSig = fits.multipleFitSIG(runsData, fitBkg, x_0=x0)

## Confidence interval

In [4]:
def multiple_calc_qmu(toyData, x0, mu_fix):
    """Return the likelihood-ratio value of every toy dataset in ``toyData``.

    Each toy gets its own background fit, then two signal fits at position
    ``x0``: one with mu held at ``mu_fix`` (``mu_vary=False``) and one with
    the fit's default settings. The ratio between the two, computed by
    ``stat.multiple_lh_ratio`` on the "fft" data, is stored per toy.

    Parameters
    ----------
    toyData : sequence of toy datasets (must support ``len``).
    x0 : signal position forwarded as ``x_0`` to the signal fits.
    mu_fix : value of mu used for the constrained fit.

    Returns
    -------
    numpy.ndarray of length ``len(toyData)``, one ratio per toy.
    """

    def ratio_for(sample):
        # background first: both signal fits start from the same bkg result
        bkg_params = fits.multipleFitBKG(sample)

        constrained = fits.multipleFitSIG(sample, bkg_params, x_0=x0,
                                          mu_init=mu_fix, mu_vary=False)
        unconstrained = fits.multipleFitSIG(sample, bkg_params, x_0=x0)

        return stat.multiple_lh_ratio(sample, "fft",
                                      constrained, "sig_bestFit",
                                      unconstrained, "sig_bestFit")

    q_values = np.empty(len(toyData))
    for slot, sample in enumerate(toyData):
        q_values[slot] = ratio_for(sample)

    return q_values

In [5]:
def multipleCI(InfoDataset, fitBkg, fitSig,toy_0,x_0, mu_fix,verbose=False,draw=False):
    """Scan mu until the ratio ``r = p_mu / p_b`` is close to 0.05.

    Starting from ``mu_fix``, every iteration
      1. fits the observed data with mu held at the trial value,
      2. computes the observed likelihood ratio against ``fitSig``,
      3. builds the ratio distributions from toys generated at the trial mu
         and from the background toys ``toy_0``,
      4. forms ``r = p_mu / p_b`` and moves mu with an adaptive step.

    The scan stops when ``|r - 0.05| <= 0.01``, when the target has been
    crossed twice, or when the next trial mu has already been tested.

    Parameters
    ----------
    InfoDataset : observed runs, forwarded to the fit and likelihood helpers.
    fitBkg : background fit result used to seed the fixed-mu signal fits.
    fitSig : signal fit result the fixed-mu fit is compared against.
    toy_0 : background toy datasets; its length sets the number of toys
        generated at each trial mu.
    x_0 : signal position.
    mu_fix : first trial value of mu.
    verbose : print the progress of the scan.
    draw : call ``stat.plot_lhratio`` for the best estimate at the end.

    Returns
    -------
    (mu_95, r) : the best mu estimate and the ratio obtained at that mu.
        If no trial was ever accepted (every ratio was NaN/inf) ``mu_95`` is
        0 and ``r`` is the last ratio computed.
    """

    n_toy = len(toy_0)

    # best estimate so far: distributions and parameters are kept so that
    # the optimal result can be plotted at the end
    q_mu_obs_prev = 0
    q_mu_prev = np.empty(n_toy)
    q0_prev = np.empty(n_toy)
    mu_95 = 0
    r_95 = None                  # ratio obtained at mu_95

    # state of the previous step, used to detect a crossing of the target
    r_prev = 1e10
    sign_prev = 0

    mu_test=mu_fix
    cross_check=0

    all_mu=[]
    while True:

        # stop as soon as the scan proposes a mu that was already tested
        if mu_test in all_mu:
            break

        fix = fits.multipleFitSIG(InfoDataset, fitBkg, x_0=x_0,mu_init=mu_test,mu_vary=False)

        # compute likelihood ratio of observed data
        q_mu_obs = stat.multiple_lh_ratio(InfoDataset, "fft",
                                       fix, "sig_bestFit",
                                       fitSig, "sig_bestFit")

        if(verbose):
            print("Mu: ", mu_test, "   q(mu)_obs =", q_mu_obs)

        # generate toy datasets from fixed mu
        toy_fix = stat.multiple_toydataset(fix, n=n_toy, data_type="sig_bestFit")

        q_mu=multiple_calc_qmu(toy_fix,x_0,mu_test)
        q0=multiple_calc_qmu(toy_0,x_0,mu_test)

        # compute p-values
        p_mu = stat.p_value(q_mu_obs, q_mu)
        p_b  = stat.p_value(q_mu_obs, q0)

        # compute ratio
        # NOTE(review): the NaN/inf handling below assumes p_b == 0 yields
        # nan/inf (numpy scalars) rather than raising ZeroDivisionError.
        r = p_mu/p_b

        if(verbose):
            print("p_mu =", p_mu, "  p_b =", p_b, "  ratio =", r)
            print(cross_check)
            print(all_mu, '\n')

        # check results to proceed with the mu scan:
        # if the ratio is close enough to target we save the results and stop
        # else we check if we are under- or overshooting and correct the estimate
        # if we cross the target we stop and take the best result between
        # current and previous step

        # remember this mu so that the check at the top of the loop can
        # detect a repeated trial
        all_mu.append(mu_test)

        # unusable ratio: retry with a smaller mu without touching the
        # best estimate
        # NOTE(review): this branch lets mu_test become exactly 0, whereas
        # the clamp at the end of the loop uses `<= 0` -- confirm intended.
        if math.isnan(r) or math.isinf(r):
            mu_test = mu_test - 3
            if mu_test < 0: mu_test=1
            continue

        is_current_worse = False

        reached_target = (np.abs(r - 0.05) <= 0.01)
        if not reached_target:
            sign = np.sign(r - 0.05)
            crossed_target = (sign*sign_prev == -1)

            if crossed_target:
                is_current_worse = (np.abs(r - 0.05) > np.abs(r_prev - 0.05))
                cross_check+=1

        # update best estimate for every case except the last; the ratio is
        # stored with it so that the returned pair is consistent
        if not is_current_worse:
            q_mu_obs_prev = q_mu_obs
            q_mu_prev = q_mu
            q0_prev = q0
            mu_95 = mu_test
            r_95 = r

        if reached_target or cross_check==2:
            cross_check=0
            break

        # adaptive step algorithm
        # an exactly-zero ratio is replaced by 0.14, i.e. it is treated as
        # being 0.09 away from the target (one full step); the replaced value
        # is also what the next iteration sees as r_prev
        if r==0: r=0.14

        distance = np.abs(r - 0.05)

        # cascading thresholds: every distance falls in exactly one band
        # (values sitting exactly on 0.5 or 0.2 no longer drop to the
        # smallest step)
        if distance > 0.5:
            step = sign*2
        elif distance > 0.2:
            step = sign*7/5
        elif distance > 0.08:
            step = sign
        else:
            step = sign*2/5

        # update mu if we did not exit the loop
        mu_test = mu_test + step*5
        r_prev = r
        sign_prev = sign

        # mu must stay strictly positive
        if mu_test <= 0:
            mu_test=1

    # plot significance distribution
    if(draw):
        stat.plot_lhratio(q_mu_obs_prev, q0_prev, q_mu_prev, x_0, mu_95)

    # no trial was ever accepted: fall back to the last ratio computed
    if r_95 is None:
        r_95 = r

    return(mu_95, r_95)

In [6]:
def stat_test(InfoDataset,x_0,mu_fix=1.0,nToy=1000,verbose=False,draw=False):
    """Run the mu scan for every signal position in ``x_0``.

    The background is fitted once and ``nToy`` background toys are generated
    from it; both are shared by all positions. The scan at the first
    position starts from ``mu_fix``; each later scan starts from the mu
    found at the previous position.

    Parameters
    ----------
    InfoDataset : observed runs.
    x_0 : sequence of signal positions to test.
    mu_fix : starting mu for the first position.
    nToy : number of background toys.
    verbose, draw : forwarded to ``multipleCI``.

    Returns
    -------
    numpy.ndarray with one mu per entry of ``x_0``.
    """
    # shared by every position: background fit and its toys
    bkg_fit = fits.multipleFitBKG(InfoDataset)
    bkg_toys = stat.multiple_toydataset(bkg_fit, n=nToy, data_type="bkg_bestFit")

    limits = np.empty(len(x_0))
    seed = mu_fix
    for slot, position in enumerate(x_0):

        print("Testing x0:",position)
        sig_fit = fits.multipleFitSIG(InfoDataset, bkg_fit, x_0=position)

        limits[slot], _ = multipleCI(InfoDataset, bkg_fit, sig_fit, bkg_toys,
                                     position, seed, verbose, draw)
        # the next position starts from the value just stored
        seed = limits[slot]

        print("mu_CI:",limits[slot])
        print("---------------------")

    return limits

### Test

In [7]:
# Grid of 30 evenly spaced signal positions between 10353286068+16*651 and
# 10353286068+31*16*651.
# NOTE(review): 30 points over that span give a spacing of 30/29 * 16*651,
# not 16*651 -- confirm this is the intended grid.
all_freq=np.linspace(10353286068+16*651, 10353286068+31*16*651,30)
runsData = prep.Load()

#### Range(0,5)

In [None]:
# First block of the grid: positions with index 0..4.
x0_1=all_freq[range(0,5)]
print(x0_1)

In [None]:
# Scan mu at every position of the block; the first scan starts from mu=10.
muCL1=stat_test(runsData,x0_1,mu_fix=10,verbose=True,draw=True)

In [None]:
# Show the mu values returned by the scan.
muCL1

In [None]:
# Top panel: spectra of all runs with the tested positions marked;
# bottom panel: mu returned by the scan at each position.
fig,ax=plt.subplots(2,1,figsize=(10,8),sharex=True)

for run in runsData:
    ax[0].plot(run["freq"],run["fft"],color="black")

# vlines takes the whole array of positions, so a single call marks all of
# them (looping over x0_1 only redrew the same lines once per element)
ax[0].vlines(x0_1,31000,36000,color="green",linestyle="dashed")

ax[1].plot(x0_1,muCL1)

#### Range(5,10)

In [None]:
# Second block of the grid: positions with index 5..9.
x0_2=all_freq[range(5,10)]
print(x0_2)

In [None]:
# Scan mu at every position of the block; the first scan starts from mu=13.
muCL2=stat_test(runsData,x0_2,mu_fix=13,verbose=True,draw=True)

In [None]:
# Show the mu values returned by the scan.
muCL2

In [None]:
# Top panel: spectra of all runs with the tested positions marked;
# bottom panel: mu returned by the scan at each position.
fig,ax=plt.subplots(2,1,figsize=(10,8),sharex=True)

for run in runsData:
    ax[0].plot(run["freq"],run["fft"],color="black")

# vlines takes the whole array of positions, so a single call marks all of
# them (looping over x0_2 only redrew the same lines once per element)
ax[0].vlines(x0_2,31000,36000,color="green",linestyle="dashed")

ax[1].plot(x0_2,muCL2)

#### Range(10,15)

In [None]:
# Third block of the grid (index 10..14), of which only the second position
# is kept: from here on x0_3 is a single value, not an array.
x0_3=all_freq[range(10,15)]
x0_3 = x0_3[1]
print(x0_3)

In [None]:
# Single-position scan with 1500 toys; x0_3 is wrapped in a list because
# stat_test iterates over its x_0 argument.
muCL3=stat_test(runsData,[x0_3],mu_fix=10,nToy=1500,verbose=True,draw=True)

In [None]:
# Top panel: spectra of all runs with the tested position marked;
# bottom panel: mu returned by the scan at that position.
fig,ax=plt.subplots(2,1,figsize=(10,8),sharex=True)

for run in runsData:
    ax[0].plot(run["freq"],run["fft"],color="black")

# x0_3 is a single value here, so it cannot be iterated over; wrap it in a
# 1-element array for the calls below
x0_3_points = np.atleast_1d(x0_3)
ax[0].vlines(x0_3_points,31000,36000,color="green",linestyle="dashed")

# a marker is needed: a line through a single point would not be visible
ax[1].plot(x0_3_points,muCL3,'o')

#### Range(15,20)

In [None]:
# Fourth block of the grid: positions with index 15..19.
x0_4=all_freq[range(15,20)]
print(x0_4)

# A suitable initial mu has to be found for this block (the assignment was
# left empty, which is a syntax error).
# 30 is the fourth entry of the mu_init list ([10,13,21,30,20,20]) defined in
# the parallel-test cell of this notebook, whose first two entries match the
# starting values used for the first two blocks -- TODO confirm/tune.
mu_fix4 = 30

In [None]:
# Scan mu at every position of the block, starting from mu_fix4.
muCL4=stat_test(runsData,x0_4,mu_fix=mu_fix4,verbose=True,draw=True)

#### Range(20,25)

In [None]:
# Fifth block of the grid: positions with index 20..24.
x0_5=all_freq[range(20,25)]
print(x0_5)

# A suitable initial mu has to be found for this block (the assignment was
# left empty, which is a syntax error).
# 20 is the fifth entry of the mu_init list ([10,13,21,30,20,20]) defined in
# the parallel-test cell of this notebook, whose first two entries match the
# starting values used for the first two blocks -- TODO confirm/tune.
mu_fix5 = 20

In [None]:
# Scan mu at every position of the block, starting from mu_fix5.
muCL5=stat_test(runsData,x0_5,mu_fix=mu_fix5,verbose=True,draw=True)

#### Range(25,30)

In [None]:
# Sixth block of the grid: positions with index 25..29.
x0_6=all_freq[range(25,30)]
print(x0_6)

# A suitable initial mu has to be found for this block (the assignment was
# left empty, which is a syntax error).
# 20 is the sixth entry of the mu_init list ([10,13,21,30,20,20]) defined in
# the parallel-test cell of this notebook, whose first two entries match the
# starting values used for the first two blocks -- TODO confirm/tune.
mu_fix6 = 20

In [None]:
# Scan mu at every position of the block, starting from mu_fix6.
muCL6=stat_test(runsData,x0_6,mu_fix=mu_fix6,verbose=True,draw=True)

## Embarrassingly parallel scan

In [None]:
def stat_test2(InfoDataset,x_0,mu_fix=1.0,nToy=1000,verbose=False,draw=False):
    """Variant of the sequential scan that also returns the ratio per position.

    The background is fitted once and ``nToy`` background toys are generated
    from it. For each position of ``x_0`` the mu scan is run, starting from
    ``mu_fix`` at the first position and from the previously found mu
    afterwards.

    Parameters
    ----------
    InfoDataset : observed runs.
    x_0 : sequence of signal positions to test.
    mu_fix : starting mu for the first position.
    nToy : number of background toys.
    verbose, draw : forwarded to ``multipleCI``.

    Returns
    -------
    numpy.ndarray built from ``[x_0, mu_CI, r]``: positions, mu found at
    each position, and the ratio returned by ``multipleCI``.
    """
    # shared by every position: background fit and its toys
    bkg_fit = fits.multipleFitBKG(InfoDataset)
    bkg_toys = stat.multiple_toydataset(bkg_fit, n=nToy, data_type="bkg_bestFit")

    n_positions = len(x_0)
    limits = np.empty(n_positions)
    ratios = np.empty(n_positions)

    seed = mu_fix
    for slot, position in enumerate(x_0):

        sig_fit = fits.multipleFitSIG(InfoDataset, bkg_fit, x_0=position)

        limits[slot], ratios[slot] = multipleCI(InfoDataset, bkg_fit, sig_fit,
                                                bkg_toys, position, seed,
                                                verbose, draw)
        # the next position starts from the value just stored
        seed = limits[slot]

        # for intermediate check
        print(position, limits[slot], '\n', '\n')

    print(x_0)
    print(limits)

    return np.array([x_0, limits, ratios])

In [None]:
import multiprocess as mp

def stat_test_parallel(runsData, x0, mu_init, njobs=10):
    """Run ``stat_test2`` on ``njobs`` chunks of ``x0`` in separate processes.

    Parameters
    ----------
    runsData : observed runs, passed unchanged to every job.
    x0 : array of signal positions; it is cut into ``njobs`` consecutive
        chunks (chunk sizes may differ by one when the length is not a
        multiple of ``njobs``).
    mu_init : sequence with at least ``njobs`` entries; entry ``i`` is the
        starting mu of job ``i``.
    njobs : number of processes to start.

    Returns
    -------
    list with the ``stat_test2`` result of every job that delivered one,
    ordered by job index (i.e. in the order of the chunks of ``x0``).

    Raises
    ------
    IndexError
        If ``mu_init`` has fewer than ``njobs`` entries; raised before any
        process is started.
    """
    # array_split behaves like split when the length is a multiple of njobs
    # and additionally accepts lengths that are not
    chunks = np.array_split(x0, njobs)

    # build all job arguments up front so that a too-short mu_init fails
    # before any child process has been launched
    jobs = [(chunks[i], mu_init[i]) for i in range(njobs)]

    manager = mp.Manager()
    # keyed by job index: the completion order of the processes is
    # arbitrary, the key lets us restore the order of the chunks
    results = manager.dict()

    def worker(job_id, runsData, x_0, mu_init, results):
        results[job_id] = stat_test2(runsData, x_0, mu_init)

    processes = []
    for job_id, (chunk, mu_start) in enumerate(jobs):
        p = mp.Process(target=worker, args=(job_id, runsData, chunk, mu_start, results))
        processes.append(p)
        p.start()

    for p in processes:
        p.join()

    # jobs that died without storing a result are simply absent
    finished = results.copy()
    return [finished[job_id] for job_id in sorted(finished)]

In [None]:
# Quick sequential check on the first two positions of the grid.
x0_1=all_freq[range(0,2)]
print(x0_1)

muCL1=stat_test(runsData,x0_1,mu_fix=10,verbose=True,draw=True)

# Top panel: spectra of all runs with the tested positions marked;
# bottom panel: mu returned by the scan at each position.
fig,ax=plt.subplots(2,1,figsize=(10,8),sharex=True)

for run in runsData:
    ax[0].plot(run["freq"],run["fft"],color="black")

# vlines takes the whole array of positions, so a single call marks all of
# them (looping over x0_1 only redrew the same lines once per element)
ax[0].vlines(x0_1,31000,36000,color="green",linestyle="dashed")

ax[1].plot(x0_1,muCL1)

[1.03532965e+10 1.03533073e+10]
Testing x0: 10353296484.0
Mu:  10    q(mu)_obs = 12.56040768821913


### Test

In [None]:
# Full grid of 30 signal positions and a list of six starting values of mu.
all_freq=np.linspace(10353286068+16*651, 10353286068+31*16*651,30)
mu_init=[10,13,21,30,20,20]

# Both are immediately overridden for a quick test: only the first two
# positions are kept, each with starting mu 10.
all_freq=all_freq[:2]
mu_init = [10,10]

runsData = prep.Load()

In [None]:
# One job per entry of mu_init: all_freq is split into len(mu_init) chunks.
muCI_results=stat_test_parallel(runsData,all_freq,mu_init,len(mu_init))

In [None]:
# Stack the per-job results: k has shape (jobs, 3, positions per job), the
# 3 rows of each job being position, mu and ratio as returned by stat_test2.
k = np.array(muCI_results)
# Flatten to one row per tested position with columns (position, mu, ratio).
# Moving the 3-axis last before reshaping works for any number of jobs and
# positions per job; for two jobs with one position each it equals the
# former hard-coded reshape(2,3).
k1 = k.transpose(0, 2, 1).reshape(-1, 3)
k1

In [None]:
# mu (column 1) against position (column 0).
plt.plot(k1[:,0], k1[:,1], 'o')
plt.show()

In [None]:
# Ratio (column 2) against position (column 0); the horizontal line marks
# the 0.05 target of the scan.
plt.plot(k1[:,0], k1[:,2], 'o')
plt.axhline(0.05)
plt.show()

In [None]:
# Same parallel call as above, run again.
muCI_results=stat_test_parallel(runsData,all_freq,mu_init,len(mu_init))

In [None]:
# Hard-coded table, one row per grid position: column 0 is the position,
# column 1 a value of mu.
# NOTE(review): presumably copied from the printed output of an earlier
# parallel run (rows are not sorted by position) -- confirm provenance.
result=np.array([[10353511987.448277, 10.0], 
[10353458111.586206, 33.0 ],
[10353296484.0, 5.0],
[10353565863.310345, 15.0],
[10353468886.758621, 33.0], 
[10353350359.862068, 33.0],
[10353307259.172413, 22.0],
[10353522762.62069, 21.0],
[10353533537.793104, 26.0],
[10353479661.931034, 19.0],
[10353361135.034483, 25.0],
[10353318034.344828, 25.0],
[10353576638.48276, 33.0],
[10353490437.103449, 19.0],
[10353404235.724138, 60.0],
[10353328809.51724, 21.0],               
[10353544312.965517, 37.0],
[10353501212.275862, 11.0],
[10353587413.655172, 51.0],
[10353371910.206896, 8.0],
[10353339584.689655, 13.0],
[10353555088.137932, 24.0],
[10353382685.37931, 18.0],
[10353393460.551723, 14.0],
[10353598188.827587, 87.0],
[10353415010.896551, 11.0],
[10353425786.068966, 17.0],
[10353436561.241379, 8.0],
[10353447336.413794, 35.0],
[10353608964.0, 439.0]])

In [None]:
# mu (column 1) against position (column 0) for the hard-coded table.
plt.plot(result[:,0], result[:,1], 'o')

In [None]:
# start: 12:20

In [None]:
# 23 at 13:31 with value mu=18
# 24 at 13:36 with value mu=14
# 25 at 13:49 with value mu=87
# 26 at 14:08 with value mu=11
# 27 at 14:40 with value mu=17