## Fitting functions and helpers

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
%matplotlib inline

In [4]:
import scipy
import sys
sys.path.append("../louisa/")

from functions_for_final_dataset import *

The `log_progress` helper below comes from https://github.com/kuk/log-progress

In [1]:
def log_progress(sequence, every=None, size=None, name='Items'):
    """Yield the items of *sequence* while updating a notebook progress widget.

    A text label and an ``IntProgress`` bar are stacked in a ``VBox`` and
    displayed once; they are refreshed on the first item and afterwards on
    every ``every``-th item.

    Parameters
    ----------
    sequence : iterable
        Items to iterate over; each one is yielded unchanged.
    every : int, optional
        Refresh interval in items.  When omitted and the size is known it is
        1 for sizes up to 200 and ``int(size / 200)`` otherwise.  It must be
        given when the size cannot be determined.
    size : int, optional
        Total number of items.  Defaults to ``len(sequence)`` when the
        sequence supports ``len``.
    name : str
        Prefix shown in the label.

    Yields
    ------
    The items of ``sequence`` in order.
    """
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    # Without an explicit size, fall back to len(); objects without a length
    # are treated as open-ended iterators.
    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True

    if size is None:
        assert every is not None, 'sequence is iterator, set every'
    elif every is None:
        every = 1 if size <= 200 else int(size / 200)     # every 0.5%

    if is_iterator:
        # Unknown total: show a full bar in the 'info' style.
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    display(VBox(children=[label, progress]))

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            refresh_due = index == 1 or index % every == 0
            if refresh_due and is_iterator:
                label.value = '{name}: {index} / ?'.format(
                    name=name,
                    index=index
                )
            elif refresh_due:
                progress.value = index
                label.value = u'{name}: {index} / {size}'.format(
                    name=name,
                    index=index,
                    size=size
                )
            yield record
    except BaseException:
        # Anything raised while iterating turns the bar red, then propagates.
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = "{name}: {index}".format(
            name=name,
            index=str(index or '?')
        )

In [2]:
def get_gates_dict(kw):
    """Read the gate border table for *kw* and collect the limits per machine.

    The tab-separated file ``../../data/{kw}/{kw}_03_gate_border_values.txt``
    is loaded and its rows are selected by the ``gate`` column, matching the
    names ``{kw}_{k}A`` and ``{kw}_{k}B`` for every ``k`` in the module-level
    ``gates``.

    Parameters
    ----------
    kw : str
        Identifier used in the file path and as the gate-name prefix.

    Returns
    -------
    dict
        ``'POI'`` holds ``kw``.  For each of the suffixes ``A`` and ``B`` the
        keys ``x_lower_*`` and ``x_upper_*`` hold the ``lower_limit`` and
        ``upper_limit`` columns of the selected rows, and ``x_mean_*`` holds
        their element-wise mean.
    """
    # NOTE(review): `gates` is not defined in this section - presumably it
    # comes from the star import; confirm.
    table = pd.read_csv(f"../../data/{kw}/{kw}_03_gate_border_values.txt",delimiter='\t')

    rows_a = table[table.gate.isin([f'{kw}_{k}A' for k in gates])]
    rows_b = table[table.gate.isin([f'{kw}_{k}B' for k in gates])]

    lower_a, upper_a = rows_a.lower_limit.values, rows_a.upper_limit.values
    lower_b, upper_b = rows_b.lower_limit.values, rows_b.upper_limit.values

    return {
        'POI': kw,
        'x_lower_A': lower_a,
        'x_upper_A': upper_a,
        'x_mean_A': np.mean(np.vstack([lower_a, upper_a]), axis=0),
        'x_lower_B': lower_b,
        'x_upper_B': upper_b,
        'x_mean_B': np.mean(np.vstack([lower_b, upper_b]), axis=0),
    }

In [None]:
def gates_positions_figure(gates_dict):
    """Plot the gate intervals of machines A and B on one figure.

    Each gate is drawn as a horizontal segment from its lower to its upper
    border, with faint vertical guide lines at both borders.  Machine A gates
    are red and placed at y = 3..10; machine B gates are blue and mirrored to
    y = -3..-10.  The y tick labels show the absolute value.

    Parameters
    ----------
    gates_dict : dict
        Must provide ``'x_lower_A'``, ``'x_upper_A'``, ``'x_lower_B'``,
        ``'x_upper_B'`` (sequences of border values) and ``'POI'`` (used in
        the title).
    """
    plt.figure(facecolor='w',figsize=(7,4))

    # Gate numbers on the y axis; zip() below stops at the shorter input.
    y = range(3,11)

    for gate_no, lower, upper in zip(y,gates_dict['x_lower_A'],gates_dict['x_upper_A']):
        plt.plot([lower,upper],[gate_no,gate_no],'|-r')
        plt.axvline(lower,ymin=0.4,alpha=.3,color='r',lw=1.)
        plt.axvline(upper,ymin=0.4,alpha=.3,color='r',lw=1.)

    for gate_no, lower, upper in zip(y,gates_dict['x_lower_B'],gates_dict['x_upper_B']):
        plt.plot([lower,upper],[-1.*gate_no,-1.*gate_no],'|-b')
        plt.axvline(lower,ymax=0.6,alpha=.3,color='b',lw=1.)
        plt.axvline(upper,ymax=0.6,alpha=.3,color='b',lw=1.)

    # Empty proxy lines that only feed the legend.  They use the same format
    # strings as the gate segments so the legend matches what is drawn.
    plt.plot([],[],'|-r',label='A')
    plt.plot([],[],'|-b',label='B')

    plt.ylabel("Gate #")
    plt.xlabel("Gate border values")
    plt.legend(loc=2,bbox_to_anchor=[1,1.05])
    plt.title(f"Gates {gates_dict['POI']}")
    yt = list(range(-10,-2))
    yt.extend(range(3,11))

    plt.yticks(yt,[f'{abs(k)}' for k in yt])
    plt.grid(axis='y')

In [17]:
def f(x_, mu_, sigma_):
    """Return the normal CDF with mean *mu_* and scale *sigma_* at *x_*.

    This is the model function handed to ``curve_fit``; the positional
    parameters after ``x_`` are the fitted parameters.
    """
    # Importing the submodule here guarantees it is loaded even when only
    # `import scipy` was executed.  Calling norm.cdf directly also avoids
    # building a frozen distribution object on every evaluation.
    from scipy.stats import norm

    return norm.cdf(x_, loc=mu_, scale=sigma_)

def guess(x,y,xm):
    """Return the starting parameters ``[mu, sigma]`` for the fit.

    The position of the largest value in *y* (first occurrence) selects the
    entry used from *x* and *xm*: ``mu`` is ``xm`` at that position and
    ``sigma`` is the difference ``x - xm`` there.
    """
    peak = y.argmax()
    mu0 = xm[peak]
    sigma0 = x[peak] - mu0
    return [mu0, sigma0]

def fit(x,y,xm):
    """Fit the model ``f`` to the points ``(x, y)`` with bounded least squares.

    The starting point comes from ``guess(x, y, xm)``.  ``mu`` is bounded to
    ``[0, 1.5 * x[-1]]`` and ``sigma`` to ``[0, x[-1]]``.

    Returns
    -------
    dict
        ``'converged'`` : bool, whether ``curve_fit`` returned without error.
        ``'mu'``, ``'sigma'`` : fitted parameters, or the initial guess when
        the fit failed.
        ``'pmu'``, ``'psigma'`` : square roots of the diagonal of the
        covariance matrix, or NaN when the fit failed.
    """
    g = guess(x,y,xm)
    try:
        bounds = ([0.,0.],[1.5*x[-1],x[-1]])
        popt,pcov = scipy.optimize.curve_fit(f,x,y,p0=g,maxfev=5000,bounds=bounds)
    except Exception:
        # Best effort: any fitting failure is reported through
        # 'converged' rather than raised.  KeyboardInterrupt and SystemExit
        # still propagate, so a long batch can be interrupted.
        mu,sigma = g
        pmu = np.nan
        psigma = np.nan
        success = False
    else:
        mu,sigma = popt[0],popt[1]
        pmu = pcov[0,0]**0.5
        psigma = pcov[1,1]**0.5
        success = True

    return {'converged':success,'mu':mu,'sigma':sigma,'pmu':pmu,'psigma':psigma}

def fit_job(df_,gd,progress=True,gates=gates):
    """Run ``fit`` on every row of *df_* and collect the results.

    For each row the values in the ``gates`` columns are normalised to sum
    to one and accumulated; the cumulative values are fitted against the
    upper gate borders of the row's machine.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``gates`` columns and a ``"machine"`` column whose
        value selects the ``x_upper_{machine}`` / ``x_mean_{machine}``
        entries of *gd*.
    gd : dict
        Gate borders keyed by ``x_upper_*`` and ``x_mean_*``.
    progress : bool
        When true (default) a ``log_progress`` widget is shown; when false
        the rows are iterated without one.
    gates : sequence
        Column labels holding the per-gate values.
        NOTE(review): the default is bound at definition time to the
        module-level ``gates``, which is not defined in this section.

    Returns
    -------
    list of dict
        One ``fit`` result per row, each extended with ``'original_index'``
        (the row's index label).
    """
    rows = df_.iterrows()
    if progress:
        rows = log_progress(rows,every=100,size=df_.shape[0])

    res = []
    for original_index, row in rows:
        # Convert once, then normalise and accumulate.
        counts = row[gates].values.astype(np.float64)
        y = np.cumsum(np.divide(counts,np.sum(counts)))
        assert y[-1]==y.max()
        assert not np.isnan(np.sum(y))

        machine = row["machine"]
        x = np.array(gd[f'x_upper_{machine}'])
        xm = np.array(gd[f'x_mean_{machine}'])

        r = fit(x,y,xm)
        r['original_index'] = original_index
        res.append(r)
    return res
        

In [11]:
def demoplot(fit_results,original_df,gates_dict,title):
    """Draw one small diagnostic figure per fit result.

    For every entry of *fit_results* the matching row of *original_df* is
    looked up through ``'original_index'``.  The figure shows the gate
    borders of the row's machine as vertical lines, the per-gate values, their
    cumulative sum, the fitted curve ``f`` and a band of one sigma around
    ``mu``.  Results that did not converge are marked with a large text.

    Parameters
    ----------
    fit_results : iterable of dict
        Dicts with ``'original_index'``, ``'mu'``, ``'sigma'`` and
        ``'converged'``.
    original_df : pandas.DataFrame
        Rows must provide ``"machine"``, ``'primary_bc'``, ``'secondary_bc'``
        and the module-level ``gates`` columns.
    gates_dict : dict
        Gate borders keyed by ``x_lower_*``, ``x_upper_*`` and ``x_mean_*``.
    title : str
        Prefix of each figure title.
    """
    for result in fit_results:
        plt.figure(figsize=(5,2))
        row = original_df.loc[result['original_index']]

        lower_borders = gates_dict[f'x_lower_{row["machine"]}']
        upper_borders = gates_dict[f'x_upper_{row["machine"]}']
        for lower, upper in zip(lower_borders, upper_borders):
            plt.axvline(lower,alpha=.3,color='b',lw=.5)
            plt.axvline(upper,alpha=.3,color='b',lw=.5)

        x = gates_dict[f'x_upper_{row["machine"]}']
        xm = gates_dict[f'x_mean_{row["machine"]}']
        mu = result['mu']
        sigma = result['sigma']

        # Per-gate values on the first axis.
        plt.plot(xm,row[gates],'.')

        # Cumulative values on a second, unlabeled y axis.
        plt.twinx()
        plt.yticks([])
        plt.plot(x,np.cumsum(row[gates]),'x',ms=10)

        # Fitted curve and the mu / sigma markers on a third y axis.
        plt.twinx()
        plt.yticks([])
        plt.plot(x,f(x,mu,sigma),'-r')
        plt.axvline(mu,ls='--',color='r',ymax=0.8,ymin=0.3,alpha=.5)
        plt.axvspan(xmin=mu-sigma,xmax=mu+sigma,alpha=.1,ymax=0.8,ymin=0.3)

        plt.title(f"{title} machine:{row['machine']}\n{row['primary_bc']} {row['secondary_bc']}\n{result['mu']:.0f} ± {result['sigma']:.0f} ")
        if not result['converged']:
            plt.text(0.5,0.5,'F A I L',fontsize=36)
