In [None]:
from trackanalysis import *
from simulator.benchmark import PeakBenchmarkJob
from peakfinding.processor.projection import PeakProjectorTask
# Set the `instrument` attribute of the tasks model to 'sdi'.
# NOTE(review): presumably this selects SDI-specific defaults for the tasks
# created through `Tasks` further down — confirm in trackanalysis.
Tasks.tasksmodel().instrument = 'sdi'

In [None]:
# Launch clusters on friedrich
# 1. add the .ssh/config file with:
#     """
#     Host friedrich
#        HostName friedrich
#        User depixus
#     """
# 2. execute:
#     """
#     ssh depixus@friedrich
#     cd /home/depixus/trackanalysis/build
#     ipcluster start -n 7
#     """
# 3. execute the current cell
# Copy the controller's client connection file from friedrich to the local /tmp.
%sx scp depixus@friedrich:/home/depixus/.ipython/profile_default/security/ipcontroller-client.json /tmp/ipcontroller-client.json
import ipyparallel as ipp
# Connect to the remote controller using the copied connection file; `sshserver`
# asks ipyparallel to reach it through SSH on depixus@friedrich.
client = ipp.Client("/tmp/ipcontroller-client.json", sshserver = "depixus@friedrich")
# View over all engines of the cluster; `_createdata` uses it to run the jobs.
dv     = client[:]

In [None]:
def _createdata(baseline = 1e-3, configs = None):
    """
    Run a `PeakBenchmarkJob` on every engine of `dv` and gather the results.

    Parameters
    ----------
    baseline:
        value stored in `job.experiment.template.baseline.sigma`.
    configs:
        value stored in `job.configurations`. When `None`, two configurations
        are benchmarked: "classic" (cleaning, alignment, event detection,
        peak selection) and "histogram" (cleaning, alignment, peak projection).

    Returns
    -------
    The concatenation of the frames returned by each engine, with 2 extra columns:

    * `pid`: position of the engine's result in the list returned by `dv.apply`
    * `dist`: absolute difference between the `z` and `truez` columns
    """
    dv.block = True

    job        = PeakBenchmarkJob()
    experiment = job.experiment
    experiment.nbindings = range(5, 20)

    template = experiment.template
    template.thermaldrift   = None
    template.brownianmotion = 1e-3
    template.baseline.sigma = baseline

    job.configurations = configs if configs is not None else {
        "classic":   [Tasks.cleaning(), Tasks.alignment(), Tasks.eventdetection(), Tasks.peakselector()],
        "histogram": [Tasks.cleaning(), Tasks.alignment(), PeakProjectorTask()]
    }
    job.nbeads = 10

    # make the job available on the engines, then run it on each one of them
    dv['job'] = job
    frames = dv.apply(lambda: job.run(40))
    for engine, frame in enumerate(frames):
        frame['pid'] = np.full(len(frame), engine, dtype = 'i4')

    merged = pd.concat(frames)
    merged['dist'] = (merged.z-merged.truez).abs()
    return merged
%time DATA_B05 = _createdata(.5e-3)
%time DATA_B1 = _createdata()
%time DATA_B2 = _createdata(2e-3)

DATA_B05.config.replace({i: "B05_"+i.split("_")[-1] for i in DATA_B05.config.unique()}, inplace = True)
DATA_B05['baseline'] = .5e-3
DATA_B1.config.replace({i: "B1_"+i.split("_")[-1] for i in DATA_B1.config.unique()}, inplace = True)
DATA_B1['baseline'] = 1e-3
DATA_B2.config.replace({i: "B2_"+i.split("_")[-1] for i in DATA_B2.config.unique()}, inplace = True)
DATA_B2['baseline'] = 2e-3
DATA = pd.concat([DATA_B05, DATA_B1, DATA_B2])

In [None]:
# Fraction of each peak type per configuration, rows with peaktype "base" excluded.
nonbase = DATA[DATA.peaktype != "base"]
counts  = nonbase.pivot_table(index = "config", columns = "peaktype", values = "bead", aggfunc = "count").fillna(0)
# one weight per configuration: the inverse of its number of non-"base" rows
wgt = 1./nonbase.groupby("config").bead.count()
out = counts.mul(wgt.reindex(counts.index), axis = 0)
out.rename(columns = {"base": "baseline", "bind": "TP"}).style.format("{:.1%}")

In [None]:
def _hist(data, name, **kwa):
    """
    Plot, for every configuration in `data`, the distribution of column `name`
    over the rows with peaktype "bind".

    Two curves are created per configuration: the histogram expressed in % of
    the non-NaN values (dashed) and its cumulative sum.

    Parameters
    ----------
    data:
        frame with at least the columns `config`, `peaktype` and `name`.
    name:
        the column to histogram.
    **kwa:
        passed on to `np.histogram` (e.g. `bins`). The legacy `normed` keyword
        is accepted and forwarded as `density`.

    Returns
    -------
    An `hv.NdOverlay` with one entry per configuration.
    """
    # `normed` has been removed from `np.histogram`: forward it as `density`
    # such that older calls keep on working.
    if "normed" in kwa:
        kwa["density"] = kwa.pop("normed")

    def _build(tpe):
        # NaN are dropped *before* creating the weights: `np.histogram` requires
        # the weights to have the same shape as the values.
        out  = data[(data.config ==tpe) & (data.peaktype == "bind")][name].dropna()
        vals = np.histogram(out, weights = np.ones(len(out), dtype = 'f4')*100./len(out), **kwa)
        xv   = vals[1][1:]*.5+vals[1][:-1]*.5  # bin centers
        yv   = vals[0].cumsum()
        return (
            hv.Curve((xv, vals[0]), label = tpe, group = "ratio").options(line_dash= "dashed")
            *hv.Curve((xv, yv), group = "sum", label = tpe+": cumsum")
        ).redim(x = name, y = hv.Dimension("cnt", label = "% count"))
    return (
        hv.NdOverlay({i: _build(i) for i in data.config.unique()})
        .redim(dist= "Z precision").redim.range(cnt = (0, 100))
        .options(show_grid = True)
    )

# Distribution of |z - truez| for the "bind" peaks. The former `normed = False`
# argument is dropped: it was the default behaviour and `np.histogram` no longer
# accepts that keyword.
_hist(DATA, "dist", bins = np.linspace(0, 4e-3, 20))

In [None]:
# Mean and standard deviation of the `clock` column per configuration, computed
# over a single row per (pid, run, track, bead, config) group.
# NOTE(review): `clock` looks like a processing time in seconds — confirm.
per_bead = DATA.groupby(["pid", "run", "track", "bead", "config"]).first().reset_index()
per_bead.groupby("config").agg({"clock": ["mean", "std"]}).style.format("{:.3f} s")

In [None]:
def _hist(data, name, **kwa):
    """
    Plot, for every configuration in `data`, the percentage of rows with a
    non-NaN `z` as a function of column `name`.

    For each histogram bin holding N > 0 rows, the plotted value is
    100 * (rows with a non-NaN `z`) / N with an error bar of 100/sqrt(N). The
    error bar is clipped such that it stays within [0, 100].

    Note: this definition replaces the `_hist` function defined earlier in
    this notebook.

    Parameters
    ----------
    data:
        frame with at least the columns `config`, `z` and `name`.
    name:
        the column used as the x axis.
    **kwa:
        passed on to `np.histogram` (e.g. `bins`). The legacy `normed` keyword
        is accepted and forwarded as `density`.

    Returns
    -------
    An `hv.NdOverlay` with one curve and its error bars per configuration.
    """
    # `normed` has been removed from `np.histogram`: forward it as `density`
    # such that older calls keep on working.
    if "normed" in kwa:
        kwa["density"] = kwa.pop("normed")

    def _build(tpe):
        rows  = data[(data.config ==tpe)]
        found = np.histogram(rows[~rows.z.isna()][name].dropna(), **kwa)
        total = np.histogram(rows[name].dropna(), **kwa)
        good  = total[0] > 0  # empty bins are discarded

        ratio = found[0][good]/total[0][good]*100
        xv    = ((found[1][:-1]+found[1][1:])*.5)[good]
        err   = 1./np.sqrt(total[0][good])*100

        # lower bar: must not go below 0%
        errinf = np.minimum(err, ratio)
        # upper bar: must not go above 100%
        errsup = np.minimum(err, 100-ratio)

        pts = list(zip(xv, ratio, errinf, errsup))
        return (
            hv.Curve(pts, label = tpe)
            *hv.ErrorBars(pts)
        ).redim(y = "% found", x = name)
    return hv.NdOverlay({i: _build(i) for i in data.config.unique()})

# One panel per column: % of rows with a measured `z` as a function of that column.
# The former `normed = False` argument is dropped: it was the default behaviour
# and `np.histogram` no longer accepts that keyword.
hv.NdLayout({
    j: (
        _hist(DATA, i, bins = k)
        .redim.label(**{i:j})
        .options(show_grid = True)
    )
    for i, j, k in (
        ('delta', "Distance to another binding position", np.linspace(0., 2e-2, 20)),
        ('truet', "Hybridisation time", np.linspace(1./30., 30/30., 31)),
        ('truer', "Hybridization rate", np.linspace(0.01, .13, 20)),
    )
}).cols(1)