In [None]:
# @REMOVE-FROM-TEMPLATE
# Development-only cell: invoke plaster's restart_kernel helper before anything else runs.
from plaster.tools.ipynb_helpers.displays import restart_kernel

restart_kernel()

In [None]:
# @IMPORT-MERGE
import os
# MPLCONFIGDIR is set before the remaining imports execute; presumably so that
# matplotlib (if pulled in by a later import) writes its config under /tmp -- TODO confirm
os.environ["MPLCONFIGDIR"] = "/tmp"
import numpy as np
import pandas as pd
import itertools
import cv2
import random
from scipy.stats import lognorm
from IPython.display import HTML, display
from plaster.tools.log.log import error, debug
from plaster.run.job import JobResult
from plaster.run.run import RunResult
from plaster.run.sigproc_v2 import sigproc_v2_worker as worker
from plaster.run.sigproc_v2.sigproc_v2_result import df_filter, radmat_from_df_filter, df_to_radmat
from plaster.run.plots import plots, plots_dev
from plaster.run.plots.plots_sigproc import plot_psfs, circle_locs, sigproc_v2_im, sigproc_v2_movie_from_df, sigproc_v2_im_from_df
from plaster.run.plots.plots_sigproc import wizard_xy_df, wizard_scat_df, wizard_raw_images
from plaster.run.sigproc_v2.synth import Synth
from plaster.tools.image.coord import WH, XY, roi_shift, clip2d
from plaster.tools.utils import data
from plaster.tools.zplots import zplots
from plaster.tools.schema import check
from plaster.tools.image import imops
from plaster.tools.zap import zap
from plaster.tools.utils import utils
from plaster.tools.utils import data  # NOTE(review): duplicate of the `data` import a few lines above
from plaster.run.calib.calib import Calib
from plaster.tools.ipynb_helpers.displays import hd, movie
# `z` is the zplots handle; the cells below use it both as a context manager
# (`with z(...)`) and as the plotting namespace (`z.hist`, `z.im`, `z.scat`, `z.line`).
z = zplots.setup()

In [None]:
# @REMOVE-FROM-TEMPLATE
# Development-only cell: binds `job` and `run` to a specific job folder so that
# the cells below have a `run` to operate on.
from plumbum import local  # NOTE(review): `local` is not used by any cell visible here
job = JobResult("/erisyon/internal/jobs_folder/abbe7_1t")
# Every following cell analyzes the first run of the job only.
run = job.runs[0]

In [None]:
# Key of the run task whose classifier results are analyzed below.
which_nn_v2 = "nn_v2"

# Dimensions come from the sigproc_v2 result; everything else from the nn_v2 result.
n_channels = run.sigproc_v2.n_channels
n_cycles = run.sigproc_v2.n_cycles
dyemat = run[which_nn_v2]._dyemat
dyepeps = run[which_nn_v2]._dyepeps
calls_df = run[which_nn_v2].calls("sigproc")
sigproc_df = run.sigproc_v2.fields__n_peaks__peaks__radmat()

# Parameters the nn_v2 task ran with. Note that beta is read from channel 0 only.
beta = run[which_nn_v2].params.gain_model.channels[0].beta
row_k_score_factor = run[which_nn_v2].params.row_k_score_factor
row_k_sigma = run[which_nn_v2].params.gain_model.row_k_sigma
run_row_k_fit = run[which_nn_v2].params.run_row_k_fit
debug(beta, row_k_score_factor, row_k_sigma, run_row_k_fit)

# Show parameter distributions

In [None]:
n_dyts = dyemat.shape[0]
pred_dyt_iz = calls_df.dyt_i

# One entry per histogram, in display order: (values, bins, title, x-axis label)
hist_specs = [
    (calls_df.k, (0, 4, 200), "k", "row_k"),
    (pred_dyt_iz, (0, n_dyts, n_dyts), "dyt_i, ie cy-off (excl. cy=0 and last)", "cy assign"),
    (calls_df.dyt_score, (0, 1.0, 300), "dyt_score", "dyt_score"),
    (calls_df.score, (0, 1.0, 300), "scores", "score"),
    (np.nan_to_num(calls_df.logp_dyt), (-600, -250, 100), "logp_dyt", "logp_dyt"),
    (np.nan_to_num(calls_df.logp_k), (-50, 0, 100), "logp_k", "logp_k"),
]

with z(_cols=3, _remove_nan=True, _size=300):
    for hist_vals, hist_bins, hist_title, hist_x_label in hist_specs:
        z.hist(hist_vals, _bins=hist_bins, f_title=hist_title, f_x_axis_label=hist_x_label)

In [None]:
def apply_filter(calls_df, sigproc_df, n_subsample_peaks=None, **kwargs):
    """
    Join the nn_v2 calls onto the sigproc rows (keyed by peak_i) so that one
    filter pass can use columns from either source.

    Arguments:
        calls_df: classifier calls; needs peak_i and dyt_i columns.
        sigproc_df: sigproc rows; needs a peak_i column.
        n_subsample_peaks: when not None, that many rows of calls_df are
            randomly sampled before joining.
        **kwargs: forwarded unchanged to radmat_from_df_filter.

    Returns:
        (radmat, filt_cal_df, joined_df) where radmat and the filtered
        dataframe come from radmat_from_df_filter (channel 0), filt_cal_df
        holds the calls rows of the peaks that survived, and joined_df is the
        joined frame that was handed to the filter.

    Note: reads the notebook-level `dyemat`.
    """
    if n_subsample_peaks is not None:
        calls_df = calls_df.sample(n_subsample_peaks)

    calls_by_peak = calls_df.set_index("peak_i")
    sigproc_by_peak = sigproc_df.set_index("peak_i")
    joined_df = calls_by_peak.join(sigproc_by_peak).reset_index()
    joined_df = joined_df.rename(columns={"index": "peak_i"})

    # REMOVE anything assigned to a dyt whose cycle-1 dye count is zero
    for dark_dyt_i in np.flatnonzero(dyemat[:, 1] == 0):
        joined_df = joined_df[joined_df.dyt_i != dark_dyt_i]

    radmat, filt_sig_df = radmat_from_df_filter(
        joined_df, channel_i=0, return_df=True, **kwargs
    )

    surviving_peak_iz = filt_sig_df.peak_i.unique()
    filt_cal_df = calls_by_peak.loc[surviving_peak_iz].reset_index()

    # Placeholder mask that keeps every row (a filt_cal_df.score > 0.20 cut was tried here)
    keep_rows = np.ones(radmat.shape[0], dtype=bool)

    # TODO: Try some other filtering?

    return radmat[keep_rows], filt_cal_df[keep_rows], joined_df

In [None]:
# max_k and monotonic are passed through apply_filter's **kwargs to radmat_from_df_filter.
filt_radmat, filt_cal_df, joined_df = apply_filter(
    calls_df,
    sigproc_df,
    max_k=1.5,
    monotonic=None,
)

In [None]:
# Divide every radmat row by that row's k to get the "k-balanced" radmat.
# The column vector is taken from the underlying ndarray: multi-dimensional
# indexing directly on a Series (`series[:, None]`) is deprecated in pandas 1.x
# and raises in pandas 2.x.
filt_balanced = filt_radmat / filt_cal_df.k.values[:, None]

# Side by side: raw radmat | k-balanced radmat | assigned dyetrack scaled by 5000
stack_im = np.hstack((filt_radmat, filt_balanced, 5000 * dyemat[filt_cal_df.dyt_i.values]))

with z(_cspan=(0, 20000), _size=800):
    # Order rows by assigned dyt so rows with the same call are adjacent,
    # then show every 5th row to keep the image small.
    im = stack_im[np.argsort(filt_cal_df.dyt_i.values)]
    z.im(im[0::5], f_title="Raw vs. k-balanced, filter nul assignments, sorted by pred_dyt_i")

# Summary of counts

In [None]:
# COUNT dyts of various counts
# A dyt is in the N-count group when its largest dyemat value is exactly N.
# The one-count group also requires at least one 1, so that an all-zero dyt is
# not reported as one-count; this matches how the two- and three-count groups
# are already defined.
one_count_dyt_iz = np.argwhere(np.all(dyemat <= 1, axis=1) & np.any(dyemat == 1, axis=1)).flatten()
two_count_dyt_iz = np.argwhere(np.all(dyemat <= 2, axis=1) & np.any(dyemat == 2, axis=1)).flatten()
three_count_dyt_iz = np.argwhere(np.all(dyemat <= 3, axis=1) & np.any(dyemat == 3, axis=1)).flatten()

# Boolean mask per calls row: is the row assigned to a dyt of that group?
one_rows = np.isin(calls_df.dyt_i, one_count_dyt_iz)
two_rows = np.isin(calls_df.dyt_i, two_count_dyt_iz)
three_rows = np.isin(calls_df.dyt_i, three_count_dyt_iz)

# Percentages below are relative to the number of sigproc peaks (max peak_i + 1)
n_rows_total = sigproc_df.peak_i.max() + 1
n_rows_keep = len(calls_df)
n_ones = one_rows.sum()
n_twos = two_rows.sum()
n_threes = three_rows.sum()

print(f"""
n_rows_total {n_rows_total:>8d}
n_rows_keep  {n_rows_keep:>8d} {100 * n_rows_keep / n_rows_total:>5.1f}%
n_ones       {n_ones:>8d} {100 * n_ones / n_rows_total:>5.1f}%
n_twos       {n_twos:>8d} {100 * n_twos / n_rows_total:>5.1f}%
n_threes     {n_threes:>8d} {100 * n_threes / n_rows_total:>5.1f}%
""")


# Distributions of counts

In [None]:
# Display the gain model the selected nn_v2 task ran with (bare last expression -> cell output).
run[which_nn_v2].params.gain_model

In [None]:
# From abbe7_1t
# beta: 4444.0
# sigma: 0.15

orig_beta = run[which_nn_v2].params.gain_model.channels[0].beta
orig_sigma = run[which_nn_v2].params.gain_model.channels[0].sigma

# The commented-out factors are hooks for manually nudging the model while exploring.
beta = orig_beta #* 0.95
sigma = orig_sigma #* 1.30
debug(beta, sigma)

# Fixed seed so the model overlay is identical on every Restart & Run All.
model_rng = np.random.RandomState(0)

with z(_cols=4, _size=250):
    for cnt in range(1, 3):
        # k-balanced intensities at every position where the assigned dyt has exactly `cnt` dyes
        d = filt_balanced[dyemat[filt_cal_df.dyt_i] == cnt]
        # Same number of samples drawn from the log-normal model with scale beta * cnt
        model_samples = lognorm.rvs(scale=beta * cnt, s=sigma, size=len(d), random_state=model_rng)
        with z(_merge=True, _bins=(0, 15_000, 500), alpha=0.3, f_title=f"cnt={cnt}"):
            z.hist(d, color="blue")
            z.hist(model_samples, color="red")
    

# Examine raw movies with highlights of rows that are highly variable
## What does "highly variable" mean?
* Large displacement of k?
* Large variance of the 0, 1, or 2 count areas?
* Low score?
* Low SNR?

In [None]:
# SNR per row, with negative signal clipped to zero before dividing by noise.
joined_df["clip_snr"] = joined_df.signal.clip(lower=0).div(joined_df.noise)
joined_df

In [None]:
# Fit clip_snr as a straight line in signal, using only rows with 0 < signal < 7000.
# Rows whose clip_snr is NaN/inf (e.g. from a zero or missing noise value) are
# excluded so that np.polyfit is not handed non-finite values.
mask = (
    (joined_df.signal > 0)
    & (joined_df.signal < 7000)
    & np.isfinite(joined_df.clip_snr)
)

pfit = np.polyfit(x=joined_df.signal.values[mask], y=joined_df.clip_snr.values[mask], deg=1)
p = np.poly1d(pfit)

# corr_snr: relative deviation of each row's clip_snr from the fitted line
expected_snr = p(joined_df.signal.values)
joined_df["corr_snr"] = (joined_df.clip_snr.values - expected_snr) / expected_snr

# TODO: Make corr_snr a pseudo-zscore not a simple value
# No this is wrong

# df = (
#     joined_df[joined_df.cycle_i == 1][["peak_i", "corr_snr"]]
#     .set_index("peak_i")
#     .rename(columns=dict(corr_snr="cy1_corr_snr"))
#     .join(joined_df.set_index("peak_i"))
#     .reset_index(drop=True)
# )

# The cy1_corr_snr join above is disabled, which left `df` undefined and made the
# plots below fail with NameError on a fresh kernel. Plot from joined_df, which
# carries the signal, clip_snr and score columns used here.
df = joined_df

with z(_cols=3):
    with z(_merge=True):
        z.scat(x=df.signal.values, y=df.clip_snr.values, _n_samples=1000, alpha=0.5)
        x = np.linspace(0, 10000)
        z.line(x=x, y=p(x), color="red")
#         z.line(x=x, y=p(1.5*x), color="red")
#         z.line(x=x, y=p(0.5*x), color="red")

#     z.hist(df.corr_snr[df.clip_snr > 0])
#     z.hist(df.cy1_corr_snr)
    z.hist(df.score)
    

In [None]:
# Select rows of a single field from joined_df (the joined calls + sigproc frame).
field_df = joined_df[joined_df.field_i == 1]

# Alternative row selections; enable exactly one `show_df = ...` line.
#show_df = field_df[field_df.k > 1.3].reset_index()
#show_df = field_df[field_df.k < 0.7].reset_index()
#show_df = field_df[(0.9 < field_df.k) & (field_df.k < 1.1)].reset_index()
#show_df = field_df[field_df.k > 1.3].reset_index()

#show_df = field_df[field_df.snr > 1.3].reset_index()
#show_df = field_df[  np.abs(field_df.cy1_corr_snr) > 0.4 ].reset_index()
show_df = field_df[ field_df.score < 0.1 ].reset_index()

# Movie of the selected rows, restricted to a 200x200 window at yx=(100, 200)
sigproc_v2_movie_from_df(
    run, show_df, fg_only=True,
    _cspan=(0, 500), outer_radius=2,
    yx=(100, 200), hw=(200, 200)
)