In [None]:
# @IMPORT-MERGE
import numpy as np
import pandas as pd
from munch import Munch
from plaster.run.run import RunResult
from plaster.run.job import JobResult
from plaster.run.plots import plots
from plaster.run.call_bag import CallBag
from plaster.tools.ipynb_helpers.displays import hd, qgrid_mono
from plaster.tools.zplots import zplots
# Notebook-level plotting handle returned by zplots.setup().
# Note: filter_by_score (below) creates its own ZPlots instance and binds it
# to a local variable also named `z`, shadowing this one inside that function.
z = zplots.setup()

In [None]:
# @REMOVE-FROM-TEMPLATE
# Development-only setup: binds `job` and `run` at notebook scope.
# `run` is read as a global by handle_submit() further down.
# NOTE(review): the cell is tagged @REMOVE-FROM-TEMPLATE, so presumably the
# template tooling strips it and supplies `job`/`run` some other way -- confirm.
# NOTE(review): `local` is not used by any cell visible here.
from plumbum import local
# Hard-coded absolute job folder; only valid on a machine that has this path.
job = JobResult("/erisyon/internal/jobs_folder/bmf_2020_07_23_02_hemolysate_classify_v2")
# Only the first run of the job is analysed below.
run = job.runs[0]

# Signal Classification

In [None]:
# USE the slider bar at the bottom of this cell to set the minimum score threshold
# NOTE that you need to click on the "Run Interact" button after setting sliders
# OPTIONALLY check the box to save a copy of the data as a csv
from ipywidgets import interactive, FloatSlider, IntSlider, Checkbox
# Import from the public IPython.display module: pulling `display` out of
# IPython.core.display is a deprecated import path, while IPython.display
# exports the same `display` and `Markdown` objects.
from IPython.display import display, Markdown

def filter_by_score(run,min_score,save_csv,top_n_classes):
    """
    Summarize the classifier calls of `run` whose score exceeds `min_score`.

    Renders, in order: a table of the `top_n_classes` peptides with the most
    accepted calls, a one-line accepted/total summary, and two plots (accepted
    call count per peptide index, and the score histogram of all calls).

    Arguments:
        run: run result object. Must provide classify_rf_call_bag() (whose
            result exposes scores, pred_pep_iz and n_rows) and prep (which
            exposes n_peps and pros__peps__pepstrs()).
        min_score: threshold; only calls with score strictly greater than
            this value are counted.
        save_csv: when truthy, the displayed table is also written to
            'pred_counts_<min_score>.csv' in the current working directory,
            with '.' in the score replaced by 'pt'.
        top_n_classes: number of rows (peptides) to show / save.

    Returns:
        None. All results are emitted through display() and the plots.

    Side effects:
        Sets the global pandas option display.max_rows to None.
    """
    bag = run.classify_rf_call_bag()
    n_peps = run.prep.n_peps

    # Count accepted calls per predicted peptide index; minlength guarantees
    # one entry per peptide even when the highest indices receive no calls.
    good_score_iz = bag.scores > min_score
    pred_counts = np.bincount(bag.pred_pep_iz[good_score_iz], minlength=n_peps)
    pred_counts_df = pd.DataFrame(
        dict(pep_i=np.arange(n_peps), pred_counts=pred_counts)
    )

    # Attach protein / peptide-string columns, keyed on pep_i.
    pred_counts_df = (
        pred_counts_df.set_index("pep_i")
        .join(run.prep.pros__peps__pepstrs().set_index("pep_i"), how="left")
        .sort_index()
        .reset_index()
    )
    cols_to_show = ["pep_i", "pred_counts", "pro_id", "pro_is_decoy", "seqstr"]

    # Build the top-N table once so the displayed table and the CSV are
    # guaranteed to be the same data.
    top_classes_df = pred_counts_df.nlargest(top_n_classes, "pred_counts")[cols_to_show]

    # Lift the row limit BEFORE rendering. Setting it after the display call
    # meant the first invocation truncated tables longer than the pandas
    # default row limit, while later invocations showed every row.
    pd.options.display.max_rows = None
    display(top_classes_df)
    if save_csv:
        min_score_text = str(min_score).replace('.','pt')
        top_classes_df.to_csv('pred_counts_'+min_score_text+'.csv',float_format="%g")

    # overview
    accepted_counts = pred_counts_df.pred_counts.sum()
    total_counts = bag.n_rows
    display(Markdown(f"## {accepted_counts} spots accepted out of {total_counts} spots classified"))

    from plaster.tools.zplots.zplots import ZPlots

    # Local plotting handle; intentionally separate from any notebook-level `z`.
    z = ZPlots()
    with z(_cols=2, fill_alpha=0.5, line_alpha=0.05):
        with z(f_title=f"Classification, fraction by index"):
            z.cols(pred_counts, color=z.compare1, legend_label="all classes")

        # The histogram covers every call, not only the accepted ones.
        with z(f_x_range=[0, 1], f_title="Score distribution"):
            z.hist(bag.scores, color=z.compare2)

def handle_submit(min_score,save_csv,top_n_classes):
    """
    Widget callback: forward the current control values to filter_by_score
    for the notebook-level `run`.
    """
    filter_by_score(
        run,
        min_score=min_score,
        save_csv=save_csv,
        top_n_classes=top_n_classes,
    )

# Build the controls in manual mode, so handle_submit only runs when the
# "Run Interact" button is clicked rather than on every slider movement.
dropdown_min_score = interactive(
    handle_submit,
    {'manual': True},
    save_csv=Checkbox(False, description='save as csv'),
    min_score=FloatSlider(min=0.01, max=.99, step=0.01),
    top_n_classes=IntSlider(min=1,max=200,step=1),
)
display(dropdown_min_score)

