# Gene selection widget prototype

This implements a searchable list of genes, of which multiple can be selected (Cmd-click).

## Possible extensions

* Speed up updates to options in each selector. Takes a while when it's a long list.
* Figure out a better API for formatting gene options

# Setup

In [None]:
import matplotlib as mpl
mpl.use("Agg") # Only for output example
import matplotlib.pyplot as plt

In [None]:
import pandas as pd
import numpy as np
import scanpy.api as sc
import ipywidgets
from functools import partial
from itertools import repeat
import io

In [None]:
from ipywidgets import Text, SelectMultiple, Button, Image, HBox, VBox, Output
from ipywidgets import GridBox, Layout

Preprocess data

In [None]:
# Load an already processed AnnData object; every example below reads from `adata`.
# Alternative input that was also used:
# adata = sc.read("../data/CellBench10X.h5ad")
adata = sc.read("../data/CellBench10X_noraw.h5ad", backed="r") # opened with backed="r"; the widgets also work with a backed anndata

In [None]:
# Label shown for each gene in the selectors: the gene symbol followed by the
# var name in parentheses. The callbacks below map selected labels back to
# genes through this column.
# TODO: this is something I'd like to handle better.
adata.var["search_field"] = adata.var["gene_symbol"].astype(str) + " (" + adata.var_names.values + ")"

# Basic example

**NOTE:** These require the mpl inline backend, but importing that makes the caching example not work.

## Callbacks

In [None]:
def sorting_callback(search):
    """
    Re-order the options of the global `selection` widget so matches come first.

    Options matching the search text (case-insensitively) are moved to the top
    in sorted order; the remaining options follow in their existing relative
    order.

    Args:
        search:
            Change object passed by `searchbar.observe`; only its `.new`
            attribute (the new search text) is read. The text is used as a
            regular expression when it compiles as one, otherwise as a
            literal substring.
    """
    import re

    pattern = search.new
    # The labels built from `search_field` contain parentheses, so partially
    # typed labels such as "GENE (" are not valid regexes; match those
    # literally instead of raising inside the widget callback.
    try:
        re.compile(pattern)
        is_regex = True
    except re.error:
        is_regex = False

    # dtype=object keeps the `.str` accessor usable when there are no options.
    options = pd.Series(selection.options, dtype=object)
    is_match = options.str.contains(pattern, case=False, regex=is_regex)
    selection.options = sorted(options[is_match]) + list(options[~is_match])

In [None]:
def plot_selected(selected):
    """
    Redraw the UMAPs of the selected genes inside the global `out` widget.

    Args:
        selected:
            Change object passed by `selection.observe`; its `.new` attribute
            holds the selected `search_field` labels.
    """
    is_selected = adata.var["search_field"].isin(selected.new)
    selected_indices = adata.var_names[is_selected]
    out.clear_output()
    # Deselecting everything should only clear the previous plots rather than
    # ask scanpy to colour by an empty list of genes.
    if len(selected_indices) == 0:
        return
    with out:
        sc.pl.umap(adata, color=selected_indices)

## Plotting

In [None]:
# Build the three widgets. `selection` and `out` are read as globals by the
# callbacks defined above, so these names must stay as they are.
searchbar = Text(value="search here", continuous_update=False)
selection = SelectMultiple(options=adata.var["search_field"])
out = Output()

# Changing the selection replots; changing the search text re-sorts the options.
selection.observe(plot_selected, names=["value"])
searchbar.observe(sorting_callback, names=["value"])

# Last expression of the cell, so the stacked widgets are displayed.
VBox([searchbar, selection, out])

In [None]:
def gene_selector(adata):
    """
    Build a two-list gene picker that plots UMAPs for the chosen genes.

    The left list starts with every `search_field` label in `adata.var`. The
    `>>` / `<<` buttons move the highlighted labels between the two lists, and
    the genes currently listed on the right are plotted below the selectors.

    Args:
        adata:
            Annotated data object; must provide `var["search_field"]`,
            `var_names` and whatever `sc.pl.umap` needs to draw a UMAP.

    Returns:
        The `VBox` containing the search boxes, selectors, buttons and plots.
    """
    import re

    left_search = Text("search here", continuous_update=False)
    right_search = Text("search here", continuous_update=False)

    left_options = SelectMultiple(options=adata.var["search_field"])
    right_options = SelectMultiple(options=[])

    move_right = Button(description=">>")
    move_left = Button(description="<<")

    plots = Output()

    def sorting_callback(search, selection):
        """
        Move the options of `selection` that match the search text to the top.

        Args:
            search:
                Change object from the search box; `.new` is the search text.
            selection:
                Selector whose options are re-ordered.
        """
        pattern = search.new
        # Labels contain parentheses, so a partially typed label such as
        # "GENE (" is not a valid regex; fall back to a literal match.
        try:
            re.compile(pattern)
            is_regex = True
        except re.error:
            is_regex = False
        # dtype=object keeps `.str` usable while the selector is still empty,
        # which is the initial state of the right-hand list.
        options = pd.Series(selection.options, dtype=object)
        is_match = options.str.contains(pattern, case=False, regex=is_regex)
        selection.options = sorted(options[is_match]) + list(options[~is_match])

    def move_selection(button, orig, dest):
        """
        Args:
            button:
                Button which triggers callback
            orig:
                Selector options are moving from
            dest:
                Selector options are moving to
        """
        moved = list(orig.value)
        # Set lookup avoids rescanning the selected tuple for every option.
        moved_lookup = set(moved)
        remaining = [option for option in orig.options if option not in moved_lookup]
        # Clear the destination's highlighted entries before replacing its
        # options (`value` is the selection trait; `values` does not exist).
        dest.value = ()
        dest.options = moved + list(dest.options)
        orig.options = remaining

    def plot_selected(selected, out):
        """
        Plot a UMAP for every gene whose label is in `selected.new`.

        Args:
            selected:
                Change object from the right-hand selector's `options`.
            out:
                Output widget the plots are drawn into.
        """
        selected_indices = adata.var_names[adata.var["search_field"].isin(selected.new)]
        out.clear_output()
        if len(selected_indices) > 0:
            with out:
                sc.pl.umap(adata, color=selected_indices)

    left_search.observe(partial(sorting_callback, selection=left_options), names=["value"])
    right_search.observe(partial(sorting_callback, selection=right_options), names=["value"])

    move_right.on_click(partial(move_selection, orig=left_options, dest=right_options))
    move_left.on_click(partial(move_selection, orig=right_options, dest=left_options))

    # Everything listed on the right is plotted, so replot when that list changes.
    right_options.observe(partial(plot_selected, out=plots), names=["options"])

    layout = VBox([
        HBox([
            VBox([left_search, left_options]),
            VBox([move_right, move_left]),
            VBox([right_search, right_options])
        ]),
        plots
    ])

    return layout

In [None]:
# Build the selector and show it as this cell's output.
gene_selector(adata)

## Experiment: filtering search instead of sorting (failed)

In this version, I'll only update with values that match the search, possibly speeding up the process.

This adds a lot of complication to the code. For example, if I move some values over while a search is applied, do I show them? Do I now have to retrigger the search? It might not be worth it. Moving the search to JavaScript might be the way to go.

In [None]:
def gene_selector(adata):
    """
    Two-list gene picker whose search boxes filter (rather than sort) options.

    Each side keeps a master list of the labels that belong to it. A search
    shows only the matching labels of that side, sorted. Moving labels updates
    both master lists and shows the moved labels in the destination selector
    even if they do not match the destination's current search.

    Args:
        adata:
            Annotated data object; must provide `var["search_field"]`,
            `var_names` and whatever `sc.pl.umap` needs to draw a UMAP.

    Returns:
        The `VBox` containing the search boxes, selectors, buttons and plots.
    """
    import re

    left_search = Text("search here", continuous_update=False)
    right_search = Text("search here", continuous_update=False)

    # Master lists of labels per side. They are plain lists mutated in place,
    # so the callbacks bound with `partial` below always see current contents.
    left_options = list(adata.var["search_field"].values)
    right_options = []

    left_selector = SelectMultiple(options=left_options)
    right_selector = SelectMultiple(options=[])

    move_right = Button(description=">>")
    move_left = Button(description="<<")

    plots = Output()

    def search_callback(search, options, selection):
        """
        Update selection with fields from options which contain search.

        Args:
            search:
                Change object from the search box; `.new` is the search term,
                used as a regex when it compiles as one and literally otherwise.
            options (Sequence[str]):
                Master list of labels belonging to this side.
            selection (SelectMultiple):
                Selector that displays the matching labels.
        """
        pattern = search.new
        # Labels contain parentheses, so a partially typed label is often not
        # a valid regex; fall back to a literal match instead of raising.
        try:
            re.compile(pattern)
            is_regex = True
        except re.error:
            is_regex = False
        # dtype=object keeps `.str` usable when this side has no labels yet.
        options = pd.Series(options, dtype=object)
        is_match = options.str.contains(pattern, case=False, regex=is_regex)
        selection.options = sorted(options[is_match])

    def move_selection(button, orig, dest, orig_options, dest_options):
        """
        Args:
            button:
                Button which triggers callback
            orig:
                Selector options are moving from
            dest:
                Selector options are moving to
            orig_options:
                Options for selector orig (master list, updated in place)
            dest_options:
                Options for selector dest (master list, updated in place)
        """
        moved = list(orig.value)
        if not moved:
            return
        moved_lookup = set(moved)

        # Transfer ownership of the labels between the master lists.
        orig_options[:] = [option for option in orig_options if option not in moved_lookup]
        dest_options.extend(moved)

        # Update what is displayed without re-running either search.
        dest_visible = moved + list(dest.options)
        orig_visible = [option for option in orig.options if option not in moved_lookup]
        dest.value = ()
        dest.options = dest_visible
        orig.options = orig_visible

    def plot_selected(selected, out):
        """
        Plot a UMAP for every gene whose label is in `selected.new`.

        NOTE: this observes the displayed options of the right-hand selector,
        so filtering the right-hand list also narrows what is plotted.
        """
        selected_indices = adata.var_names[adata.var["search_field"].isin(selected.new)]
        out.clear_output()
        if len(selected_indices) > 0:
            with out:
                sc.pl.umap(adata, color=selected_indices)

    left_search.observe(partial(search_callback, options=left_options, selection=left_selector), names=["value"])
    right_search.observe(partial(search_callback, options=right_options, selection=right_selector), names=["value"])

    move_right.on_click(partial(move_selection, orig=left_selector, dest=right_selector,
                               orig_options=left_options, dest_options=right_options))
    move_left.on_click(partial(move_selection, orig=right_selector, dest=left_selector,
                               orig_options=right_options, dest_options=left_options))

    right_selector.observe(partial(plot_selected, out=plots), names=["options"])

    layout = VBox([
        HBox([
            VBox([left_search, left_selector]),
            VBox([move_right, move_left]),
            VBox([right_search, right_selector])
        ]),
        plots
    ])

    return layout

In [None]:
# Build the filtering variant of the selector and show it as this cell's output.
gene_selector(adata)

## Experiment: Caching plots

* I would like to cache plots between selections, this should make displaying them faster especially when I'm working with more cells.
* This was really slow once, and I'm not sure what to make of that. Generally, this is much faster.
* This one requires a non-inline backend, or you'll get plots returned multiple times. Not sure what to do about that.

In [None]:
# Define callbacks
def sorting_callback(search, selection):
    """
    Move the options of `selection` that match the search text to the top.

    Matching is case-insensitive. Matching options come first in sorted order;
    the remaining options follow in their existing relative order.

    Args:
        search:
            Change object passed by the search box observer; only its `.new`
            attribute (the search term) is read. The term is used as a regex
            when it compiles as one, otherwise as a literal substring.
        selection (SelectMultiple):
            Selector whose `options` are re-ordered in place.
    """
    import re

    pattern = search.new
    # Labels contain parentheses, so a partially typed label such as "GENE ("
    # is not a valid regex; match it literally instead of raising inside the
    # widget callback.
    try:
        re.compile(pattern)
        is_regex = True
    except re.error:
        is_regex = False

    # dtype=object keeps the `.str` accessor usable when there are no options,
    # which is the initial state of the right-hand selector.
    options = pd.Series(selection.options, dtype=object)
    is_match = options.str.contains(pattern, case=False, regex=is_regex)
    selection.options = sorted(options[is_match]) + list(options[~is_match])

def move_selection(button, orig, dest):
    """
    Move the highlighted options of `orig` to the top of `dest`.

    Args:
        button:
            Button which triggers callback (unused).
        orig:
            Selector options are moving from
        dest:
            Selector options are moving to
    """
    moved = list(orig.value)
    # Set lookup avoids rescanning the selected tuple for every option.
    moved_lookup = set(moved)
    remaining = [option for option in orig.options if option not in moved_lookup]
    # Clear the destination's highlighted entries before replacing its options
    # (`value` is the attribute that holds the selection; `values` is not).
    dest.value = ()
    dest.options = moved + list(dest.options)
    orig.options = remaining

def plot_selected(adata, selected, plot_grid, plot_cache):
    """
    Show one cached UMAP image per selected gene in `plot_grid`.

    Args:
        adata (anndata.AnnData):
            AnnData object to be plotting from.
        selected:
            Object from selection callback; `.new` holds the selected
            `search_field` labels.
        plot_grid (ipywidgets.GridBox):
            Grid box to put plots in; its `children` are replaced.
        plot_cache (dict):
            Cache of previously rendered plots, keyed by label. Newly rendered
            images are added to it.
    """
    in_selection = adata.var["search_field"].isin(selected.new)
    selected_items = adata.var.loc[in_selection, "search_field"]
    # `Series.iteritems` was removed in pandas 2.0; `items` is the equivalent.
    for index, option in selected_items.items():
        if option in plot_cache:
            continue
        # Render to PNG bytes once so the image can be reused on later selections.
        fig = sc.pl.umap(adata, color=index, show=False, title=option).figure
        with io.BytesIO() as byteio:
            fig.savefig(byteio, format="png")
            img = ipywidgets.Image(value=byteio.getvalue(), format="png")
        plt.close(fig)
        plot_cache[option] = img
    plot_grid.children = [plot_cache[option] for option in selected_items]

In [None]:
def gene_selector(adata, ncols=3):
    """
    Two-list gene picker that shows cached UMAP images in a grid.

    Args:
        adata:
            Annotated data object; must provide `var["search_field"]` and
            whatever `plot_selected` needs to draw a UMAP.
        ncols (int):
            Number of equal-width columns in the plot grid.

    Returns:
        The `VBox` containing the search boxes, selectors, buttons and grid.
    """
    # Define elements
    left_search = Text("search here", continuous_update=False)
    right_search = Text("search here", continuous_update=False)

    left_selector = SelectMultiple(options=adata.var["search_field"])
    right_selector = SelectMultiple(options=[])

    move_right = Button(description=">>")
    move_left = Button(description="<<")

    # One "1fr" track per column gives `ncols` equally wide columns.
    plot_grid = GridBox(layout=Layout(grid_template_columns=" ".join(repeat("1fr", ncols))))
    # Rendered images keyed by label; lives as long as this selector does.
    plot_cache = {}

    # Register callbacks
    left_search.observe(partial(sorting_callback, selection=left_selector), names=["value"])
    right_search.observe(partial(sorting_callback, selection=right_selector), names=["value"])

    move_right.on_click(partial(move_selection, orig=left_selector, dest=right_selector))
    move_left.on_click(partial(move_selection, orig=right_selector, dest=left_selector))

    # Everything listed on the right is plotted, so update the grid when that list changes.
    right_selector.observe(partial(plot_selected, adata, plot_grid=plot_grid, plot_cache=plot_cache), names=["options"])

    # Define layout
    layout = VBox([
        HBox([
            VBox([left_search, left_selector]),
            VBox([move_right, move_left]),
            VBox([right_search, right_selector])
        ]),
        plot_grid
    ])

    return layout

In [None]:
# Build the caching selector with a two-column plot grid and show it as this cell's output.
gene_selector(adata, ncols=2)