# track

> Contains the Track class and plotting functions

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp track

In [None]:
from bokeh.io import output_notebook #|hide_line
output_notebook(hide_banner=True) #|hide_line

In [None]:
#| hide
from nbdev.showdoc import *


In [None]:
#| export
from fastcore.basics import *

from bokeh.plotting import figure

from bokeh.models import (
    Quad,
    CustomJS,
    ColumnDataSource,
    NumeralTickFormatter,
    Range1d,
    HoverTool,
)

from genomenotebook.javascript import track_callback_code

import pandas as pd


try: #pyBigWig cannot be installed on Windows
    import pyBigWig
except ImportError:
    pyBigWig = None
    
import warnings

from typing import List





In [None]:
#| hide
from genomenotebook.javascript import _get_js_code

In [None]:
#| hide
#Useful for JavaScript development: the JavaScript code is not automatically reimported,
#so the callback sources are fetched again here through _get_js_code.
#Note that this rebinds track_callback_code, which the exported cell imports from genomenotebook.javascript.
x_range_change_callback_code = _get_js_code("x_range_change_callback_code.js")
search_callback_code = _get_js_code("search_callback_code.js")
track_callback_code = _get_js_code("track_callback_code.js")

In [None]:
#| export
class Track:
    """ Track objects should only be created through GenomeBrowser.add_track

    A Track stores the figure settings and a list of render methods. The Bokeh
    figure itself is only built when `get_fig` is called, at which point every
    registered render method is invoked to draw its glyphs.
    """
    def __init__(self,
                 ylim: tuple = None, #limits of the y axis. If not specified, ylim will be set automatically with the max and min of the data plotted with Track.line, Track.scatter or Track.bar
                 height: int = 200, #size of the track
                 tools: str = "xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset", #comma separated list of Bokeh tools that can be used to navigate the plot
                 **kwargs, #additional keyword arguments forwarded to bokeh.plotting.figure
                ):
        self.height = height

        #ensuring that the active_scroll tool is part of the tools list
        if "xwheel_zoom" not in [t.strip() for t in tools.split(',')]:
            tools+=", xwheel_zoom"

        self.tools = tools

        #data registered by the plotting methods (None until one of them is called)
        self.data = None

        self.ylim = ylim
        #functions called as render_method(track, fig, loaded_range) by get_fig
        self.render_methods = []

        #both attribute names refer to the same dictionary of figure keyword arguments
        self.bokeh_figure_args = kwargs
        self.bokeh_args = kwargs

    def get_fig(self, x_range, width, bounds, max_glyph_loading_range, output_backend):
        """Build the Bokeh figure of the track and draw every registered render method on it.

        `x_range` is used as the x range of the figure, `width` as its frame width and
        `output_backend` is forwarded to `bokeh.plotting.figure`. `max_glyph_loading_range`
        is the margin added on each side of `x_range` to define the initially loaded window.
        `bounds` is accepted for interface compatibility but is not used here.
        Returns the Bokeh figure.
        """
        fig = figure(tools=self.tools,
                     active_scroll="xwheel_zoom",
                     height=self.height,
                     y_axis_location="right", #this is required in order to keep a proper alignment with the sequence
                     x_range=x_range,
                     output_backend=output_backend,
                     **self.bokeh_figure_args)
        fig.frame_width = width

        fig.xaxis[0].formatter = NumeralTickFormatter(format="0,0")

        #`is not None` (rather than `!= None`) so that list or array-like limits do not break the test
        if self.ylim is not None:
            ymin, ymax = self.ylim
            fig.y_range = Range1d(ymin, ymax, bounds=(ymin, ymax))

        #window of positions for which glyphs are loaded; handed to every render method
        loaded_range = ColumnDataSource({"start":[x_range.start-max_glyph_loading_range],
                                         "end":[x_range.end+max_glyph_loading_range],
                                         "range":[max_glyph_loading_range]})

        for render_method in self.render_methods:
            render_method(self, fig, loaded_range)

        return fig

In [None]:
#| hide
t=Track(height=300)
assert t.height == 300
assert t.ylim is None
#the active_scroll tool is appended when it is missing from the tools list...
assert Track(tools="pan, save").tools == "pan, save, xwheel_zoom"
#...and the list is left untouched when the tool is already there
assert Track(tools="pan,xwheel_zoom").tools == "pan,xwheel_zoom"
#extra keyword arguments are kept for the Bokeh figure
assert Track(toolbar_location="above").bokeh_figure_args == {"toolbar_location": "above"}

Adding a track with random points as a demonstration. 
genomeNotebook uses the Bokeh library and `track.fig` is a simple Bokeh figure on which you can plot anything you want using Bokeh.

In [None]:
from genomenotebook.browser import GenomeBrowser
from genomenotebook.data import get_example_data_dir
import os
import numpy as np

In [None]:
# Paths to the example genome (fasta) and annotation (gff3) files used by the demonstrations below;
# both are looked up in the directory returned by get_example_data_dir().
data_path = get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

In [None]:
#| export
@patch
def set_track_data_source(self:Track, 
                          data:pd.DataFrame, # data to be plotted
                          pos, # name of the column containing the positions along the genome
                          columns:List[str], # columns to store as data
                         ):
    """Store the columns needed for plotting, sorted by position, and set `ylim` if it is not defined yet.

    Sets `self.columns` (the requested columns, without None and without duplicates)
    and `self.data` (the `pos` column plus `self.columns`, sorted by `pos`).
    """
    #some arguments can be None => remove them.
    #Duplicates (e.g. y also listed in hover_data) are dropped, keeping the first occurrence,
    #so that the stored DataFrame never ends up with twice the same column name.
    columns = list(dict.fromkeys(c for c in columns if c))
    self.columns = columns

    selected = list(dict.fromkeys([pos] + columns))
    data = data[selected].sort_values(pos)

    self.data = data

    y = columns[0] # TODO: columns[0] seems kind of arbitrary, this should probably be set in set_figure data? Or the functions for individual plot types
    if self.ylim is None:
        #Series.min/max skip missing values, whereas the numpy equivalents would
        #return NaN (and thus an unusable y range) as soon as one value is missing
        ymin = data[y].min()
        ymax = data[y].max()
        self.ylim = (ymin, ymax)


@patch
def set_figure_data_source(self:Track, fig, pos, loaded_range):
    """Create the data source displayed on `fig` and keep it updated when the x range changes.

    Only the rows of `self.data` whose `pos` lies strictly inside the window described by
    `loaded_range` are loaded initially. A JavaScript callback (`track_callback_code`) is
    attached to the start of the figure x range and receives both the full and the loaded
    data sources. The y range of the figure is set from `self.ylim` and a hover tool
    showing `self.columns` is added. Returns the loaded `ColumnDataSource`.
    """
    all_data = ColumnDataSource(self.data)

    window_start = loaded_range.data["start"][0]
    window_end = loaded_range.data["end"][0]
    positions = self.data[pos]
    data_subset = self.data.loc[(window_start < positions) & (positions < window_end)]

    if len(data_subset)>10**5:
        #adjacent literals instead of a backslash continuation, which embedded the indentation in the message
        warnings.warn("You are trying to plot more than 10^5 glyphs, this might overflow your memory. "
                      "Consider using bounds or reducing the number of datapoints.")
    loaded_data = ColumnDataSource(data_subset)

    xcb = CustomJS(
        args = {
            "x_range": fig.x_range,
            "pos": pos,
            "all_data": all_data,
            "loaded_data": loaded_data,
            "track_loaded_range": loaded_range,
        },
        code = track_callback_code
    )
    fig.x_range.js_on_change('start', xcb)

    ymin, ymax = self.ylim
    fig.y_range = Range1d(ymin, ymax, bounds=(ymin, ymax))

    #dict.fromkeys removes duplicates while keeping the column order (a set would shuffle the tooltips);
    #the @{name} form also works for column names containing spaces
    tooltips = [(attr, f"@{{{attr}}}") for attr in dict.fromkeys(self.columns)]
    fig.add_tools(HoverTool(tooltips=tooltips))
    return loaded_data


In [None]:
#| export
@patch
def line(self:Track,
         data: pd.DataFrame, #pandas DataFrame containing the data
         pos: str, #name of the column containing the positions along the genome
         y: str, #name of the column containing the data to be plotted on the y-axis
         hover_data:List[str] = None, #list of column names to be shown when hovering over the data
         **kwargs #enables to pass keyword arguments used by the Bokeh function
        ):
    """Register a line plot of `y` against `pos` on the track.

    The data is stored on the track immediately; the line itself is drawn when the
    figure of the track is built. Raises ValueError if `hover_data` is not None,
    a str, a list or a tuple.
    """
    #isinstance (rather than an exact type comparison) also accepts tuples and list subclasses
    if hover_data is None:
        hover_data = []
    elif isinstance(hover_data, str):
        hover_data = [hover_data]
    elif isinstance(hover_data, (list, tuple)):
        hover_data = list(hover_data) #copy, so the caller's sequence is never modified
    else:
        raise ValueError("hover_data must be None, str, or List")

    def render_method(track, fig, loaded_range):
        loaded_data = track.set_figure_data_source(fig, pos, loaded_range)
        fig.line(source=loaded_data, x=pos, y=y, **kwargs)
    
    self.set_track_data_source(data, pos, columns=[y]+hover_data)

    self.render_methods.append(render_method)


Additional kwargs are passed as is to [`bokeh.plotting.figure.line`](https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html#bokeh.plotting.figure.line)

#### Plotting some ChIP-seq data 

In [None]:
g=GenomeBrowser(genome_path=genome_path, 
                gff_path=gff_path, 
                init_pos=50000,
                bounds=(30000,85000), 
                search=False, 
                show_seq=False)

if pyBigWig is None:
    #pyBigWig cannot be installed on Windows: the browser is then shown without the coverage track
    warnings.warn("pyBigWig is not installed: skipping the ChIP-seq coverage track.")
else:
    #Importing some coverage data from a BigWig file
    bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
    refname='NC_000913'
    with pyBigWig.open(bw_file_path) as bw:
        cov=bw.values(refname,0,g.seq_len,numpy=True)

    #Keeping one position out of 10
    data=pd.DataFrame({"pos": np.arange(0,g.seq_len,10),
                       "cov": cov[::10]})

    track=g.add_track()
    track.line(data,pos="pos",y="cov", 
               line_color="blue",
               line_width=2)

g.show()

AttributeError: unexpected attribute 'genome_path' to figure, possible attributes are above, active_drag, active_inspect, active_multi, active_scroll, active_tap, align, aspect_ratio, aspect_scale, background_fill_alpha, background_fill_color, below, border_fill_alpha, border_fill_color, center, context_menu, css_classes, disabled, extra_x_ranges, extra_x_scales, extra_y_ranges, extra_y_scales, flow_mode, frame_align, frame_height, frame_width, height, height_policy, hidpi, hold_render, inner_height, inner_width, js_event_callbacks, js_property_callbacks, left, lod_factor, lod_interval, lod_threshold, lod_timeout, margin, match_aspect, max_height, max_width, min_border, min_border_bottom, min_border_left, min_border_right, min_border_top, min_height, min_width, name, outer_height, outer_width, outline_line_alpha, outline_line_cap, outline_line_color, outline_line_dash, outline_line_dash_offset, outline_line_join, outline_line_width, output_backend, renderers, reset_policy, resizable, right, sizing_mode, styles, stylesheets, subscribed_events, syncable, tags, title, title_location, toolbar, toolbar_inner, toolbar_location, toolbar_sticky, tools, tooltips, visible, width, width_policy, x_axis_label, x_axis_location, x_axis_type, x_minor_ticks, x_range, x_scale, y_axis_label, y_axis_location, y_axis_type, y_minor_ticks, y_range or y_scale

In [None]:
#| export
from bokeh.transform import factor_cmap

In [None]:
#| export
@patch
def scatter(self:Track,
         data: pd.DataFrame, #pandas DataFrame containing the data
         pos: str, #name of the column containing the positions along the genome
         y: str, #name of the column containing the data to be plotted on the y-axis
         factors: str = None, #name of a column of values to be used as factors
         hover_data: List = None, #list of additional column names to be shown when hovering over the data
         **kwargs, #enables to pass keyword arguments used by the Bokeh function
        ):
    """Register a scatter plot of `y` against `pos` on the track.

    When `factors` is given, the points are colored according to the values of that
    column and a legend is added. The data is stored on the track immediately; the
    glyphs are drawn when the figure of the track is built. Raises ValueError if
    `hover_data` is not None, a str, a list or a tuple.
    """
    #isinstance (rather than an exact type comparison) also accepts tuples and list subclasses
    if hover_data is None:
        hover_data = []
    elif isinstance(hover_data, str):
        hover_data = [hover_data]
    elif isinstance(hover_data, (list, tuple)):
        hover_data = list(hover_data) #copy, so the caller's sequence is never modified
    else:
        raise ValueError("hover_data must be None, str, or List")

    def render_method(track, fig, loaded_range):
        loaded_data = track.set_figure_data_source(fig, pos, loaded_range)
        if factors is not None:
            #distinct factor values in order of first appearance: unlike a set, this gives
            #the same factor -> color assignment every time the notebook is run
            levels = tuple(dict.fromkeys(data[factors].values))
            color = factor_cmap(factors, "Category10_10", levels)

            fig.scatter(source=loaded_data, x=pos, y=y, color=color, legend_group=factors, **kwargs)

            fig.legend.title = factors
            fig.legend.location = "top_left"
        else:
            fig.scatter(source=loaded_data, x=pos, y=y, **kwargs)

    self.set_track_data_source(data, pos=pos, columns=[y,factors]+hover_data)
    self.render_methods.append(render_method)
    


Additional kwargs are passed as is to [`bokeh.plotting.figure.scatter`](https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html#bokeh.plotting.figure.scatter)

#### Plotting some CRISPR screen data

In [None]:
import pandas as pd

In [None]:
#Loading the Cui 2018 CRISPRi screen data directly from its public URL
cui2018data = pd.read_csv(
    "https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
)
cui2018data.head()

In [None]:
g = GenomeBrowser(
    genome_path=genome_path,
    gff_path=gff_path,
    bounds=(0,100000),
    search=False,
    show_seq=False,
)

#First track: fit75 values, colored by the "ori" column, with the guide shown on hover
track = g.add_track(height=150)
track.scatter(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
    hover_data=["guide"],
)

#Second track: fit18 values, colored by the "ori" column
track2 = g.add_track(height=150)
track2.scatter(
    data=cui2018data,
    pos="pos",
    y="fit18",
    factors="ori",
)

g.show()


In [None]:
#| export
@patch
def bar(self:Track,
         data: pd.DataFrame, #pandas DataFrame containing the data
         pos: str, #name of the column containing the positions along the genome
         y: str, #name of the column containing the data to be plotted on the y-axis
         factors: str = None, #name of a column of values to be used as factors
         hover_data: List = None, #list of additional column names to be shown when hovering over the data
         **kwargs, #enables to pass keyword arguments used by the Bokeh function
        ):
    """Register vertical bars of height `y` at positions `pos` on the track.

    When `factors` is given, the bars are colored according to the values of that
    column and a legend is added. The data is stored on the track immediately; the
    glyphs are drawn when the figure of the track is built. Raises ValueError if
    `hover_data` is not None, a str, a list or a tuple.
    """
    #isinstance (rather than an exact type comparison) also accepts tuples and list subclasses
    if hover_data is None:
        hover_data = []
    elif isinstance(hover_data, str):
        hover_data = [hover_data]
    elif isinstance(hover_data, (list, tuple)):
        hover_data = list(hover_data) #copy, so the caller's sequence is never modified
    else:
        raise ValueError("hover_data must be None, str, or List")

    def render_method(track, fig, loaded_range):
        loaded_data = track.set_figure_data_source(fig, pos, loaded_range)
        if factors is not None:
            #distinct factor values in order of first appearance: unlike a set, this gives
            #the same factor -> color assignment every time the notebook is run
            levels = tuple(dict.fromkeys(data[factors].values))
            #same palette as Track.scatter: its first three colors are those of Category10_3,
            #and factors beyond the third one now get a color too
            color = factor_cmap(factors, "Category10_10", levels)

            fig.vbar(source=loaded_data, x=pos, top=y, color=color, legend_group=factors, **kwargs)

            fig.legend.location = "top_left"
            fig.legend.title = factors
        else:
            fig.vbar(source=loaded_data, x=pos, top=y, **kwargs)

    self.set_track_data_source(data, pos, columns=[y,factors]+hover_data)
    self.render_methods.append(render_method)

Additional kwargs are passed as is to [`bokeh.plotting.figure.vbar`](https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html#bokeh.plotting.figure.vbar)

Showing the same data as vertical bars

In [None]:
g = GenomeBrowser(
    genome_path=genome_path,
    gff_path=gff_path,
    bounds=(0,100000),
)

#Same fit75 values as in the scatter example, drawn as vertical bars colored by "ori"
track = g.add_track()
track.bar(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
)

g.show()


In [None]:
#| export
@patch
def highlight(self:Track,
    data: pd.DataFrame = None, #pandas DataFrame containing the data
    left_col: str = "left", #name of the column containing the start positions of the regions
    right_col: str = "right", #name of the column containing the end positions of the regions
    color_col: str = "color", #name of the column containing color of the regions
    alpha_col: str = "alpha", #name of the column containing alpha of the regions 
    left = None, #start position of the region, used when `data` is not provided
    right = None, #end position of the region, used when `data` is not provided
    color = "green", #color used when `data` is not provided or has no `color_col` column
    alpha: float = 0.2, #transparency used when `data` is not provided or has no `alpha_col` column
    hover_data: List[str] = None, #list of additional column names to be shown when hovering over the data
    **kwargs, #enables to pass keyword arguments used by the Bokeh function
    ):
    """Register highlighted regions spanning the whole y range of the track.

    The regions come either from `data` (one row per region) or, when `data` is not
    provided, from the single region described by `left`, `right`, `color` and `alpha`.
    The caller's DataFrame is never modified. The regions are drawn when the figure of
    the track is built; if the track has no `ylim` at that point a warning is issued and
    the highlight is skipped.

    Raises ValueError if `hover_data` is not None, a str, a list or a tuple, or if
    `data` is not provided and one of `left`, `right`, `color` is None.
    """
    if hover_data is None:
        hover_data = []
    elif isinstance(hover_data, str):
        hover_data = [hover_data]
    elif isinstance(hover_data, (list, tuple)):
        hover_data = list(hover_data)
    else:
        raise ValueError("hover_data must be None, str, or List")

    if data is None:
        if left is None or right is None or color is None:
            raise ValueError("If `data` is not provided, then left, right, and color must be specified")
        data = pd.DataFrame({left_col: [left], right_col: [right], color_col: [color], alpha_col: [alpha]})
    else:
        data = data.copy() # copy the dataframe because we modify it below, and users might not expect their input to be modified.
        #fill in the optional columns of the highlight data (not of the track data) when they are missing
        if color_col not in data.columns:
            data[color_col] = color if color is not None else "green"
        if alpha_col not in data.columns:
            data[alpha_col] = alpha

    #columns sent to the browser, without duplicates (hover_data may repeat one of the region columns)
    source_columns = list(dict.fromkeys([left_col, right_col, color_col, alpha_col] + hover_data))

    def render_method(track, fig, loaded_range):
        if track.ylim is None:
            warnings.warn("When adding highlights to a track, ylim needs to be defined. "
                          "You can either set ylim manually when creating the track, or plot data using "
                          "Track.line, Track.scatter or Track.bar before adding the highlight.")
            return #without ylim the vertical extent of the regions is unknown

        highlight_source = ColumnDataSource(data[source_columns])

        #the glyph properties refer to columns of highlight_source by name; the color column is given
        #as an explicit field so that a column name is never mistaken for a color name
        r=Quad(left=left_col, right=right_col,
               bottom=track.ylim[0],
               top=track.ylim[1],
               fill_color={"field": color_col},
               fill_alpha=alpha_col,
               line_alpha=0,
               **kwargs)

        renderer = fig.add_glyph(highlight_source, r)
        #the @{name} form also works for column names containing spaces
        tooltips=[(f"{left_col} - {right_col}",f"@{{{left_col}}} - @{{{right_col}}}")]
        tooltips+=[(f"{attr}",f"@{{{attr}}}") for attr in hover_data]
        fig.add_tools(HoverTool(renderers=[renderer],
                                tooltips=tooltips))
    self.render_methods.append(render_method)

Additional kwargs are passed as is to [`bokeh.models.Quad`](https://docs.bokeh.org/en/latest/docs/reference/models/glyphs/quad.html#bokeh.models.Quad)

In [None]:
#Regions to highlight: one row per region, with its own color
highlight_regions = pd.DataFrame({
    "left": [5000, 8000],
    "right": [6000, 8500],
    "color": ["red","green"],
})

g = GenomeBrowser(
    genome_path=genome_path,
    gff_path=gff_path,
    bounds=(0,10000),
    search=False,
)

track = g.add_track()
track.scatter(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
)
track.highlight(
    data=highlight_regions,
    left_col="left",
    right_col="right",
    color_col="color",
)

g.show()

In [None]:
#Regions to highlight: one row per region, with its own color
highlight_regions = pd.DataFrame({
    "left": [5000, 8000],
    "right": [6000, 8500],
    "color": ["red","green"],
})

g = GenomeBrowser(
    genome_path=genome_path,
    gff_path=gff_path,
    bounds=(0,10000),
    search=False,
)

track = g.add_track()
track.scatter(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
)

#Highlighting through the browser, asking it to highlight the tracks as well
g.highlight(
    data=highlight_regions,
    left="left",
    right="right",
    color="color",
    highlight_tracks=True,
)

g.show()

In [None]:
#| hide
#testing different column names
import pandas as pd

In [None]:
#| hide
#testing different column names
data_path = get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

#a sine wave sampled every 100 positions, stored in columns named "x" and "y"
data = (
    pd.DataFrame({"x": np.arange(0,50000,100)})
    .assign(y=lambda df: np.sin(df["x"]))
)

g = GenomeBrowser(
    gff_path=gff_path,
    bounds=(0,50000),
    toolbar_location="above",
    search=False,
)

track = g.add_track(
    height=200,
    toolbar_location="above",
)
track.bar(data=data, pos="x", y="y")

g.show()

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()