# track

> Contains the Track class and plotting functions

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp track

In [None]:
#| hide
from nbdev.showdoc import *


In [None]:
#| export
from fastcore.basics import *

from bokeh.plotting import figure

from bokeh.models import (
    CustomJS,
    ColumnDataSource,
    NumeralTickFormatter
)

from Bio import SeqIO
from genomenotebook.js_callback_code import (
    track_callback_code,
    get_example_data_dir
)

import pandas as pd

from bokeh.io import output_notebook
import os

try: #pyBigWig cannot be installed on Windows. This might make it possible for windows users to still install
    import pyBigWig
except ImportError:
    pyBigWig = None
    
import warnings

In [None]:
#| hide
#Configure Bokeh so that the figures created below are displayed inline in the notebook
output_notebook()

In [None]:
#| hide
#Useful for javascript development as it is not automatically reimported

#Read the x-range callback source straight from the javascript file
file_path = "../../genomenotebook/javascript/x_range_change_callback_code.js"
with open(file_path, 'r') as handle:
    x_range_change_callback_code = handle.read()

#Same for the search callback source
file_path = "../../genomenotebook/javascript/search_callback_code.js"
with open(file_path, 'r') as handle:
    search_callback_code = handle.read()

In [None]:
#| export
class Track:
    """A plotting track backed by a Bokeh figure (available as `self.fig`).

    Track objects should only be created through GenomeBrowser.add_track
    """
    def __init__(self,
                 height: int = 200, #height given to the Bokeh figure of the track
                 output_backend="webgl" #rendering backend forwarded to the Bokeh figure
                ):
        #x-only zoom and pan, plus the save and reset tools
        fig = figure(
            tools="xwheel_zoom,xpan,save,reset",
            active_scroll="xwheel_zoom",
            height=height,
            y_axis_location="right", #this is required in order to keep a proper alignment with the sequence
            output_backend=output_backend,
        )
        #Tick labels of the x axis use the "0,0" numeral format (thousands separators)
        fig.xaxis[0].formatter = NumeralTickFormatter(format="0,0")

        self.height = height
        self.fig = fig
        #These attributes start empty; nothing in this constructor populates them
        self.track_loaded_data = None
        self.track_all_data = None
        self.loaded_range = None
        
        


In [None]:
#| hide
#Sanity check: the requested height must reach the underlying Bokeh figure
expected_height = 300
t = Track(height=expected_height)
assert t.fig.height == expected_height

Adding a track with random points as a demonstration. 
genomeNotebook uses the Bokeh library and `track.fig` is a simple Bokeh figure on which you can plot anything you want using Bokeh.

In [None]:
from genomenotebook.browser import GenomeBrowser
import numpy as np

In [None]:
#Paths to the example genome and annotation files
data_path = get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False)

track = g.add_track()

#Seeded generator so that the demonstration plot is identical on every run
rng = np.random.default_rng(0)
x= np.arange(0,100000,100)
y= rng.integers(0,10,size=x.shape) #one random integer in [0, 10) per position
track.fig.scatter(x=x,y=y)
g.show()

In [None]:
#| export
@patch
def _set_track_data_source(self:Track, data, pos, columns):
    """Restrict `data` to the track bounds and expose it to the figure as Bokeh data sources.

    `data` is a pandas DataFrame, `pos` the name of its position column and `columns`
    the names of the other columns to keep (falsy entries such as None are dropped).

    Sets `self.all_data` (every row strictly inside `self.bounds`, sorted by `pos`) and
    `self.loaded_data` (the subset selected around the current x range), then registers
    a JavaScript callback running `track_callback_code` when the start of the x range changes.

    Reads `self.bounds` and `self.loaded_range`, neither of which is populated by
    `Track.__init__` -- presumably GenomeBrowser.add_track assigns them (to be confirmed).
    """
    columns=[c for c in columns if c] #some arguments can be None => remove them

    #Keep only the rows strictly inside the bounds, and only the columns that will be plotted
    in_bounds = (self.bounds[0] < data[pos]) & (data[pos] < self.bounds[1])
    data=data.loc[in_bounds, [pos]+columns].copy()
    #Sort on the position column given by the caller (it is not necessarily named "pos")
    data=data.sort_values(pos)
    if len(data)>10**5:
        warnings.warn("You are trying to plot more than 10^5 glyphs, this might crash your memory. "
                      "Consider using bounds or reducing the number of datapoints.")

    self.all_data=ColumnDataSource(data)

    #Rows initially handed to the glyphs: positions strictly between
    #x_range.start - loaded_range["start"][0] and x_range.end + loaded_range["end"][0]
    window_start = self.fig.x_range.start - self.loaded_range.data["start"][0]
    window_end = self.fig.x_range.end + self.loaded_range.data["end"][0]
    self.loaded_data=ColumnDataSource(
        data.loc[(window_start < data[pos]) & (data[pos] < window_end)]
    )

    xcb = CustomJS(
            args = {
                "x_range": self.fig.x_range,
                "all_data":self.all_data,
                "loaded_data": self.loaded_data,
                "track_loaded_range":self.loaded_range,
            },
            code = track_callback_code
        )

    #The callback is only attached to changes of the start of the x range
    self.fig.x_range.js_on_change('start', xcb)


In [None]:
#| export
@patch
def line(self:Track,
         data: pd.DataFrame, #pandas DataFrame containing the data
         pos: str, #name of the column containing the positions along the genome
         y: str, #name of the column containing the data to be plotted on the y-axis
         **kwargs #enables to pass keyword arguments used by the Bokeh function
        ):
    """Plot column `y` against column `pos` as a line on the track figure."""
    #Build the data sources first: the glyph below reads from self.loaded_data
    self._set_track_data_source(data, pos, columns=[y])
    self.fig.line(
        x=pos,
        y=y,
        source=self.loaded_data,
        **kwargs,
    )


#### Plotting some ChIP-seq data 

In [None]:
g=GenomeBrowser(genome_path=genome_path, 
                gff_path=gff_path, 
                init_pos=50000,
                bounds=(30000,85000), 
                search=False, 
                show_seq=False)

if pyBigWig is None:
    #pyBigWig is optional (see the guarded import at the top of the notebook): skip instead of crashing
    warnings.warn("pyBigWig is not installed: skipping the ChIP-seq coverage example.")
else:
    #Importing some coverage data from a BigWig file
    bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
    refname='NC_000913'
    with pyBigWig.open(bw_file_path) as bw:
        cov=bw.values(refname,0,g.seq_len,numpy=True)

    #Keep one coverage value every `step` positions to limit the number of plotted points
    step=10
    data=pd.DataFrame({"pos": np.arange(0,g.seq_len,step),
                         "cov": cov[::step]})

    track=g.add_track()
    track.line(data,pos="pos",y="cov", 
               line_color="blue",
               line_width=2)
    g.show()

In [None]:
#| export
from bokeh.transform import factor_cmap

In [None]:
#| export
@patch
def scatter(self:Track,
            data: pd.DataFrame, #pandas DataFrame containing the data
            pos: str, #name of the column containing the positions along the genome
            y: str, #name of the column containing the data to be plotted on the y-axis
            factors: str = None, #name of a column of values to be used as factors
            **kwargs, #enables to pass keyword arguments used by the Bokeh function
           ):
    """Plot column `y` against column `pos` as scatter markers on the track figure.

    When `factors` is given, markers are colored by the values of that column using
    the "Category10_3" palette, and a legend titled after the column is placed top left.
    """
    self._set_track_data_source(data, pos, columns=[y,factors])

    if factors is None:
        self.fig.scatter(source=self.loaded_data, x=pos, y=y, **kwargs)
        return

    #Sorted (by string form, so mixed types cannot fail) so that the factor -> color
    #assignment is the same on every run instead of following set iteration order
    levels=tuple(sorted(set(data[factors].values), key=str))
    color=factor_cmap(factors,"Category10_3",levels)

    self.fig.scatter(source=self.loaded_data, x=pos, y=y, color=color, legend_group=factors, **kwargs)

    self.fig.legend.title = factors
    self.fig.legend.location = "top_left"


#### Plotting some CRISPR screen data

In [None]:
import pandas as pd

In [None]:
#Opening the Cui 2018 CRISPRi screen data (the first CSV column is used as the index)
cui2018_url="https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data=pd.read_csv(cui2018_url,index_col=0)
cui2018data.head()

Unnamed: 0_level_0,gene,essential,pos,ori,coding,fit18,fit75,ntargets,seq
guide,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAAAAACCTGCTGGTGAGGC,,,2202483,-,,-4.850012,-1.437546,1,AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC...
AAAAAACGTATTCGCTTGCA,curA,False,1517891,+,False,-0.094026,-0.100313,1,TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT...
AAAAAAGCGCACTTTTTGAC,,,1919717,+,,-1.10931,-0.24674,1,GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC...
AAAAAAGCGGTGACTTACGA,bglA,False,3042929,+,False,-1.328831,-0.905068,1,GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT...
AAAAAATCTGCCCGTGTCGT,gyrA,True,2337231,-,False,-0.840373,-0.598858,1,ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA...


In [None]:
#Two tracks on the same browser, one per fitness column, both colored by the "ori" column
g=GenomeBrowser(genome_path=genome_path,
                gff_path=gff_path,
                bounds=(0,100000),
                search=False,
                show_seq=False)

track=g.add_track(height=100)
track.scatter(data=cui2018data,
              pos="pos",
              y="fit75",
              factors="ori")

track2=g.add_track(height=100)
track2.scatter(data=cui2018data,
               pos="pos",
               y="fit18",
               factors="ori")

g.show()


In [None]:
#| export
@patch
def bar(self:Track,
        data: pd.DataFrame, #pandas DataFrame containing the data
        pos: str, #name of the column containing the positions along the genome
        y: str, #name of the column containing the data to be plotted on the y-axis
        z: str = None, #name of a column containing numerical data rendered as a linear color map (not implemented yet: nothing is drawn when it is used)
        factors: str = None, #name of a column of values to be used as factors
        **kwargs, #enables to pass keyword arguments used by the Bokeh function
       ):
    """Plot column `y` as vertical bars located at column `pos` on the track figure.

    When `factors` is given, bars are colored by the values of that column using the
    "Category10_3" palette, and a legend titled after the column is placed top left.
    `factors` takes precedence over `z`. Coloring by `z` is not implemented: in that
    case a warning is emitted and no bars are drawn.
    """
    self._set_track_data_source(data, pos, columns=[y,factors])

    if factors is not None:
        #Sorted (by string form, so mixed types cannot fail) so that the factor -> color
        #assignment is the same on every run instead of following set iteration order
        levels=tuple(sorted(set(data[factors].values), key=str))
        color=factor_cmap(factors,"Category10_3",levels)

        self.fig.vbar(source=self.loaded_data, x=pos, top=y, color=color, legend_group=factors, **kwargs)

        self.fig.legend.location = "top_left"
        self.fig.legend.title = factors
    elif z is not None:
        #Make the missing feature visible instead of silently leaving the track empty
        warnings.warn("The `z` argument of Track.bar is not implemented yet: no bars were drawn.")
    else:
        #Plain bars read from the same source as the other glyphs of the track
        self.fig.vbar(source=self.loaded_data, x=pos, top=y, **kwargs)
        

Showing the same data as vertical bars

In [None]:
#Same CRISPR screen data, drawn as bars colored by the "ori" column
g=GenomeBrowser(genome_path=genome_path,
                gff_path=gff_path,
                bounds=(70000,110000))

track=g.add_track()
track.bar(data=cui2018data,
          pos="pos",
          y="fit75",
          factors="ori")

g.show()


In [None]:
#| hide
#Export the cells marked with "#| export" using nbdev
import nbdev
nbdev.nbdev_export()