# glyphs

> Contains the Glyph class used to define the different types of glyphs that can be used to represent features, as well as the basic plotting functions for GenomeBrowser

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp glyphs

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
from bokeh.io import output_notebook #|hide_line
output_notebook(hide_banner=True) #|hide_line

In [None]:
#| export
import numpy as np
import pandas as pd
import io

from bokeh.plotting import figure
from bokeh.models.tools import BoxZoomTool
from bokeh.models import HoverTool, NumeralTickFormatter, LabelSet
from bokeh.models.glyphs import Patches
from bokeh.models import (
    CustomJS,
    Range1d,
    ColumnDataSource,
)
from genomenotebook.data import get_example_data_dir
from genomenotebook.utils import (
    parse_gff,
)

from collections import defaultdict
import os
from typing import *
import copy
import html



In [None]:
#| export
from collections import defaultdict

In [None]:
#| export
#| hide
# Feature types used by default (passed as `feature_types` to `parse_gff` in the examples below).
default_types = [
    "CDS",
    "repeat_region",
    "ncRNA",
    "rRNA",
    "tRNA",
]
# Attribute names associated with each feature type; any type that has not been
# set explicitly gets a fresh ["gene", "locus_tag", "product"] list on first lookup.
default_attributes = defaultdict(
    lambda: ["gene", "locus_tag", "product"]
)

In [None]:
#| hide
default_attributes["CDS"]

['gene', 'locus_tag', 'product']

In [None]:
#| export
#| hide
# (lower, upper) bounds of the y axis, exposed through get_y_range()
Y_RANGE = (0, 1)


def get_y_range() -> tuple:
    """Return the module-level `Y_RANGE` tuple (the Y range for the genome browser plot)."""
    return Y_RANGE

In [None]:
#| export
#| hide
def arrow_coordinates(feature, 
                      height: float = 1, #relative height of the feature (between 0 and 1)
                      feature_height: float = 0.15, #fraction of the annotation track occupied by the feature glyphs
                      ):
    """Compute the five vertices of the arrow polygon representing `feature`.

    `feature` must expose `left`, `right`, `start`, `end` and `strand`, and may
    carry an optional `z_order` entry.

    Returns a tuple `(xs, ys, xbox_min)`:

    * `xs`, `ys`: x and y coordinates of the polygon vertices, in drawing order
      (two vertices at `start`, the arrow base, the tip at `end`, the arrow base again).
    * `xbox_min`: `feature.start` when the strand is "+", otherwise the x
      coordinate of the arrow base.
    """
    # The arrow head is at most 100 units long, and never longer than the feature itself.
    head_length = np.minimum(feature.right - feature.left, 100)
    tail, tip = feature.start, feature.end

    # Every strand value other than "+" takes the reverse-strand branch.
    points_right = feature.strand == "+"
    arrow_base = tip - head_length if points_right else tip + head_length
    xbox_min = tail if points_right else arrow_base

    xs = (tail, tail, arrow_base, tip, arrow_base)

    # Shrink the glyph symmetrically around the centre of its band when height < 1.
    offset = feature_height*(1-height)/2
    y_min = 0.05+offset
    y_max = 0.05+feature_height-offset
    y_mid = (y_max + y_min) / 2
    ys = (y_min, y_max, y_max, y_mid, y_min)

    # Each z_order level shifts the glyph up by one full feature_height.
    if "z_order" in feature:
        shift = feature_height*feature["z_order"]
        ys = tuple(y+shift for y in ys)
    return xs, ys, xbox_min


In [None]:
#| export
#| hide
def box_coordinates(feature, 
                    height: float = 1, #relative height of the feature (between 0 and 1)
                    feature_height: float = 0.15, #fraction of the annotation track occupied by the feature glyphs
                    ):
    """Compute the four corners of the rectangle representing `feature`.

    `feature` must expose `left` and `right`, and may carry an optional
    `z_order` entry.

    Returns a tuple `(xs, ys, xbox_min)` where `xs` and `ys` are the corner
    coordinates in drawing order and `xbox_min` is the smallest x coordinate.
    """
    x_left, x_right = feature.left, feature.right
    xs = (x_left, x_left, x_right, x_right)

    # Shrink the box symmetrically around the centre of its band when height < 1.
    offset = feature_height*(1-height)/2
    y_min = 0.05+offset
    y_max = 0.05+feature_height-offset
    ys = (y_min, y_max, y_max, y_min)

    # Each z_order level shifts the box up by one full feature_height.
    if "z_order" in feature:
        shift = feature_height*feature["z_order"]
        ys = tuple(y+shift for y in ys)
    return xs, ys, min(xs)

In [None]:
#| export
class Glyph:
    def __init__(self,
                 glyph_type: str ="arrow", # type of the Glyph (arrow or box)
                 colors: tuple = ("purple","orange"), # can be a single color or a tuple of two colors, one for each strand
                 alpha: float = 0.8, #transparency
                 show_name: bool = True, # if False, get_feature_name returns an empty name for features drawn with this glyph
                 name_attr: str = default_attributes["CDS"][0], # default attribute to use as the name of the feature to be displayed
                 height: float = 1,  #height of the feature relative to other features (between 0 and 1)
                 ):
        """A class used to define the different types of glyphs shown for different feature types."""
        self.glyph_type=glyph_type
        # A bare string is a single color: wrap it so that self.colors is always a sequence.
        # isinstance (rather than an exact type comparison) also accepts str subclasses.
        self.colors=(colors,) if isinstance(colors, str) else colors

        assert 0 <= alpha <= 1, "alpha must be between 0 and 1"
        self.alpha=alpha
        self.show_name=show_name
        self.name_attr=name_attr 
        assert 0 < height <= 1, "height must be greater than 0 and at most 1"
        self.height=height

        # Any glyph_type other than "box" is drawn with the arrow shape.
        self.coordinates = box_coordinates if glyph_type == "box" else arrow_coordinates

    def get_patch(self,
                  feature, # row of a pandas DataFrame extracted from a GFF file
                  feature_height: float = 0.15, #fraction of the annotation track height occupied by the features
                  ):
        """Return `(coordinates, color, alpha)` for `feature`.

        `coordinates` is the value returned by the coordinate function selected
        from `glyph_type`. With two colors, the first one is used for the "+"
        strand and the second one for the "-" strand. Any other strand value
        (e.g. "." or "?") falls back to the first color instead of raising a
        KeyError.
        """
        if len(self.colors)>1 and feature.strand == "-":
            color = self.colors[1]
        else:
            color = self.colors[0]

        return self.coordinates(feature, self.height, feature_height), color, self.alpha
    
    def copy(self):
        """Return an independent deep copy of this Glyph."""
        return copy.deepcopy(self)
    
    def __repr__(self) -> str:
        attributes = ["glyph_type","colors","height","alpha","show_name","name_attr"]
        lines = ["Glyph object with attributes:"]
        lines.extend(f"\t{attr}: {getattr(self, attr)}" for attr in attributes)
        # Every line, including the last one, ends with a newline.
        return "\n".join(lines) + "\n"

In [None]:
#| export
def get_default_glyphs(arrow_colors=("purple","orange"), box_colors=("grey",)) -> dict:
    """Returns a dictionary with:

            * keys: feature types (str)
            * values: a Glyph object

    "CDS", "ncRNA", "rRNA" and "tRNA" are drawn as arrows using `arrow_colors`;
    "repeat_region" and "exon" are drawn as boxes using `box_colors`.
    Any other feature type gets a fresh copy of the arrow glyph on first lookup.
    """
    arrow_template = Glyph(glyph_type="arrow", colors=arrow_colors, alpha=0.8, show_name=True)
    box_template = Glyph(glyph_type="box", colors=box_colors, alpha=1, height=0.8, show_name=False)

    # Every entry is its own copy, so one feature type can be customised without affecting the others.
    glyphs = defaultdict(lambda: arrow_template.copy())
    for feature_type in ("CDS", "ncRNA", "rRNA", "tRNA"):
        glyphs[feature_type] = arrow_template.copy()
    for feature_type in ("repeat_region", "exon"):
        glyphs[feature_type] = box_template.copy()
    return glyphs

default_glyphs = get_default_glyphs()

In [None]:
default_glyphs['CDS']

Glyph object with attributes:
	glyph_type: arrow
	colors: ('purple', 'orange')
	height: 1
	alpha: 0.8
	show_name: True
	name_attr: gene

In [None]:
#| export
def get_patch_coordinates(feature, glyphs_dict, feature_height=0.15, color_attribute=None):
    """Return `(coordinate, color, alpha)` for `feature`, using the glyph registered for its type.

    When `color_attribute` is given and present in `feature.attributes`, its
    value replaces the color chosen by the glyph; otherwise the glyph color is kept.
    """
    patch = glyphs_dict[feature.type].get_patch(feature, feature_height=feature_height)
    coordinate, color, alpha = patch
    if color_attribute is None:
        return coordinate, color, alpha
    # Fall back to the glyph color when the feature does not define the attribute.
    return coordinate, feature.attributes.get(color_attribute, color), alpha

In [None]:
# Load the example E. coli MG1655 annotation shipped with the package.
data_path = get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
# Only the first element of parse_gff's result is kept; the cells below use it
# as a DataFrame with one row per feature.
features=parse_gff(gff_path, 
                   feature_types=default_types, 
                   seq_id="U00096.3")[0]

# One (coordinates, color, alpha) tuple per feature row.
features.head().apply(get_patch_coordinates,glyphs_dict=default_glyphs, axis=1)


0    (((190, 190, 190, 255, 190), (0.05, 0.2, 0.2, ...
1    (((337, 337, 2699, 2799, 2699), (0.05, 0.2, 0....
2    (((2801, 2801, 3633, 3733, 3633), (0.05, 0.2, ...
3    (((3734, 3734, 4920, 5020, 4920), (0.05, 0.2, ...
4    (((5234, 5234, 5430, 5530, 5430), (0.05, 0.2, ...
dtype: object

In [None]:
#| export

def html_wordwrap(input_string: str, line_len=50, start=0):
    """Wrap `input_string` for HTML display by inserting "<br>" between words.

    The text is split on whitespace and re-joined with single spaces. A "<br>"
    is inserted *before* any word that would make the current line longer than
    `line_len` characters, counting the separating spaces. `start` is the number
    of characters already present on the first line (e.g. a label preceding
    the text). A single word longer than `line_len` is never split.
    """
    out = []
    line_width = start  # characters already on the current line
    for word in input_string.split():
        # A word that does not start the line is preceded by one space.
        needed = len(word) + (1 if line_width > 0 else 0)
        if line_width > 0 and line_width + needed > line_len:
            out.append("<br>")
            line_width = 0
            needed = len(word)
        out.append(word)
        line_width += needed

    return " ".join(out)
    

In [None]:
#| export
def _format_attribute(name, value, color="DodgerBlue", wrap=50):
    """Format one `name: value` pair as two HTML spans, the name being shown in `color`.

    `name` and `str(value)` are HTML-escaped. The value is then passed through
    `html_wordwrap` with width `wrap`, starting at offset `len(name)+1`.
    """
    label = html.escape(name)
    body = html_wordwrap(html.escape(str(value)), wrap, len(name)+1)
    return f'<span style="color:{color}">{label}</span><span>: {body}</span>'


In [None]:
#| export
def get_tooltip(feature, attributes, wrap=50):
    """Build the HTML tooltip of a feature.

    The first line is the feature type. It is followed by one line per
    attribute selected by `attributes[feature["type"]]`:

    * a list of names: only those attributes, when present on the feature;
    * `None`: all the attributes of the feature;
    * no entry for the type: no attribute line.

    The lookup uses `attributes[...]` rather than an `in` test, so that a
    `defaultdict` (such as `default_attributes`) supplies its default list for
    feature types that were never accessed before.
    """
    row_type = feature["type"]
    # The type is escaped like every other value inserted in the tooltip.
    tooltips = [f'<span style="color:FireBrick">{html.escape(str(row_type))}</span>']

    try:
        wanted = attributes[row_type]
    except KeyError:
        # Plain mapping without an entry for this type: show the type only.
        return "<br>".join(tooltips)

    feature_attributes = feature["attributes"]
    if wanted is None: # append all
        selected = list(feature_attributes)
    else:
        selected = [attribute for attribute in wanted if attribute in feature_attributes]

    tooltips.extend(_format_attribute(attribute, feature_attributes[attribute], wrap=wrap)
                    for attribute in selected)
    return "<br>".join(tooltips)

In [None]:
# Tooltip HTML of the first feature, built with the default attribute selection.
features.apply(lambda row: get_tooltip(row, default_attributes),
                             axis=1)[0]

'<span style="color:FireBrick">CDS</span><br><span style="color:DodgerBlue">gene</span><span>: thrL</span><br><span style="color:DodgerBlue">locus_tag</span><span>: b0001</span><br><span style="color:DodgerBlue">product</span><span>: thr operon leader peptide</span>'

In [None]:
#| export
def get_feature_name(row, glyphs_dict):
    """Return the name of a feature (one row of the features DataFrame).

    The Glyph registered for the row's type in `glyphs_dict` decides the result:
    an empty string when its `show_name` is false, otherwise the value of its
    `name_attr` attribute. When the feature lacks that attribute, the value of
    its first attribute is used instead, or an empty string if it has none.
    """
    glyph = glyphs_dict[row["type"]]
    if not glyph.show_name:
        return ""

    feature_attributes = row.attributes
    if glyph.name_attr in feature_attributes:
        return feature_attributes[glyph.name_attr]
    if len(feature_attributes) > 0:
        return next(iter(feature_attributes.values()))
    return ""


In [None]:
# Names of the first features, as selected by the default glyphs.
features.head().apply(get_feature_name, glyphs_dict=default_glyphs, axis=1)

0    thrL
1    thrA
2    thrB
3    thrC
4    yaaX
dtype: object

In [None]:
#hide
# default_glyphs.copy() would be a shallow copy sharing its Glyph objects with
# default_glyphs, so editing gl["CDS"] would silently change the defaults used by
# every later cell. Build an independent set of glyphs instead.
gl=get_default_glyphs()
gl["CDS"].name_attr="locus_tag"
assert features.head().apply(get_feature_name, glyphs_dict=gl, axis=1)[0]=="b0001"
assert features.loc[features.type=="repeat_region"].head().apply(get_feature_name, glyphs_dict=default_glyphs, axis=1).values[0]==''
assert default_glyphs["CDS"].name_attr=="gene" # the module-level defaults must be left untouched

In [None]:
#hide
# Replace the rRNA glyph with a half-height red box named after the locus_tag attribute.
gl["rRNA"]=Glyph(glyph_type="box", 
                        colors="red", 
                        height=0.5,
                        name_attr="locus_tag",
                        show_name=True)

# The single color string is stored as a one-element tuple (see the printed repr).
print(gl["rRNA"])
features.loc[features.type=="rRNA"].head().apply(get_feature_name, glyphs_dict=gl, axis=1)

Glyph object with attributes:
	glyph_type: box
	colors: ('red',)
	height: 0.5
	alpha: 0.8
	show_name: True
	name_attr: locus_tag



235     b0201
238     b0204
239     b0205
3061    b2588
3062    b2589
dtype: object

In [None]:
#| export
def get_feature_patches(features: pd.DataFrame, #DataFrame of the features 
                        left: int, #left limit
                        right: int, #right limit
                        glyphs_dict: dict, #a dictionary of glyphs to use for each feature type
                        attributes: dict = default_attributes, #dictionary with feature type as keys and a list of attributes to display when hovering as values
                        feature_height: float = 0.15, #fraction of the annotation track height occupied by the features
                        label_vertical_offset: float = 0.05, #extra vertical distance added when computing the label y position
                        label_justify: str = "center", #"center" puts the label at the feature middle, "left" at the glyph's xbox_min
                        color_attribute: str =  None #feature attribute providing the patch color, when present
                       )->pd.DataFrame:
    """Build the table of patches for the features overlapping the `left`-`right` window.

    The result has one row per overlapping feature with the columns `names`,
    `xs`, `ys`, `xbox_min`, `color`, `alpha`, `pos`, `attributes` (the tooltip
    HTML), `type`, `label_y` and, when `label_justify` is "center" or "left",
    `label_x`. Any other `label_justify` value leaves `label_x` out.

    When no feature overlaps the window, an empty table is returned.
    """
    features=features.loc[(features["right"] > left) & (features["left"] < right)]

    if len(features)>0:
        coordinates, colors, alphas = zip(*features.apply(get_patch_coordinates,
                                                          glyphs_dict=glyphs_dict,
                                                          feature_height=feature_height,
                                                          axis=1, 
                                                          color_attribute=color_attribute))
        xs, ys, xbox_mins = zip(*coordinates)

        names=list(features.apply(get_feature_name,glyphs_dict=glyphs_dict, axis=1))
        tooltips=list(features.apply(lambda row: get_tooltip(row, attributes),
                                     axis=1))
    else:
        # Row-wise DataFrame.apply on an empty frame does not yield an empty
        # sequence of results, so every column is set explicitly instead.
        xs, ys, xbox_mins = [], [], []
        colors, alphas = [], []
        names, tooltips = [], []

    feature_patches=dict(names=names,
             xs=list(xs),
             ys=list(ys),
             xbox_min=list(xbox_mins),
             color=list(colors),
             alpha=list(alphas),
             pos=list(features.middle.values),
             attributes=tooltips,
             type=features.type
            )
    
    feature_patches=pd.DataFrame(feature_patches)
    
    # Labels sit above the glyph: lowest y of the patch + band height + extra offset.
    feature_patches["label_y"] = feature_patches["ys"].map(min) + feature_height + label_vertical_offset
    if label_justify == "center":
        feature_patches["label_x"] = feature_patches.pos
    elif label_justify == "left":
        feature_patches["label_x"] = feature_patches["xbox_min"]
    
    return feature_patches

In [None]:
# Patch table for the features overlapping the 8000-12000 window.
patches=get_feature_patches(features,8000,12000, glyphs_dict=default_glyphs)
patches

Unnamed: 0,names,xs,ys,xbox_min,color,alpha,pos,attributes,type,label_y,label_x
9,b0008,"(8238, 8238, 9091, 9191, 9091)","(0.05, 0.2, 0.2, 0.125, 0.05)",8238,purple,0.8,8714.5,"<span style=""color:FireBrick"">CDS</span><br><s...",CDS,0.25,8714.5
10,b0009,"(9306, 9306, 9793, 9893, 9793)","(0.05, 0.2, 0.2, 0.125, 0.05)",9306,purple,0.8,9599.5,"<span style=""color:FireBrick"">CDS</span><br><s...",CDS,0.25,9599.5
11,b0010,"(10494, 10494, 10028, 9928, 10028)","(0.05, 0.2, 0.2, 0.125, 0.05)",10028,orange,0.8,10211.0,"<span style=""color:FireBrick"">CDS</span><br><s...",CDS,0.25,10211.0
12,b0011,"(11356, 11356, 10743, 10643, 10743)","(0.05, 0.2, 0.2, 0.125, 0.05)",10743,orange,0.8,10999.5,"<span style=""color:FireBrick"">CDS</span><br><s...",CDS,0.25,10999.5
13,b0012,"(10830, 10830, 11215, 11315, 11215)","(0.05, 0.2, 0.2, 0.125, 0.05)",10830,purple,0.8,11072.5,"<span style=""color:FireBrick"">CDS</span><br><s...",CDS,0.25,11072.5
14,b0013,"(11786, 11786, 11482, 11382, 11482)","(0.05, 0.2, 0.2, 0.125, 0.05)",11482,orange,0.8,11584.0,"<span style=""color:FireBrick"">CDS</span><br><s...",CDS,0.25,11584.0


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()