In [None]:
#| hide
# Development convenience: reload imported modules before each cell runs,
# so edits to the library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp options

# Options

In [None]:
#|exec_doc
#|hide
# Direct Bokeh output to notebook cells; hide_banner=True suppresses the
# "BokehJS successfully loaded" banner.
from bokeh.io import output_notebook
output_notebook(hide_banner=True) 

## Selecting feature types and attributes

You can choose which feature types to display. The default feature types are \["CDS", "repeat_region", "ncRNA", "rRNA", "tRNA"].

You can inspect the gff file to see what feature types and attributes are available.

In [None]:
import os

import genomenotebook as gn

# Build the path to the example GFF3 file in the genomenotebook example data directory
data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

# Parse the annotation file and preview its first rows
features = gn.parse_gff(gff_path)
features.head()

Unnamed: 0,seq_id,source,type,start,end,score,strand,phase,attributes,gene_biotype,...,rpt_type,recombination_class,gene,genome,orig_transcript_id,locus_tag,pseudo,left,right,middle
0,U00096.3,Genbank,region,1,4641652,.,+,.,ID=U00096.3:1..4641652;Dbxref=taxon:511145;Is_...,,...,,,,chromosome,,,,1,4641652,2320826.5
1,U00096.3,Genbank,gene,190,255,.,+,.,"ID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:E...",protein_coding,...,,,thrL,,,b0001,,190,255,222.5
2,U00096.3,Genbank,CDS,190,255,.,+,0,ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni...,,...,,,thrL,,gnl|b0001|mrna.b0001,b0001,,190,255,222.5
3,U00096.3,Genbank,gene,337,2799,.,+,.,"ID=gene-b0002;Dbxref=ASAP:ABE-0000008,ECOCYC:E...",protein_coding,...,,,thrA,,,b0002,,337,2799,1568.0
4,U00096.3,Genbank,CDS,337,2799,.,+,0,ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni...,,...,,,thrA,,gnl|b0002|mrna.b0002,b0002,,337,2799,1568.0


In [None]:
# Distinct feature types present in the parsed GFF table
set(features["type"])

{'CDS',
 'exon',
 'gene',
 'mobile_genetic_element',
 'ncRNA',
 'origin_of_replication',
 'pseudogene',
 'rRNA',
 'recombination_feature',
 'region',
 'repeat_region',
 'sequence_feature',
 'tRNA'}

In [None]:
# Available attributes: the column names of the parsed feature table
features.columns

Index(['seq_id', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase',
       'attributes', 'gene_biotype', 'gene_synonym', 'mol_type', 'Name',
       'transl_except', 'Note', 'strain', 'Parent', 'orig_protein_id',
       'substrain', 'part', 'mobile_element_type', 'protein_id', 'gbkey',
       'product', 'exception', 'Dbxref', 'ID', 'Is_circular', 'transl_table',
       'rpt_type', 'recombination_class', 'gene', 'genome',
       'orig_transcript_id', 'locus_tag', 'pseudo', 'left', 'right', 'middle'],
      dtype='object')

In [None]:
# Choose the feature types, the attributes and the feature name to display
g = gn.GenomeBrowser(
    gff_path,
    feature_types=["tRNA", "rRNA"],
    # these attributes will be displayed when hovering
    attributes=["gene", "locus_tag", "strand", "start", "end"],
    feature_name="gene",
    init_pos=226000,
)
g.show()

## Changing colors

The glyphs shown for features are defined as follows: `(patch_type, (patch_color_plus_strand, patch_color_minus_strand))`.
For now there are only two patch types: "arrow" or "box".

In [None]:
# Copy the default glyphs so they can be modified without changing gn.default_glyphs itself
glyphs=gn.default_glyphs.copy() #copying the default glyphs to modify them
# Display the mapping: feature type -> (patch_type, (colors...))
glyphs

defaultdict(<function genomenotebook.utils.<lambda>()>,
            {'CDS': ('arrow', ('purple', 'orange')),
             'ncRNA': ('arrow', ('purple', 'orange')),
             'rRNA': ('arrow', ('purple', 'orange')),
             'tRNA': ('arrow', ('purple', 'orange')),
             'repeat_region': ('box', ('grey',)),
             'exon': ('box', ('grey',))})

Modifying the default glyphs

In [None]:
# Override the glyphs of two feature types; each value is (patch_type, (colors...))
glyphs.update(
    {
        "rRNA": ("arrow", ("red",)),
        "CDS": ("arrow", ("blue", "green")),
    }
)

# Open the browser with the customised glyphs
g = gn.GenomeBrowser(
    gff_path,
    glyphs=glyphs,
    init_pos=224000,
    bounds=(200000, 300000),
)
g.show()

## Changing feature name

You can choose which attribute of the GFF file should be used as the displayed feature name. The feature_name needs to belong to the list of attributes. The default list of attributes is \["locus_tag","gene","product"].

In [None]:
# Use the protein_id attribute as the displayed feature name;
# it is also listed in `attributes`
g = gn.GenomeBrowser(
    gff_path,
    attributes=["protein_id", "gene", "product"],
    feature_name="protein_id",
)
g.show()