## genome alignments view

Show results of a nucmer alignment by putting the coords values into a dataframe which we then create a columndatasource from.
Requirements:
* link corresponding aligned segments with lines
* color code segments

links:
* http://mummer.sourceforge.net/
* https://www.biostars.org/p/14963/
* http://mummer.sourceforge.net/MUMmer.pdf

In [5]:
import os, sys, io, random, subprocess
import string
import numpy as np
import pandas as pd
pd.set_option('display.width',600)
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
from Bio import AlignIO, SeqIO

from IPython.display import HTML

import bokeh
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, Range1d,CustomJS, Slider, HoverTool
from bokeh.models.glyphs import Text, Rect, Line, Segment
from bokeh.layouts import gridplot, column
import panel as pn
import panel.widgets as pnw
pn.extension()

from pybioviz import dashboards, utils, plotters

In [6]:
nuc = utils.align_nucmer('RD900MAF.fa','nuctest.fa')

nucmer --maxgap=500 --mincluster=100 --coords -p nucmer RD900MAF.fa nuctest.fa


In [7]:
def coords_to_plot(data):
    """nucmer coords to plot"""
    
    data = data.reset_index(drop=False)
    print (len(data))
    print(data[:10])
    c=['S1','S2','TAG1','TAG2','index']
    s1 = pd.melt(data[c],id_vars=['index'],value_name='start')
    c=['LEN 1','LEN 2','TAG1','TAG2']
    s2 = pd.melt(data[c],id_vars=['TAG1','TAG2'],value_name='length')
    df = pd.merge(s1,s2,left_index=True,right_index=True)    
    df['y'] = df.variable_y.astype('category').cat.codes
    return df

c=coords_to_plot(nuc)

4
   index    S1    E1    S2    E2  LEN 1  LEN 2  IDENT      TAG1  TAG2
0      0     1  2100     1  2100   2100   2100  100.0  RD900MAF  test
1      1  2661  4410  2101  3850   1750   1750  100.0  RD900MAF  test
2      2  6022  7070  3851  4899   1049   1049  100.0  RD900MAF  test
3      3  7841  8514  4900  5573    674    674  100.0  RD900MAF  test


In [8]:
def view_genome_align(data, x_range=None):
    """Show coords data from nucmer alignment.
    Args: 
        data: a pandas dataframe
    """
        
    df = coords_to_plot(data)
    df['rectx'] = df.start+df.length/2
    #colors = bokeh.palettes.Category10[3]
    colors = utils.random_colors(len(data),seed=8)
    df['color'] = df['index'].apply(lambda x: colors[x], 1)
    #df['y'] = df.y*10
   
    hover = HoverTool(
        tooltips=[            
            ("start", "@start"),                
            ("length", "@length"),             
        ],
        names=['rects']
    ) 
    tools=[hover,"xpan, xwheel_zoom, reset"]
    #x_range=(0,df.rectx.max()+1000) 

    #create data sources
    source = ColumnDataSource(df)
    data = data.reset_index(drop=False)
    data['x1'] = data['S1']+data['LEN 1']/2
    data['x2'] = data['S2']+data['LEN 2']/2
    data['color'] = colors
    source2 = ColumnDataSource(data)
    
    p = figure(title=None, plot_width=900, plot_height=200, x_range=x_range, y_range=(-.3,1.3), tools=tools, 
                    min_border=1, toolbar_location='below')
    #add rects for segments
    rects = Rect(x="rectx", y="y", width="length", height=.3, fill_color="color", line_color="black", fill_alpha=0.6, name='rects')
    labels = Text(x="rectx", y="y", text="start", text_font_size="8pt")
    #draw a line between aligned blocks  
    segs = Segment(x0="x1", y0=0, x1="x2", y1=1, line_color="color", line_width=1, line_alpha=0.5)
    
    p.add_glyph(source, rects)
    p.add_glyph(source, labels)
    p.add_glyph(source2, segs)
    p.yaxis.visible = False
    p.grid.visible = False
    p.toolbar.logo = None
    return p

data = []
p = view_genome_align(nuc)
main = pn.Column(pn.pane.Bokeh(p))
main

4
   index    S1    E1    S2    E2  LEN 1  LEN 2  IDENT      TAG1  TAG2
0      0     1  2100     1  2100   2100   2100  100.0  RD900MAF  test
1      1  2661  4410  2101  3850   1750   1750  100.0  RD900MAF  test
2      2  6022  7070  3851  4899   1049   1049  100.0  RD900MAF  test
3      3  7841  8514  4900  5573    674    674  100.0  RD900MAF  test


## app for genome 

In [42]:
#nuc = utils.align_nucmer('ASM19595v2.fna','orygis.fna')
gff_file = 'ASM19595v2.gff'
features = utils.gff_to_features(gff_file)
title = pn.pane.Markdown('## Genome alignment')
load_btn = pn.widgets.FileInput()
slider = pnw.IntRangeSlider(start=0,end=1000000,step=10,value=(0,2000))
align_pane = pn.pane.Bokeh()
annot_pane = pn.pane.Bokeh()

def update(event):
    xrange = slider.value
    p1 = annot_pane.object = view_genome_align(nuc, x_range=xrange)
    align_pane.object = viewers.view_features(features,preview=False, view_range=p1.x_range,plot_width=900)
    return

slider.param.watch(update,'value')
slider.param.trigger('value')
main = pn.Column(align_pane, annot_pane, sizing_mode='stretch_height')
app = pn.Column(slider,main)
app

269
   index       S1       E1       S2       E2  LEN 1  LEN 2   IDENT         TAG1    TAG2
0      0        1    34802   819926   854714  34802  34789   99.80  NC_000962.3  Orygis
1    185  3155991  3171572  2213412  2228992  15582  15581   99.96  NC_000962.3  Orygis
2    171  2996055  3074519  1285536  1363999  78465  78464   99.93  NC_000962.3  Orygis
3    172  3074552  3119736  1363975  1409153  45185  45179   99.93  NC_000962.3  Orygis
4    173  3100869  3102010  1018153  1017007   1142   1147   81.86  NC_000962.3  Orygis
5    174  3116388  3117192  3431749  3430940    805    810   84.15  NC_000962.3  Orygis
6    175  3119770  3119956  1409334  1409520    187    187  100.00  NC_000962.3  Orygis
7    176  3119921  3120030  1409780  1409889    110    110  100.00  NC_000962.3  Orygis
8    177  3119994  3120468  1409924  1410398    475    475   99.79  NC_000962.3  Orygis
9    178  3119995  3120176  1409412  1409594    182    183   78.49  NC_000962.3  Orygis


In [80]:
pn.pane.Bokeh(viewers.view_features(features))