## mhcpredict plotting

Testing code for plotting results with mhcpredict. We use matplotlib and bokeh.

In [97]:
# Notebook setup: standard library, third-party, then project imports.
import os
from collections import OrderedDict

import numpy as np
import pandas as pd
from bokeh.io import output_notebook, show

from mhcpredict import base, sequtils, analysis

# route bokeh output to the notebook
output_notebook()

# genbank file used as input for the predictions below
genbankfile = 'testing/zaire-ebolavirus.gb'

In [52]:
# --- tepitope run: settings first, then load the sequences and predict ---
savepath1 = 'tepitope'
alleles = [
    "HLA-DRB1*0101",
    "HLA-DRB1*0108",
    "HLA-DRB1*0305",
    "HLA-DRB1*0401",
    "HLA-DRB1*0404",
    "HLA-DRB3*0101",
    "HLA-DRB4*0104",
]
df = sequtils.genbank2Dataframe(genbankfile, cds=True)
P = base.getPredictor('tepitope')
# run prediction for several alleles and save results to savepath
P.predictProteins(df, length=11, alleles=alleles, save=True, path=savepath1)

# --- iedbmhc1 predictor: created here, but the prediction call is left disabled ---
savepath2 = 'iedbmhc1'
mhc1alleles = ["HLA-A*01:01", "HLA-A*68:02"]
P2 = base.getPredictor('iedbmhc1')
#P2.predictProteins(df,length=11,alleles=mhc1alleles,save=True,path=savepath2)


predictions done for 9 proteins


In [126]:
# Load the ZEBOVgp3 results file from the tepitope folder back into the predictor.
# NOTE(review): presumably this file was written by the predictProteins(save=True)
# call earlier in the notebook - confirm.
filename = 'tepitope/ZEBOVgp3.mpk'
P.data = pd.read_msgpack(filename)

## A self-contained version of plotTracks from epitopemap

In [127]:
# Per-predictor colour settings, keyed by predictor name.
# colormaps: matplotlib colormap names. 'Pinks' is not a registered matplotlib
#   colormap (get_cmap raises ValueError for it), so iedbmhc2 uses 'RdPu'.
# colors: flat colour names used for the rectangles drawn by plotTracks.
colormaps = {'tepitope': 'Greens', 'netmhciipan': 'Oranges', 'iedbmhc2': 'RdPu',
             'threading': 'Purples', 'iedbmhc1': 'Blues'}
colors = {'tepitope': 'green', 'netmhciipan': 'orange',
          'iedbmhc1': 'blue', 'iedbmhc2': 'pink', 'threading': 'purple'}

def plotTracks(predictors, title='', alleles=2, width=820, height=None,
                seqdepot=None, bcell=None, exp=None, tools=True):
    """Plot binding predictions in multiple alleles for a single protein.

    Every allele of every predictor gets its own horizontal track. Binders
    whose position is among the predictor's promiscuous binders are drawn
    as rectangles on the track of their allele.

    Args:
        predictors: a dictionary of Predictor objects keyed by predictor
            name (the key is looked up in the module level `colors` dict),
            with their predicted binder data usually for a single protein.
            NOTE(review): the original docstring says the first protein is
            used if several are present; nothing in this function selects
            it - confirm against the Predictor methods.
        title: plot title
        alleles: the minimum number of alleles for a binder to be shown
            (passed as `n` to getPromiscuousBinders)
        width: plot width
        height: plot height; when None it is derived from the track count
        seqdepot: optional data handed to plotAnnotations
        bcell: optional data handed to plotBCell
        exp: optional data handed to plotExp
        tools: True enables the default toolbar, any other value disables it
    Returns:
        the bokeh Figure
    """

    from collections import OrderedDict
    from bokeh.models import Range1d, HoverTool, ColumnDataSource
    from bokeh.plotting import Figure

    # only the boolean flag is honoured; a custom tool string is not passed through
    if tools == True:
        tools = "xpan, xwheel_zoom, resize, hover, reset, save"
    else:
        tools = ''

    #get title from the dataframe?

    # total number of allele tracks over all predictors (count starts at 1)
    ntracks = 1
    for m in predictors:
        ntracks += len(predictors[m].data.groupby('allele'))
    if height is None:
        height = 130 + 10*ntracks
    yrange = Range1d(start=0, end=ntracks+3)
    plot = Figure(title=title, title_text_font_size="11pt", plot_width=width,
                  plot_height=height, y_range=yrange,
                  y_axis_label='allele',
                  tools=tools,
                  background_fill_color="#FAFAFA",
                  toolbar_location="below")
    h = 3  # y position of the first track
    # identity checks: `!= None` is evaluated element-wise on DataFrames/arrays
    # and then fails when used as a condition
    if bcell is not None:
        plotBCell(plot, bcell, ntracks)
    if seqdepot is not None:
        plotAnnotations(plot, seqdepot)
    if exp is not None:
        plotExp(plot, exp)

    #plotRegions(plot)

    #lists for hover data
    #we plot all rects at once
    x = []; y = []; allele = []; widths = []; clrs = []; peptide = []
    predictor = []; position = []; score = []
    l = 80       # fallback length when no predictor yields promiscuous binders
    pred = None  # stays None only when `predictors` is empty
    for m in predictors:
        pred = predictors[m]
        df = pred.data
        sckey = pred.scorekey
        pb = pred.getPromiscuousBinders(data=df, n=alleles)
        if len(pb) == 0:
            continue
        l = pred.getLength()
        c = colors[m]

        for a, g in df.groupby('allele'):
            b = pred.getBinders(data=g)
            # only promiscuous; sort a new frame instead of sorting a slice in place
            b = b[b.pos.isin(pb.pos)].sort_values('pos')
            scores = b[sckey].values
            pos = b['pos'].values
            nb = len(scores)
            score.extend(scores)
            position.extend(pos)
            x.extend(pos + (l/2.0))  #offset as coords are rect centers
            widths.extend([l] * nb)
            clrs.extend([c] * nb)
            y.extend([h + 0.5] * nb)
            allele.extend([a] * nb)
            peptide.extend(list(b.peptide.values))
            predictor.extend([m] * nb)
            h += 1

    source = ColumnDataSource(data=dict(x=x, y=y, allele=allele, peptide=peptide,
                                    predictor=predictor, position=position, score=score))
    plot.rect(x, y, width=widths, height=0.8,
         #x_range=Range1d(start=1, end=seqlen+l),
         color=clrs, line_color='gray', alpha=0.7, source=source)

    hover = plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("allele", "@allele"),
        ("position", "@position"),
        ("peptide", "@peptide"),
        ("score", "@score"),
        ("predictor", "@predictor"),
    ])

    # x extent comes from the last predictor iterated; with no predictors at
    # all fall back to the default length instead of failing on an unbound name
    if pred is None:
        seqlen = l
    else:
        seqlen = pred.data.pos.max() + l
    plot.x_range = Range1d(start=0, end=seqlen+1)  #, bounds=(0, seqlen+1)))
    plot.xaxis.major_label_text_font_size = "8pt"
    plot.xaxis.major_label_text_font_style = "bold"
    plot.ygrid.grid_line_color = None
    plot.yaxis.major_label_text_font_size = '0pt'
    plot.xaxis.major_label_orientation = np.pi/4
    return plot


In [None]:
# Draw the binder tracks for the tepitope predictor and display them with bokeh's show().
plot = plotTracks({'tepitope':P})
show(plot)

In [138]:
def plotStackedArea(pred, title=None):
    """Plot smoothed per-allele prediction scores as a stacked area chart.

    For each allele the scores are ordered by position, negative values are
    clipped to 0 and a centred rolling mean with a window of the predictor
    length is applied; positions without a full window become 0.

    Args:
        pred: Predictor object. Reads `pred.data` (columns 'allele', 'pos',
            'name' and the score column), `pred.scorekey` and
            `pred.getLength()`.
        title: chart title; defaults to the `name` value of the first row
    Returns:
        the bokeh.charts Area chart
    """
    from collections import OrderedDict
    from bokeh.charts import Area
    from bokeh.models import Grid, Range1d, ColumnDataSource, HoverTool
    from bokeh.palettes import brewer

    tools = "xpan, xwheel_zoom, resize, hover, reset, save"

    df = pred.data
    l = pred.getLength()
    seqlen = df.pos.max() + l

    if title is None:
        title = list(df.head(1).name)[0]

    grps = df.groupby('allele')

    # brewer palettes only exist for a limited range of sizes; outside that
    # range reuse the nearest palette (trimmed or cycled) instead of failing
    spectral = brewer["Spectral"]
    ngroups = len(grps)
    if ngroups in spectral:
        colors = spectral[ngroups]
    else:
        nearest = min(max(ngroups, min(spectral)), max(spectral))
        palette = spectral[nearest]
        colors = [palette[i % len(palette)] for i in range(ngroups)]

    #calculate plot data
    # NOTE(review): each series keeps the row index of its group; confirm
    # that Area aligns the per-allele series as intended.
    data = {}
    scores = []; pos = []
    for name, g in grps:
        g = g.sort_values('pos')
        y = g[pred.scorekey].clip(lower=0)
        # Series.rolling replaces the removed pd.rolling_mean
        y = y.rolling(window=l, center=True).mean().fillna(0)
        data[name] = y
        scores.extend(y.values)
        # take positions from the sorted frame so they line up with the scores
        pos.extend(g.pos.values)

    # all columns must have the same length, so y holds every allele's scores
    source = ColumnDataSource(data=dict(x=pos, y=scores, score=scores))

    p = Area(data, title=title, legend="top_left", width=800, height=400, color=colors,
                stack=True, xlabel='position', ylabel='score', tools=tools, source=source)
    grid = p.select(type=Grid)
    grid.grid_line_color = None
    p.x_range = Range1d(start=0, end=seqlen+1, bounds=(0, seqlen+1))

    hover = p.select(dict(type=HoverTool))
    # tooltip field must match the data source column name ('score')
    hover.tooltips = OrderedDict([
        ("score", "@score"),
    ])
    return p

# Build the stacked area chart for the tepitope predictor and display it.
plot=plotStackedArea(P)
show(plot)