In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd

In [3]:
import sys
sys.path.append('/home/ahsvargo/xvalid')

In [4]:
from picturedrocks import Rocks
from picturedrocks.performance import FoldTester, PerformanceReport, NearestCentroidClassifier

In [5]:
import plotly.graph_objs as go
from plotly.offline import plot, iplot, init_notebook_mode

In [6]:
import plotly.io as pio

In [7]:
init_notebook_mode(connected=True)

In [8]:
import scanpy.api as sc
import anndata



examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.



In [9]:
def loadSimdata(method, allQ, filterQ, nMarks="", path="/home/ahsvargo/publicData/"):
    """Open the saved statistics archive for one method and simulation condition.

    The file name is built as
    ``10tests-<method>-<nMarks>statInfo<suffix>`` inside ``path``.

    Parameters
    ----------
    method : str
        Method name. Any name containing ``'1bcs'`` (e.g. ``'1bcsR'``,
        ``'1bcsC'``) is mapped to the single ``'1bcs'`` archive.
    allQ, filterQ : bool
        Select the simulation condition / file suffix:
        ``allQ and filterQ`` -> ``"-allFilt.npz"``,
        ``allQ and not filterQ`` -> ``".npz"``,
        ``not allQ`` -> ``"-preFilt.npz"`` (``filterQ`` is ignored here).
    nMarks : str or int, optional
        Inserted verbatim (via ``str``) in front of ``"statInfo"``.
    path : str, optional
        Directory holding the archives. Defaults to the location this
        notebook was written against.

    Returns
    -------
    The object returned by ``np.load`` for that file. It is left open
    because callers index it afterwards (keys read elsewhere in this
    notebook include ``'tpr'``, ``'fpr'``, ``'prec'``, ``'marks_list'`` and
    their ``r``/``c``-prefixed variants).
    """
    import os

    # The original code also recorded a gene count per condition that was
    # never used: 4999 (allFilt), 32738 (all genes), 5000 (preFilt).
    if allQ and filterQ:
        suffix = "-allFilt.npz"
    elif allQ:
        suffix = ".npz"
    else:
        suffix = "-preFilt.npz"

    # Every '1bcs*' variant is stored in the same '1bcs' archive.
    if '1bcs' in method:
        method = '1bcs'

    fname = os.path.join(path, "10tests-" + method + "-" + str(nMarks) + "statInfo" + suffix)
    return np.load(fname)


def makeROC(method, allQ, filterQ, nMarks=""):
    """Build an ROC scatter (TPR against FPR) for one method and condition.

    Parameters
    ----------
    method : str
        Method name; ``'1bcsR'`` reads the ``'rtpr'``/``'rfpr'`` arrays,
        ``'1bcsC'`` reads ``'ctpr'``/``'cfpr'``, anything else reads
        ``'tpr'``/``'fpr'``.
    allQ, filterQ, nMarks
        Forwarded unchanged to ``loadSimdata``.

    Returns
    -------
    plotly.graph_objs.Figure
        Two traces: the ROC markers and a reference line named "x=y" drawn
        over the same FPR values.
    """
    dayta = loadSimdata(method, allQ, filterQ, nMarks)

    # Compare strings by value; `is` against a literal only works when the
    # interpreter happens to intern both strings.
    if method == '1bcsR':
        tpr, fpr = dayta['rtpr'], dayta['rfpr']
    elif method == '1bcsC':
        tpr, fpr = dayta['ctpr'], dayta['cfpr']
    else:
        tpr, fpr = dayta['tpr'], dayta['fpr']

    traces = [
        go.Scatter(x=fpr, y=tpr, name="ROC", mode='markers'),
        go.Scatter(x=fpr, y=fpr, name="x=y", mode='lines'),
    ]

    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
    layout = go.Layout(
        xaxis=dict(
            title=dict(
                text='FPR',
                font=dict(family='Computer Modern', size=18),
            ),
        ),
        yaxis=dict(
            title=dict(
                text='TPR',
                font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
            ),
        ),
    )

    return go.Figure(data=traces, layout=layout)

def makePrec(method, allQ, filterQ, nMarks=""):
    """Plot precision against the number of markers for one method and condition.

    Parameters
    ----------
    method : str
        Method name; ``'1bcsR'`` reads ``'rprec'``, ``'1bcsC'`` reads
        ``'cprec'``, anything else reads ``'prec'``.
    allQ, filterQ, nMarks
        Forwarded unchanged to ``loadSimdata``.

    Returns
    -------
    plotly.graph_objs.Figure
        A single "Precision" trace with ``'marks_list'`` on the x-axis.
    """
    dayta = loadSimdata(method, allQ, filterQ, nMarks)

    # Compare strings by value; `is` against a literal only works when the
    # interpreter happens to intern both strings.
    if method == '1bcsR':
        prec = dayta['rprec']
    elif method == '1bcsC':
        prec = dayta['cprec']
    else:
        prec = dayta['prec']

    marks_list = dayta['marks_list']

    traces = [
        go.Scatter(x=marks_list, y=prec, name="Precision", mode='lines+markers'),
    ]

    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
    layout = go.Layout(
        xaxis=dict(
            title=dict(
                text='Number of markers',
                font=dict(family='Computer Modern', size=18),
            ),
        ),
        yaxis=dict(
            title=dict(
                text='Percent',
                font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
            ),
        ),
    )

    return go.Figure(data=traces, layout=layout)
    
def makeTprvFpr(method, allQ, filterQ, nMarks=""):
    """Plot TPR and FPR against the number of markers for one method and condition.

    Parameters
    ----------
    method : str
        Method name; ``'1bcsR'`` reads ``'rtpr'``/``'rfpr'``, ``'1bcsC'``
        reads ``'ctpr'``/``'cfpr'``, anything else reads ``'tpr'``/``'fpr'``.
    allQ, filterQ, nMarks
        Forwarded unchanged to ``loadSimdata``.

    Returns
    -------
    plotly.graph_objs.Figure
        Two marker traces ("TPR" and "FPR") sharing ``'marks_list'`` as x.
    """
    dayta = loadSimdata(method, allQ, filterQ, nMarks)

    # Compare strings by value; `is` against a literal only works when the
    # interpreter happens to intern both strings.
    if method == '1bcsR':
        tpr, fpr = dayta['rtpr'], dayta['rfpr']
    elif method == '1bcsC':
        tpr, fpr = dayta['ctpr'], dayta['cfpr']
    else:
        tpr, fpr = dayta['tpr'], dayta['fpr']

    marks_list = dayta['marks_list']

    traces = [
        go.Scatter(x=marks_list, y=tpr, name="TPR", mode='markers'),
        go.Scatter(x=marks_list, y=fpr, name="FPR", mode='markers'),
    ]

    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
    layout = go.Layout(
        xaxis=dict(
            title=dict(
                text='Number of markers',
                font=dict(family='Computer Modern', size=18),
            ),
        ),
        yaxis=dict(
            title=dict(
                text='Percent',
                font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
            ),
        ),
    )

    return go.Figure(data=traces, layout=layout)

In [10]:
# Names of the three comparison methods. The list is reused below as
# colorMap keys, for the shared layout's colorway and by plotClustErrors.
methods = ['wilcoxon', 't-test_overestim_var', 'logreg']
# Convenience handle on the first entry ('wilcoxon').
method = methods[0]

Make an ROC curve for one simulation condition and one method

In [11]:
# ROC scatter for '1bcsR' with allQ=False, filterQ=True and an empty nMarks.
iplot(makeROC('1bcsR', False, True, ""))

Compare ROC curves across simulation conditions for one method 

In [13]:
def compROC(method, nMarks=""):
    """Overlay the ROC scatter of one method for all three simulation conditions.

    Parameters
    ----------
    method : str
        Method name; ``'1bcsR'`` reads ``'rtpr'``/``'rfpr'``, ``'1bcsC'``
        reads ``'ctpr'``/``'cfpr'``, anything else reads ``'tpr'``/``'fpr'``.
    nMarks : str or int, optional
        Forwarded unchanged to ``loadSimdata``.

    Returns
    -------
    plotly.graph_objs.Figure
        One marker trace per condition plus a reference line named "x=y".
    """
    # (allQ, filterQ, legend name), in drawing order.
    conditions = [
        (False, False, "Filter before simulation"),
        (True, False, "All genes"),
        (True, True, "Filter after simulation"),
    ]

    traces = []
    for allQ, filterQ, name in conditions:
        dayta = loadSimdata(method, allQ, filterQ, nMarks)

        # Compare strings by value; `is` against a literal only works when
        # the interpreter happens to intern both strings.
        if method == '1bcsR':
            tpr, fpr = dayta['rtpr'], dayta['rfpr']
        elif method == '1bcsC':
            tpr, fpr = dayta['ctpr'], dayta['cfpr']
        else:
            tpr, fpr = dayta['tpr'], dayta['fpr']

        traces.append(go.Scatter(x=fpr, y=tpr, name=name, mode='markers'))

    # The reference line is drawn over the FPR values of the last condition
    # loaded ("Filter after simulation"), so it spans only that FPR range.
    traces.append(go.Scatter(x=fpr, y=fpr, name="x=y", mode='lines'))

    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
    layout = go.Layout(
        xaxis=dict(
            title=dict(
                text='FPR',
                font=dict(family='Computer Modern', size=18),
            ),
        ),
        yaxis=dict(
            title=dict(
                text='TPR',
                font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
            ),
        ),
    )

    return go.Figure(data=traces, layout=layout)
    
    
    

In [14]:
# ROC comparison across the three simulation conditions for '1bcsC'.
iplot(compROC('1bcsC'))

Graphs in paper

In [25]:
# Hex colour assigned to each method name. The three entries of `methods`
# are referenced by position so the keys follow that list.
colorMap = {
    'rankCorr': "#3366cc",
    methods[0]: "#d62728",   # red (earlier choice: "#ef3774", a pink)
    methods[1]: "#ff9900",
    methods[2]: "#2ca02c",   # "chalkboard" (earlier choice: "#22aa99", a green-blue)
    'edgeR': "#9467bd",
    'edgeRdet': "#c5b0d5",
    'MAST': '#17becf',
    'MASTdet': '#9edae5',
    'enets': "#8c564b",
    'genzel': "#bcbd22",     # earlier choice: "#0099c6", a blue
    'scvi': "#f032e6",
    'random': '#808080',
}

In [155]:
# Shared figure layout passed as `layout=` to the figure-building functions
# below. Axis titles use `title=dict(text=..., font=...)` instead of the
# deprecated `titlefont` attribute.
layout = go.Layout(
    # Trace colours in order: 'rankCorr', the three entries of `methods`,
    # then "#9467bd" (the same hex value colorMap assigns to 'edgeR').
    colorway=[colorMap[meth] for meth in ['rankCorr'] + methods] + ["#9467bd"],
    font=dict(family='CMU Serif'),
    showlegend=False,
    xaxis=dict(
        title=dict(
            text='Number of markers',
            font=dict(size=30, color='#000'),
        ),
        showgrid=False,
        ticks='inside',
        showline=True,
        mirror='ticks',
        range=[0, 320],  # alternative range kept from earlier runs: [10, 170]
        tickfont=dict(size=24, color='#000'),
    ),
    yaxis=dict(
        # Alternative titles kept from earlier runs: 'Average precision',
        # 'Matthews correlation coefficient'. No fixed range is set here
        # (an earlier run used [0.03, 0.16]).
        title=dict(
            text='Classification error rate',
            font=dict(size=30, color='#000'),
        ),
        showgrid=False,
        ticks='inside',
        showline=True,
        mirror='ticks',
        tickfont=dict(size=24, color='#000'),
    ),
    # Only the legend font size is customised; showlegend=False above hides
    # the legend unless that flag is changed.
    legend=dict(
        font=dict(size=20),
    ),
    margin=go.layout.Margin(
        l=100,
        r=10,
        b=90,
        t=10,
    ),
)

In [30]:
# NOTE(review): scratch cell — writes a placeholder y-axis title into the shared
# `layout`. The figure functions that accept `layout` set their own y-axis
# title, but a figure built directly from `layout` would show this text.
layout['yaxis']['title']['text'] = 'dung'

In [56]:
# Display the current x-axis settings of the shared layout.
layout['xaxis']

layout.XAxis({
    'mirror': 'ticks',
    'range': [0, 420],
    'showgrid': False,
    'showline': True,
    'tickfont': {'color': '#000', 'size': 24},
    'ticks': 'inside',
    'title': {'font': {'color': '#000', 'size': 30}, 'text': 'FPR'}
})

In [34]:
def compPrec(method, nMarks="", layout=""):
    """Overlay precision-vs-markers curves of one method for all three conditions.

    Parameters
    ----------
    method : str
        Method name; ``'1bcsR'`` reads ``'rprec'``, ``'1bcsC'`` reads
        ``'cprec'``, anything else reads ``'prec'``.
    nMarks : str or int, optional
        Forwarded unchanged to ``loadSimdata``.
    layout : plotly layout or "", optional
        Layout to style the figure with. ``""`` (the default) or ``None``
        selects a built-in layout. A supplied layout is copied before its
        y-axis title is set to 'Precision', so the caller's object is not
        modified.

    Returns
    -------
    plotly.graph_objs.Figure
        One 'lines+markers' trace per simulation condition.
    """
    # (allQ, filterQ, legend name), in drawing order.
    conditions = [
        (False, False, "Filter before simulation"),
        (True, False, "All genes"),
        (True, True, "Filter after simulation"),
    ]

    traces = []
    for allQ, filterQ, name in conditions:
        dayta = loadSimdata(method, allQ, filterQ, nMarks)

        # Compare strings by value; `is` against a literal only works when
        # the interpreter happens to intern both strings.
        if method == '1bcsR':
            prec = dayta['rprec']
        elif method == '1bcsC':
            prec = dayta['cprec']
        else:
            prec = dayta['prec']

        traces.append(go.Scatter(
            x=np.array(dayta['marks_list']), y=prec, name=name, mode='lines+markers'
        ))

    if layout is None or layout == "":
        # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
        layout = go.Layout(
            xaxis=dict(
                title=dict(
                    text='Number of markers',
                    font=dict(family='Computer Modern', size=18),
                ),
            ),
            yaxis=dict(
                title=dict(
                    text='Precision',
                    font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
                ),
            ),
        )
    else:
        # Work on a copy: mutating the caller's (shared) layout would leak
        # this figure's title into every later figure built from it.
        layout = go.Layout(layout)
        layout['yaxis']['title']['text'] = 'Precision'

    return go.Figure(data=traces, layout=layout)

In [37]:
# Precision comparison across the three simulation conditions for '1bcsC',
# using the nMarks=500 archives and the built-in layout.
iplot(compPrec('1bcsC',500))

In [144]:
def methodPrecs(allQ, filterQ, nMarks="", layout=""):
    """Compare precision-vs-markers curves of several methods for one condition.

    The methods plotted are fixed inside the function:
    '1bcsC', 'wilcoxon', 't-test_overestim_var' and 'logreg'.

    Parameters
    ----------
    allQ, filterQ, nMarks
        Forwarded unchanged to ``loadSimdata``.
    layout : plotly layout or "", optional
        Layout to style the figure with. ``""`` (the default) or ``None``
        selects a built-in layout. A supplied layout is copied before its
        y-axis title and range are set, so the caller's object is not
        modified.

    Returns
    -------
    plotly.graph_objs.Figure
        One 'lines+markers' trace per method plus a reference curve.
    """
    # Local on purpose: '1bcsC' comes first here, unlike the notebook-level list.
    methods = ['1bcsC', 'wilcoxon', 't-test_overestim_var', 'logreg']
    traces = []

    for method in methods:
        dayta = loadSimdata(method, allQ, filterQ, nMarks)

        # Compare strings by value; `is` against a literal only works when
        # the interpreter happens to intern both strings.
        if method == '1bcsR':
            prec = dayta['rprec']
        elif method == '1bcsC':
            prec = dayta['cprec']
        else:
            prec = dayta['prec']

        traces.append(go.Scatter(
            x=np.array(dayta['marks_list']), y=prec, name=method, mode='lines+markers'
        ))

    # Reference curve (1 + 0.15*(n - 1)) / n for n = 1..399.
    # NOTE(review): the trace is labelled "x=y" although it is not the
    # identity line — confirm the intended legend name.
    n_vals = np.array(list(range(1, 400)))
    traces.append(go.Scatter(
        x=list(range(1, 400)),
        y=(1 + .15*(n_vals - 1))/n_vals,
        name="x=y",
        mode='lines',
    ))

    # NOTE(review): the built-in layout uses a y-range starting at -0.2 while
    # the supplied-layout branch uses -.02; both values are kept as found.
    if layout is None or layout == "":
        # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
        layout = go.Layout(
            xaxis=dict(
                title=dict(
                    text='Number of markers',
                    font=dict(family='Computer Modern', size=18),
                ),
            ),
            yaxis=dict(
                title=dict(
                    text='Precision',
                    font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
                ),
                range=[-0.2, 1.2],
            ),
        )
    else:
        # Work on a copy: mutating the caller's (shared) layout would leak
        # this figure's title and range into every later figure built from it.
        layout = go.Layout(layout)
        layout['yaxis']['title']['text'] = 'Precision'
        layout['yaxis']['range'] = [-.02, 1.2]

    return go.Figure(data=traces, layout=layout)

In [149]:
# Precision of every method for allQ=True, filterQ=False (the condition other
# cells label "All genes"), with the built-in layout.
iplot(methodPrecs(True, False, ""))

In [51]:
# Save the nMarks=500 precision comparison (allQ=True, filterQ=False), styled
# with the shared layout, as plotly JSON.
pio.write_json(methodPrecs(True, False, 500, layout=layout), "pics2/zhengsim/methods-500prec-allGenes.json")

In [152]:
def methodROC(allQ, filterQ, nMarks, layout=""):
    """Compare the ROC curves of several methods for one simulation condition.

    The methods plotted are fixed inside the function:
    '1bcsC', 'wilcoxon', 't-test_overestim_var' and 'logreg'.

    Parameters
    ----------
    allQ, filterQ, nMarks
        Forwarded unchanged to ``loadSimdata``.
    layout : plotly layout or "", optional
        Layout to style the figure with. ``""`` (the default) or ``None``
        selects a built-in layout. A supplied layout is copied before its
        axis titles and x-range are set, so the caller's object is not
        modified.

    Returns
    -------
    plotly.graph_objs.Figure
        One 'lines+markers' trace per method plus a reference line named "x=y".
    """
    # Local on purpose: '1bcsC' comes first here, unlike the notebook-level list.
    methods = ['1bcsC', 'wilcoxon', 't-test_overestim_var', 'logreg']
    traces = []

    for method in methods:
        dayta = loadSimdata(method, allQ, filterQ, nMarks)

        # Compare strings by value; `is` against a literal only works when
        # the interpreter happens to intern both strings.
        if method == '1bcsR':
            tpr, fpr = dayta['rtpr'], dayta['rfpr']
        elif method == '1bcsC':
            tpr, fpr = dayta['ctpr'], dayta['cfpr']
        else:
            tpr, fpr = dayta['tpr'], dayta['fpr']

        traces.append(go.Scatter(x=fpr, y=tpr, name=method, mode='lines+markers'))

    # The reference line is drawn over the FPR values of the last method
    # loaded ('logreg'), so it spans only that FPR range.
    traces.append(go.Scatter(x=fpr, y=fpr, name="x=y", mode='lines'))

    if layout is None or layout == "":
        # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
        layout = go.Layout(
            xaxis=dict(
                title=dict(
                    text='FPR',
                    font=dict(family='Computer Modern', size=18),
                ),
            ),
            yaxis=dict(
                title=dict(
                    text='TPR',
                    font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
                ),
            ),
        )
    else:
        # Work on a copy: mutating the caller's (shared) layout would leave
        # the 'FPR' title and [0, 1] x-range behind for later figures that
        # reuse it and only reset the y-axis.
        layout = go.Layout(layout)
        layout['xaxis']['title']['text'] = 'FPR'
        layout['xaxis']['range'] = [0, 1]
        layout['yaxis']['title']['text'] = 'TPR'

    return go.Figure(data=traces, layout=layout)

In [156]:
# ROC comparison of all methods for allQ=True, filterQ=True with the shared layout.
iplot(methodROC(True,True,"", layout=layout))

In [159]:
# Save the ROC comparison for allQ=True, filterQ=False, styled with the shared
# layout, as plotly JSON.
pio.write_json(methodROC(True, False, "", layout=layout), "pics2/zhengsim/methods-ROC-allGenes.json")

In [21]:
def methodTprvFpr(allQ, filterQ, nMarks):
    """Plot TPR and FPR against the number of markers for several methods.

    The methods plotted are fixed inside the function:
    'wilcoxon', 't-test_overestim_var', 'logreg' and '1bcsC'.

    Parameters
    ----------
    allQ, filterQ, nMarks
        Forwarded unchanged to ``loadSimdata``.

    Returns
    -------
    plotly.graph_objs.Figure
        Two marker traces per method, named "<method>-TPR" and "<method>-FPR".
    """
    methods = ['wilcoxon', 't-test_overestim_var', 'logreg', '1bcsC']
    traces = []

    for method in methods:
        dayta = loadSimdata(method, allQ, filterQ, nMarks)

        # Compare strings by value; `is` against a literal only works when
        # the interpreter happens to intern both strings.
        if method == '1bcsR':
            tpr, fpr = dayta['rtpr'], dayta['rfpr']
        elif method == '1bcsC':
            tpr, fpr = dayta['ctpr'], dayta['cfpr']
        else:
            tpr, fpr = dayta['tpr'], dayta['fpr']

        marks = dayta['marks_list']

        traces.append(go.Scatter(x=marks, y=tpr, name=method + "-TPR", mode='markers'))
        traces.append(go.Scatter(x=marks, y=fpr, name=method + "-FPR", mode='markers'))

    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
    layout = go.Layout(
        xaxis=dict(
            title=dict(
                text='Number of markers',
                font=dict(family='Computer Modern', size=18),
            ),
        ),
        yaxis=dict(
            title=dict(
                text='Percent',
                font=dict(family='Computer Modern', size=18, color='#7f7f7f'),
            ),
        ),
    )

    return go.Figure(data=traces, layout=layout)

In [22]:
# TPR and FPR against marker count for all methods, allQ=False (the '-preFilt'
# archives; filterQ is not consulted in that case), nMarks=500.
iplot(methodTprvFpr(False, True, 500))

In [154]:
# Display the notebook-level method list, which plotClustErrors reads.
methods

['wilcoxon', 't-test_overestim_var', 'logreg']

In [66]:
def plotClustErrors(allQ=True, filterQ=False, layout=""):
    """Plot the per-method error rate against the number of markers.

    Reads one of three archives from the current working directory:
    "ave-errorRates.npz" (``allQ and not filterQ``),
    "ave-errorRates-allFilt.npz" (``allQ and filterQ``) or
    "ave-errorRates-preFilt.npz" (``not allQ``; ``filterQ`` is ignored).

    Parameters
    ----------
    allQ, filterQ : bool, optional
        Select the archive as described above.
    layout : plotly layout or "", optional
        Layout to style the figure with. ``""`` (the default) or ``None``
        selects a built-in layout. A supplied layout is copied before its
        y-axis title is set, so the caller's object is not modified.

    Returns
    -------
    plotly.graph_objs.Figure
        One 'lines+markers' trace per entry of ``['1bcsC'] + methods``, where
        ``methods`` is the notebook-level list; row ``i`` of ``'errors'``
        divided by ``'ncells'`` is plotted for the i-th name.
    """
    if allQ and not filterQ:
        fname = "ave-errorRates.npz"
    elif allQ and filterQ:
        fname = "ave-errorRates-allFilt.npz"
    else:  # not allQ
        fname = "ave-errorRates-preFilt.npz"

    # Pull the arrays out inside a context manager so the archive's file
    # handle is closed as soon as they are in memory.
    with np.load(fname) as dayta:
        allYvals = dayta['errors']
        denom = dayta['ncells']
        xvals = dayta['xvals']

    traces = [
        go.Scatter(x=xvals, y=allYvals[ind]/denom, name=method, mode='lines+markers')
        for ind, method in enumerate(['1bcsC'] + methods)
    ]

    if layout is None or layout == "":
        # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
        layout = go.Layout(
            xaxis=dict(
                title=dict(
                    text='Number of markers',
                    font=dict(family='Computer Modern', size=18),
                ),
            ),
            yaxis=dict(
                title=dict(
                    text='Clustering Error',
                    font=dict(family='Computer Modern', size=18),
                ),
            ),
        )
    else:
        # Work on a copy so the caller's (shared) layout keeps its own title.
        layout = go.Layout(layout)
        layout['yaxis']['title']['text'] = "Classification error rate"

    return go.Figure(data=traces, layout=layout)
        

In [150]:
# Error-rate curves read from "ave-errorRates.npz" (allQ=True, filterQ=False),
# drawn with the built-in layout.
iplot(plotClustErrors(allQ=True,filterQ=False))

In [86]:
# Save the error-rate figure for allQ=False (the "-preFilt" archive; filterQ is
# not consulted in that case), styled with the shared layout, as plotly JSON.
pio.write_json(plotClustErrors(False, True, layout=layout), "pics2/zhengsim/clustError-preFilt.json")