In [1]:
import numpy as np
import pandas as pd

In [2]:
import sys
sys.path.append('/home/ahsvargo/xvalid')

In [3]:
from picturedrocks import Rocks
from picturedrocks.performance import FoldTester, PerformanceReport, NearestCentroidClassifier

In [8]:
import plotly.graph_objs as go
from plotly.offline import plot, iplot, init_notebook_mode

In [4]:
import anndata

In [10]:
init_notebook_mode(connected=True)

In [5]:
import scanpy.api as sc

sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80, color_map='viridis')  # low dpi (dots per inch) yields small inline figures
sc.logging.print_versions()



examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.



scanpy==1.3.7+56.gde16b79.dirty anndata==0.6.11 numpy==1.14.6 scipy==1.1.0 pandas==0.23.4 scikit-learn==0.20.0 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


## Random Forest Classifier

In [12]:
from sklearn.ensemble import RandomForestClassifier

In [13]:
class RandomForest:
    """Thin train/test adapter around sklearn's RandomForestClassifier.

    The class itself (not an instance) is handed to ``ft.classify`` by the
    evaluation code in this notebook, so the constructor takes no arguments.
    """

    def __init__(self):
        # Last dataset passed to train(); None until train() is called.
        self.traindata = None
        # 100 trees, fitted using all available cores (n_jobs=-1).
        self.RFC = RandomForestClassifier(n_estimators=100, n_jobs=-1)

    def train(self, data):
        """Fit the forest on ``data.X`` using the first column of ``data.y``."""
        self.traindata = data
        labels = data.y[:, 0]
        self.RFC.fit(data.X, labels)

    def test(self, Xtest, sparse):
        """Return the forest's predictions for ``Xtest``.

        ``sparse`` is accepted but ignored by this classifier.
        """
        predictions = self.RFC.predict(Xtest)
        return predictions
    

### Zheng Simulated

In [6]:
import scipy.sparse as spsp

In [7]:
dataset = "zhengsim"

In [8]:
def makePath(test, allQ = True, filterQ = False):
    """Return ``(directory, file name)`` of the counts data for one simulated test.

    Parameters
    ----------
    test : index of the simulated test; used as the last directory component.
    allQ : if True, use the "allGenes" directory.
    filterQ : if True and ``allQ`` is False, use the "filtered" directory.
        If both flags are True, the file name becomes "filtered-splat.h5ad"
        (inside "allGenes"); otherwise it is "sparseCounts.npz".

    Raises
    ------
    ValueError
        If both ``allQ`` and ``filterQ`` are False: no directory is defined
        for that combination (previously this surfaced as UnboundLocalError).
    """
    if allQ:
        partPath = "allGenes"
    elif filterQ:
        partPath = "filtered"
    else:
        raise ValueError(
            "makePath: at least one of allQ / filterQ must be True "
            "(no data directory is defined for allQ=False, filterQ=False)"
        )

    fName = "filtered-splat.h5ad" if (allQ and filterQ) else "sparseCounts.npz"

    path = "/home/ahsvargo/turbo/scData/zheng17/splatter-bCells/" + partPath + "/" + str(test) + "/"

    return path, fName
    

In [13]:
def makeFoldTester(path, fName):
    """Load one simulated dataset and wrap it in a FoldTester with saved folds.

    Parameters
    ----------
    path : directory (with trailing slash) holding the counts file, "y.dat"
        and "bCells-5folds.npz".
    fName : counts file name; a name containing "npz" is read with
        ``scipy.sparse.load_npz``, one containing "h5ad" with ``sc.read_h5ad``
        (only its ``.X`` is kept).

    Returns
    -------
    (ft, lookup) : the FoldTester, and the unique labels as a list of strings.

    Raises
    ------
    ValueError
        If ``fName`` matches neither supported format (previously this left
        ``X`` unbound and failed later with a confusing error).
    """
    if 'npz' in fName:
        X = spsp.load_npz(path + fName)
    elif 'h5ad' in fName:
        X = sc.read_h5ad(path + fName).X
    else:
        raise ValueError(
            "makeFoldTester: unsupported counts file {!r}; "
            "expected an .npz or .h5ad file".format(fName)
        )

    # Labels on disk are shifted down by one
    # (presumably 1-based -> 0-based; confirm against the data files).
    y = np.loadtxt(path + "y.dat", dtype='int') - 1

    # The Rocks/FoldTester pair is built mainly so the saved folds can be loaded.
    data = Rocks(X, y)
    ft = FoldTester(data)
    ft.loadfolds(path + "bCells-5folds.npz")

    # Normalization is applied after the folds are loaded, as in the original
    # cell. NOTE(review): assumes ft keeps a reference to `data`, so the
    # normalized values are what gets classified; confirm in picturedrocks.
    data.normalize(totalexpr=10000, log=True)

    # ft.makerocks() is intentionally not called: no cross-validated marker
    # selection happens here, only testing of pre-selected marker sets.
    return ft, [str(a) for a in np.unique(y)]

In [14]:
makePath(0)[0]

'/home/ahsvargo/turbo/scData/zheng17/splatter-bCells/allGenes/0/'

In [15]:
def testMarkers(marks_list, ft, classifier, pvalMarkerList):
    
    xlist = []
    ylist = []
    
    for marks_per_clust in marks_list:
    
        print("markers per cluster: {}".format(marks_per_clust), flush=True)
        # every method other than elastic nets:
        #myMarks = [list(set().union(*table[:,:marks_per_clust])) for table in pvalMarkerList]
        # elastic nets:
        myMarks = [list(set().union(*[marks[:marks_per_clust] for marks in table])) for table in pvalMarkerList]
        ft.markers = myMarks
    
        print("Number of markers per fold: {}".format([len(a) for a in ft.markers]))
        xlist.append(np.array([len(a) for a in ft.markers]).mean())
    
        ft.classify(classifier)
        wrong = np.sum(  (ft.data.y.astype(int).flatten() != ft.yhat) * 1.0 )
        ylist.append(wrong)
        
    return xlist, ylist

In [3]:
methods = ['wilcoxon', 't-test_overestim_var', 'logreg']
method = methods[1]

In [17]:
%%time

# Evaluate the random forest on the pre-filtered data for 10 simulated tests,
# using the Wilcoxon marker lists, and save the per-test error counts.
method = methods[0]
# Cutoffs: 2..100 in steps of 2, 105..300 in steps of 5, 310..500 in steps of 10.
marks_list = list(2*np.array(range(1,51))) + list(5*np.array(range(21,61))) + list(10*np.array(range(31,51)))

fullXlist = []
fullYlist = []

for test in range(10):
    print("*** WORKING ON TEST {} ***".format(test), flush=True)

    path, fName = makePath(test, allQ=False, filterQ=True)
    print("Counts data in: " + path + fName)
    ft, lookup = makeFoldTester(path, fName)

    folds = ft.folds

    # Open the marker archive once and close it as soon as both arrays are read.
    markerFile = path + dataset + "-filtered-" + method + "-markerList.npz"
    print("Loading markers from: "+ markerFile)
    with np.load(markerFile) as markerArchive:
        pvalMarkerList = markerArchive['marks']
        pvalScoreList = markerArchive['pvals']

    xlist, ylist = testMarkers(marks_list, ft, RandomForest, pvalMarkerList)
    fullXlist.append(xlist)
    fullYlist.append(ylist)


# One name drives both the save and the message, so the log cannot disagree
# with the file that was actually written (np.savez appends ".npz").
outName = method + "-errorRates-preFilt-normalized"
np.savez(outName, xlist=fullXlist, ylist=fullYlist)
print("Saving data to: " + outName + ".npz")

*** WORKING ON TEST 0 ***
Counts data in: /home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/0/sparseCounts.npz
Loading markers from: /home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/0/zhengsim-filtered-wilcoxon-markerList.npz
markers per cluster: 2
Number of markers per fold: [2, 2, 2, 2, 2]
markers per cluster: 4
Number of markers per fold: [4, 4, 4, 4, 4]
markers per cluster: 6
Number of markers per fold: [6, 6, 6, 6, 6]
markers per cluster: 8
Number of markers per fold: [8, 8, 8, 8, 8]
markers per cluster: 10
Number of markers per fold: [10, 10, 10, 10, 10]
markers per cluster: 12
Number of markers per fold: [12, 12, 12, 12, 12]
markers per cluster: 14
Number of markers per fold: [14, 14, 14, 14, 14]
markers per cluster: 16
Number of markers per fold: [16, 16, 16, 16, 16]
markers per cluster: 18
Number of markers per fold: [18, 18, 18, 18, 18]
markers per cluster: 20
Number of markers per fold: [20, 20, 20, 20, 20]
markers per cluster: 22
Number of markers per

markers per cluster: 460
Number of markers per fold: [460, 460, 460, 460, 460]
markers per cluster: 470
Number of markers per fold: [470, 470, 470, 470, 470]
markers per cluster: 480
Number of markers per fold: [480, 480, 480, 480, 480]
markers per cluster: 490
Number of markers per fold: [490, 490, 490, 490, 490]
markers per cluster: 500
Number of markers per fold: [500, 500, 500, 500, 500]
*** WORKING ON TEST 1 ***
Counts data in: /home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/1/sparseCounts.npz
Loading markers from: /home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/1/zhengsim-filtered-wilcoxon-markerList.npz
markers per cluster: 2
Number of markers per fold: [2, 2, 2, 2, 2]
markers per cluster: 4
Number of markers per fold: [4, 4, 4, 4, 4]
markers per cluster: 6
Number of markers per fold: [6, 6, 6, 6, 6]
markers per cluster: 8
Number of markers per fold: [8, 8, 8, 8, 8]
markers per cluster: 10
Number of markers per fold: [10, 10, 10, 10, 10]
markers per clu

markers per cluster: 410
Number of markers per fold: [410, 410, 410, 410, 410]
markers per cluster: 420
Number of markers per fold: [420, 420, 420, 420, 420]
markers per cluster: 430
Number of markers per fold: [430, 430, 430, 430, 430]
markers per cluster: 440
Number of markers per fold: [440, 440, 440, 440, 440]
markers per cluster: 450
Number of markers per fold: [450, 450, 450, 450, 450]
markers per cluster: 460
Number of markers per fold: [460, 460, 460, 460, 460]
markers per cluster: 470
Number of markers per fold: [470, 470, 470, 470, 470]
markers per cluster: 480
Number of markers per fold: [480, 480, 480, 480, 480]
markers per cluster: 490
Number of markers per fold: [490, 490, 490, 490, 490]
markers per cluster: 500
Number of markers per fold: [500, 500, 500, 500, 500]
*** WORKING ON TEST 2 ***
Counts data in: /home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/2/sparseCounts.npz
Loading markers from: /home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/2/zhe

markers per cluster: 360
Number of markers per fold: [360, 360, 360, 360, 360]
markers per cluster: 370
Number of markers per fold: [370, 370, 370, 370, 370]
markers per cluster: 380
Number of markers per fold: [380, 380, 380, 380, 380]
markers per cluster: 390
Number of markers per fold: [390, 390, 390, 390, 390]
markers per cluster: 400
Number of markers per fold: [400, 400, 400, 400, 400]
markers per cluster: 410
Number of markers per fold: [410, 410, 410, 410, 410]
markers per cluster: 420
Number of markers per fold: [420, 420, 420, 420, 420]
markers per cluster: 430
Number of markers per fold: [430, 430, 430, 430, 430]
markers per cluster: 440
Number of markers per fold: [440, 440, 440, 440, 440]
markers per cluster: 450
Number of markers per fold: [450, 450, 450, 450, 450]
markers per cluster: 460
Number of markers per fold: [460, 460, 460, 460, 460]
markers per cluster: 470
Number of markers per fold: [470, 470, 470, 470, 470]
markers per cluster: 480
Number of markers per fold:

markers per cluster: 310
Number of markers per fold: [310, 310, 310, 310, 310]
markers per cluster: 320
Number of markers per fold: [320, 320, 320, 320, 320]
markers per cluster: 330
Number of markers per fold: [330, 330, 330, 330, 330]
markers per cluster: 340
Number of markers per fold: [340, 340, 340, 340, 340]
markers per cluster: 350
Number of markers per fold: [350, 350, 350, 350, 350]
markers per cluster: 360
Number of markers per fold: [360, 360, 360, 360, 360]
markers per cluster: 370
Number of markers per fold: [370, 370, 370, 370, 370]
markers per cluster: 380
Number of markers per fold: [380, 380, 380, 380, 380]
markers per cluster: 390
Number of markers per fold: [390, 390, 390, 390, 390]
markers per cluster: 400
Number of markers per fold: [400, 400, 400, 400, 400]
markers per cluster: 410
Number of markers per fold: [410, 410, 410, 410, 410]
markers per cluster: 420
Number of markers per fold: [420, 420, 420, 420, 420]
markers per cluster: 430
Number of markers per fold:

markers per cluster: 280
Number of markers per fold: [280, 280, 280, 280, 280]
markers per cluster: 285
Number of markers per fold: [285, 285, 285, 285, 285]
markers per cluster: 290
Number of markers per fold: [290, 290, 290, 290, 290]
markers per cluster: 295
Number of markers per fold: [295, 295, 295, 295, 295]
markers per cluster: 300
Number of markers per fold: [300, 300, 300, 300, 300]
markers per cluster: 310
Number of markers per fold: [310, 310, 310, 310, 310]
markers per cluster: 320
Number of markers per fold: [320, 320, 320, 320, 320]
markers per cluster: 330
Number of markers per fold: [330, 330, 330, 330, 330]
markers per cluster: 340
Number of markers per fold: [340, 340, 340, 340, 340]
markers per cluster: 350
Number of markers per fold: [350, 350, 350, 350, 350]
markers per cluster: 360
Number of markers per fold: [360, 360, 360, 360, 360]
markers per cluster: 370
Number of markers per fold: [370, 370, 370, 370, 370]
markers per cluster: 380
Number of markers per fold:

markers per cluster: 255
Number of markers per fold: [255, 255, 255, 255, 255]
markers per cluster: 260
Number of markers per fold: [260, 260, 260, 260, 260]
markers per cluster: 265
Number of markers per fold: [265, 265, 265, 265, 265]
markers per cluster: 270
Number of markers per fold: [270, 270, 270, 270, 270]
markers per cluster: 275
Number of markers per fold: [275, 275, 275, 275, 275]
markers per cluster: 280
Number of markers per fold: [280, 280, 280, 280, 280]
markers per cluster: 285
Number of markers per fold: [285, 285, 285, 285, 285]
markers per cluster: 290
Number of markers per fold: [290, 290, 290, 290, 290]
markers per cluster: 295
Number of markers per fold: [295, 295, 295, 295, 295]
markers per cluster: 300
Number of markers per fold: [300, 300, 300, 300, 300]
markers per cluster: 310
Number of markers per fold: [310, 310, 310, 310, 310]
markers per cluster: 320
Number of markers per fold: [320, 320, 320, 320, 320]
markers per cluster: 330
Number of markers per fold:

markers per cluster: 230
Number of markers per fold: [230, 230, 230, 230, 230]
markers per cluster: 235
Number of markers per fold: [235, 235, 235, 235, 235]
markers per cluster: 240
Number of markers per fold: [240, 240, 240, 240, 240]
markers per cluster: 245
Number of markers per fold: [245, 245, 245, 245, 245]
markers per cluster: 250
Number of markers per fold: [250, 250, 250, 250, 250]
markers per cluster: 255
Number of markers per fold: [255, 255, 255, 255, 255]
markers per cluster: 260
Number of markers per fold: [260, 260, 260, 260, 260]
markers per cluster: 265
Number of markers per fold: [265, 265, 265, 265, 265]
markers per cluster: 270
Number of markers per fold: [270, 270, 270, 270, 270]
markers per cluster: 275
Number of markers per fold: [275, 275, 275, 275, 275]
markers per cluster: 280
Number of markers per fold: [280, 280, 280, 280, 280]
markers per cluster: 285
Number of markers per fold: [285, 285, 285, 285, 285]
markers per cluster: 290
Number of markers per fold:

markers per cluster: 205
Number of markers per fold: [205, 205, 205, 205, 205]
markers per cluster: 210
Number of markers per fold: [210, 210, 210, 210, 210]
markers per cluster: 215
Number of markers per fold: [215, 215, 215, 215, 215]
markers per cluster: 220
Number of markers per fold: [220, 220, 220, 220, 220]
markers per cluster: 225
Number of markers per fold: [225, 225, 225, 225, 225]
markers per cluster: 230
Number of markers per fold: [230, 230, 230, 230, 230]
markers per cluster: 235
Number of markers per fold: [235, 235, 235, 235, 235]
markers per cluster: 240
Number of markers per fold: [240, 240, 240, 240, 240]
markers per cluster: 245
Number of markers per fold: [245, 245, 245, 245, 245]
markers per cluster: 250
Number of markers per fold: [250, 250, 250, 250, 250]
markers per cluster: 255
Number of markers per fold: [255, 255, 255, 255, 255]
markers per cluster: 260
Number of markers per fold: [260, 260, 260, 260, 260]
markers per cluster: 265
Number of markers per fold:

markers per cluster: 180
Number of markers per fold: [180, 180, 180, 180, 180]
markers per cluster: 185
Number of markers per fold: [185, 185, 185, 185, 185]
markers per cluster: 190
Number of markers per fold: [190, 190, 190, 190, 190]
markers per cluster: 195
Number of markers per fold: [195, 195, 195, 195, 195]
markers per cluster: 200
Number of markers per fold: [200, 200, 200, 200, 200]
markers per cluster: 205
Number of markers per fold: [205, 205, 205, 205, 205]
markers per cluster: 210
Number of markers per fold: [210, 210, 210, 210, 210]
markers per cluster: 215
Number of markers per fold: [215, 215, 215, 215, 215]
markers per cluster: 220
Number of markers per fold: [220, 220, 220, 220, 220]
markers per cluster: 225
Number of markers per fold: [225, 225, 225, 225, 225]
markers per cluster: 230
Number of markers per fold: [230, 230, 230, 230, 230]
markers per cluster: 235
Number of markers per fold: [235, 235, 235, 235, 235]
markers per cluster: 240
Number of markers per fold:

markers per cluster: 155
Number of markers per fold: [155, 155, 155, 155, 155]
markers per cluster: 160
Number of markers per fold: [160, 160, 160, 160, 160]
markers per cluster: 165
Number of markers per fold: [165, 165, 165, 165, 165]
markers per cluster: 170
Number of markers per fold: [170, 170, 170, 170, 170]
markers per cluster: 175
Number of markers per fold: [175, 175, 175, 175, 175]
markers per cluster: 180
Number of markers per fold: [180, 180, 180, 180, 180]
markers per cluster: 185
Number of markers per fold: [185, 185, 185, 185, 185]
markers per cluster: 190
Number of markers per fold: [190, 190, 190, 190, 190]
markers per cluster: 195
Number of markers per fold: [195, 195, 195, 195, 195]
markers per cluster: 200
Number of markers per fold: [200, 200, 200, 200, 200]
markers per cluster: 205
Number of markers per fold: [205, 205, 205, 205, 205]
markers per cluster: 210
Number of markers per fold: [210, 210, 210, 210, 210]
markers per cluster: 215
Number of markers per fold:

A job ended partway through computing this data, which is why the results are split across the partial files loaded below.  We don't actually need to do the work of combining these files, since they hold old (unnormalized) data.  See the next note below.

In [117]:
method = methods[2]

In [118]:
# Read the marker indices ('marks') and their scores ('pvals') for the current
# method from one archive, opened once and closed as soon as both are read.
with np.load(path + dataset + "-" + method + "-markerList.npz") as markerArchive:
    pvalMarkerList = markerArchive['marks']
    pvalScoreList = markerArchive['pvals']

In [121]:
pvalMarkerList[:,0,:10]

array([[  277, 25233, 25104,  1068, 21160, 26799, 10526, 24488,  4701,
         9651],
       [  277, 25104,  1068,  4701, 24488, 15012, 25233, 10526,  9783,
        26799],
       [  277, 25233, 15012, 25104, 26799,  1068, 24488, 21160,  9783,
        13661],
       [  277,  1068, 25104,  4701, 11286, 10526,  9783, 25233, 24488,
        10958],
       [  277, 25233, 25104,  1068, 24488, 10526,  4701, 26799, 10958,
        11286]])

In [115]:
np.where(pvalMarkerList[0][0]==277)

(array([], dtype=int64),)

In [109]:
pvalMarkerList

array([[[  277, 25104, 25233, ..., 31171, 18940, 31697],
        [  277, 25104, 25233, ..., 31171, 18940, 31697]],

       [[  277, 25104,  1068, ..., 29455,  4078,   672],
        [  277, 25104,  1068, ..., 29455,   672,  4078]],

       [[  277, 25104, 25233, ..., 21919, 16118, 24415],
        [  277, 25104, 25233, ..., 21919, 16118, 24415]],

       [[  277, 25104, 25233, ..., 21916, 16554, 29473],
        [  277, 25104, 25233, ..., 21916, 16554, 29473]],

       [[  277, 25104, 25233, ..., 19772, 25409,  4771],
        [  277, 25104, 25233, ..., 19772, 25409,  4771]]])

There is clearly a problem here with the way that logistic regression is working.  This is just wrong.  We find only genes with almost no expression as being more significant than gene 277, which is highly expressed with a large DE factor. 

Yes.  This happens because `rankby_abs = True` was not doing anything AND logistic regression only performs the regression for one of the clusters.  Here, I perform it for cluster 1 (the "normal" one), so we find the genes that are DOWNREGULATED in cluster 0.  Ugh.

I will run it again with a fixed `rankby_abs`.  The fixed version of the scanpy file is included in the scanpy directory at the root of the code repo.

In [40]:
stuff0 = np.load(method + "-errorRates-partial.npz")
stuff1 = np.load(method + "-errorRates-partial2.npz")

It seems that I calculated all of the data without normalizing before checking the reclustering error rate.  The information that we are interested in appears in the "-errorRates-normalized" files

In [41]:
stuff = np.load(methods[1] + "-errorRates.npz")

In [42]:
xlist = stuff0['xlist']
ylist = stuff0['ylist']

In [43]:
ylist.shape

(8, 110)

In [29]:
ylist[2]

array([1442., 1313., 1153., 1063., 1045.,  952.,  886.,  877.,  807.,
        757.,  758.,  707.,  688.,  691.,  647.,  657.,  617.,  625.,
        594.,  593.,  596.,  595.,  593.,  571.,  591.,  591.,  588.,
        600.,  578.,  571.,  590.,  589.,  584.,  594.,  613.,  553.,
        587.,  572.,  589.,  568.,  579.,  564.,  560.,  572.,  561.,
        565.,  567.,  589.,  563.,  549.,  555.,  553.,  561.,  560.,
        553.,  556.,  531.,  548.,  593.,  549.,  567.,  561.,  555.,
        552.,  555.,  555.,  568.,  527.,  551.,  546.,  554.,  538.,
        529.,  548.,  533.,  549.,  534.,  547.,  530.,  541.,  533.,
        525.,  530.,  551.,  529.,  537.,  538.,  533.,  553.,  535.,
        555.,  538.,  550.,  534.,  523.,  514.,  525.,  510.,  551.,
        496.,  513.,  546.,  527.,  532.,  534.,  538.,  537.,  550.,
        542.,  523.])

## Dealing with an issue with the t-test method

The t-test scores can be tied - in those cases, it seems that the ordering of the genes is chosen randomly.  Since we choose markers from both clusters in the above analysis (which I guess is kind of foolish), this means that (very occasionally) two markers with an equal score are put in a different order in the two clusters, and thus both are chosen.  As a result, we sometimes end up with more markers than our cutoff when we take the union of the lists from the clusters.

This only appears to happen in test 0 for the non-filtered case.  And again, it only changes things by shifting the x-value by around .2.  So I'm not sure what to do here.

* Throw away test 0?  This would mean that we are only averaging over 9 trials for the t-test. 
* Re-run the t-test while restricting to just the first 5 markers for cluster 0?  This also feels disingenuous - not really how the pipeline that I write about actually goes.
* Average the x-values as well?  This would produce an okay curve, but not sure how logically sound this method would be.

Thinking about it, I am going to go with method 1 and discuss it in the paper.  This seems to be the most ethical here - I was having issues with tied scores for one of the trials, so couldn't get a stable set of markers to be selected.

In [56]:
stuff = np.load(methods[1] + "-errorRates-normalized.npz")

In [59]:
testing = np.load('1bcs-errorRates.npz')['xlist']

In [63]:
for i in range(10):
    print(np.all(stuff['xlist'][i] == testing))

False
True
True
True
True
True
True
True
True
True


In [65]:
stuff['xlist'][0]

array([  2. ,   4. ,   6. ,   8. ,  10. ,  12. ,  14. ,  16. ,  18. ,
        20. ,  22. ,  24. ,  26. ,  28. ,  30. ,  32. ,  34. ,  36. ,
        38. ,  40. ,  42. ,  44. ,  46. ,  48.2,  50. ,  52. ,  54. ,
        56. ,  58. ,  60. ,  62. ,  64. ,  66. ,  68. ,  70. ,  72. ,
        74. ,  76. ,  78. ,  80. ,  82. ,  84. ,  86. ,  88.2,  90. ,
        92. ,  94. ,  96. ,  98. , 100. , 105. , 110. , 115. , 120. ,
       125.2, 130.2, 135. , 140. , 145. , 150.2, 155. , 160. , 165. ,
       170.2, 175.2, 180.4, 185. , 190. , 195. , 200. , 205. , 210. ,
       215.2, 220. , 225. , 230. , 235.2, 240. , 245.2, 250.2, 255. ,
       260. , 265. , 270.2, 275. , 280.4, 285.2, 290. , 295.4, 300.2,
       310. , 320. , 330. , 340.4, 350. , 360.2, 370. , 380.2, 390. ,
       400. , 410. , 420.6, 431.2, 440.4, 450. , 460. , 470. , 480.2,
       490. , 500. ])

In [76]:
method = methods[1]
method

't-test_overestim_var'

In [77]:
path, fName = makePath(0, allQ=True)

In [78]:
# Read the marker indices ('marks') and their scores ('pvals') for the current
# method from one archive, opened once and closed as soon as both are read.
with np.load(path + dataset + "-" + method + "-markerList.npz") as markerArchive:
    pvalMarkerList = markerArchive['marks']
    pvalScoreList = markerArchive['pvals']

In [82]:
pvalMarkerList.shape

(5, 2, 500)

In [81]:
pvalScoreList

array([[[ 15.510094 ,  12.015761 ,  11.1079   , ...,   2.1112008,
           2.1112006,  -2.1106884],
        [-15.325076 , -11.872427 , -10.975395 , ...,  -2.0860167,
          -2.0860164,   2.0855105]],

       [[ 16.404337 ,  12.6877165, -10.044126 , ...,  -2.065504 ,
          -2.065504 ,  -2.065504 ],
        [-16.870272 , -13.048088 ,  10.3294115, ...,   2.124171 ,
           2.124171 ,   2.124171 ]],

       [[ 16.158033 ,  11.643089 ,  11.254923 , ...,  -2.020829 ,
           2.0208235,  -2.0206623],
        [-16.919212 , -12.191577 , -11.785125 , ...,   2.116027 ,
          -2.1160214,   2.1158526]],

       [[ 16.219963 ,  10.986259 ,  10.223253 , ...,  -2.0259993,
          -2.025238 ,  -2.025238 ],
        [-16.950058 , -11.480773 , -10.683423 , ...,   2.1171937,
           2.116398 ,   2.116398 ]],

       [[ 17.267298 ,  12.397908 ,  11.134918 , ...,   2.0943358,
          -2.094107 ,  -2.094107 ],
        [-16.723309 , -12.007325 , -10.784123 , ...,  -2.0283558,
        

Finding the first time that this occurs:

In [95]:
pvalScoreList[:,:,45:50]

array([[[-3.0675182, -3.061157 , -3.0431628, -3.0393167, -3.026779 ],
        [ 3.0309262,  3.024641 ,  3.0068614,  3.003061 ,  2.990673 ]],

       [[-2.9291499, -2.928183 , -2.928088 ,  2.9024959, -2.8964453],
        [ 3.0123472,  3.011353 ,  3.011255 , -2.9849362,  2.9787138]],

       [[ 2.8819492,  2.8084996,  2.7844148,  2.778223 , -2.776451 ],
        [-3.0177133, -2.9408038, -2.9155843, -2.9091008,  2.9072454]],

       [[-2.8966653,  2.895636 , -2.8930779, -2.8930779, -2.8610115],
        [ 3.0270503, -3.0259748,  3.0233014,  3.0233014,  2.9897916]],

       [[ 2.968012 ,  2.9540539,  2.9340377,  2.892573 ,  2.8894289],
        [-2.874508 , -2.8609893, -2.8416038, -2.8014455, -2.7984002]]],
      dtype=float32)

In [97]:
pvalMarkerList[3,:,45:50]

array([[22057, 21150,  6085, 25085, 19351],
       [22057, 21150, 25085,  6085, 19351]])

## Turn the error rates into averages so that we can plot them!

In [53]:
ylist.mean(axis=0).shape

(110,)

In [18]:
# Average the per-test error counts of every method and save them for plotting.
allQ = False
filterQ = True

# File-name suffix identifying which variant of the data the results belong to.
suffix = ""

if allQ and filterQ:
    suffix = "-allFilt"
elif not allQ:
    suffix = "-preFilt"

rates = []
for method in ['1bcs'] + methods:

    # Compare by value: `is` on a string literal tests object identity, which
    # only works by accident of interning (SyntaxWarning on Python >= 3.8).
    if method == '1bcs':
        # '1bcs' comes first and provides the reference x-values (marker counts).
        stuff = np.load(method + "-errorRates" + suffix + ".npz")
        xlist = stuff['xlist']
    else:
        stuff = np.load(method + "-errorRates" + suffix + "-normalized.npz")
        testX = stuff['xlist']
        # Every test of every method should have used the reference marker counts.
        for ind, marks in enumerate(testX):
            if not np.all(xlist == marks):
                print("Warning for " + method + " test " + str(ind) + ": Different numbers of markers")

    if allQ and not filterQ and 't-test' in method:
        # Drop test 0 for the unfiltered t-test: tied scores gave it
        # inconsistent marker counts.
        print("Editing")
        ylist = stuff['ylist'][1:]
    else:
        ylist = stuff['ylist']

    # Mean error count over tests, one value per marker cutoff.
    rates.append(ylist.mean(axis=0))


np.savez("ave-errorRates" + suffix, errors=rates, xvals=xlist, ncells=5000)

Again, for the t-test, here is the average error over all 10 trials.  Later, I compute the difference between the 9-trial and 10-trial averages - the 10-trial average shows consistently higher error than the 9-trial average (even though the extra trial, test 0, is actually using slightly more markers).  Not sure which to report.

In [91]:
backup = rates

In [94]:
backup[2]

array([1371.6,  959.7,  808.2,  721.5,  675.5,  647.2,  614.5,  611.1,
        595.7,  577.8,  562.9,  561.8,  563.1,  559.1,  550.3,  544.8,
        540.2,  546.6,  538.3,  537. ,  535.6,  531.6,  528.5,  530.3,
        529.9,  533.5,  527.5,  521.3,  514.8,  518.5,  519.3,  526.8,
        522.8,  521.3,  519.9,  519.4,  520.2,  519.4,  516.8,  516.5,
        520.1,  513.5,  512.6,  509.5,  508.5,  507.2,  511.4,  516. ,
        509.5,  516.3,  507.3,  510.6,  510.6,  502.8,  508.6,  504.2,
        498.4,  498. ,  500.2,  498. ,  495.4,  494.3,  497.7,  503.8,
        500.7,  497. ,  495.8,  490.8,  491.5,  493.3,  505.3,  497.2,
        494.7,  495.1,  496.8,  490.9,  489.9,  487.2,  491.5,  485.9,
        492.2,  491.5,  492.9,  489.8,  493.1,  493.8,  487.9,  493.7,
        488.3,  486.6,  492.6,  485.1,  487.3,  485.2,  480. ,  491.6,
        497.2,  487.7,  484.7,  489.3,  483.5,  489.2,  492.2,  483.1,
        481.5,  489.1,  480.4,  477.7,  485.7,  492.1])

In [93]:
rates[2]-backup[2]

array([ 1.51111111, -3.58888889, -4.08888889, -1.5       , -0.5       ,
       -1.42222222, -1.83333333, -1.43333333, -3.7       , -2.91111111,
       -1.01111111,  0.53333333, -3.65555556, -2.54444444, -4.07777778,
       -1.68888889, -2.53333333, -2.37777778, -6.07777778, -3.77777778,
       -1.6       , -2.48888889, -1.61111111, -3.74444444, -2.9       ,
       -3.5       , -1.83333333, -1.74444444, -2.68888889, -2.38888889,
       -3.63333333, -2.13333333, -3.57777778, -2.41111111, -3.56666667,
       -2.28888889, -1.53333333, -0.06666667, -3.02222222, -2.5       ,
       -3.43333333, -0.05555556, -3.6       , -2.16666667, -0.83333333,
       -3.53333333, -1.84444444, -3.44444444, -1.94444444, -1.63333333,
       -2.52222222, -3.93333333, -1.37777778,  0.75555556, -4.37777778,
       -1.42222222, -0.4       , -1.33333333, -1.53333333, -2.88888889,
       -2.4       , -4.3       , -3.7       ,  0.08888889, -1.7       ,
       -2.22222222, -2.68888889, -1.8       ,  0.16666667, -5.41

## Run the procedure for one number of markers to select

In [139]:
# Set up a single run on test 0 of the unfiltered ("allGenes") data.
# NOTE(review): makeAdata is not defined in the cells visible here, and
# makeFoldTester returns only (ft, lookup); confirm which loader is intended,
# since later cells read adata.obs['zhengsim_clusters'].
path, fName = makePath(0, allQ = True)
adata, ft, lookup = makeAdata(path, fName)
folds = ft.folds
# Marker indices ('marks') and their scores ('pvals') for the current `method`.
pvalMarkerList = np.load(path + dataset + "-" + method + "-markerList.npz")['marks']
pvalScoreList = np.load(path+ dataset + "-" + method + "-markerList.npz")['pvals']

In [58]:
%%time

# Single-cutoff run; errors are counted through PerformanceReport.wrong().
xlist, ylist = [], []

for marks_per_clust in [250]:

    print("markers per cluster: {}".format(marks_per_clust), flush=True)

    # Per fold: union of the first `marks_per_clust` entries of every
    # per-cluster ranking. Slicing each ranking separately also works for
    # ragged tables (e.g. elastic nets).
    myMarks = []
    for table in pvalMarkerList:
        topPerCluster = [marks[:marks_per_clust] for marks in table]
        myMarks.append(list(set().union(*topPerCluster)))
    ft.markers = myMarks

    foldSizes = [len(a) for a in ft.markers]
    print("Number of markers per fold: {}".format(foldSizes))
    xlist.append(np.array(foldSizes).mean())

    ft.classify(RandomForest)
    trueLabels = np.array(adata.obs['zhengsim_clusters'].values).astype(int)
    perf = PerformanceReport(trueLabels, ft.yhat)
    ylist.append(perf.wrong())

markers per cluster: 250
Number of markers per fold: [250, 250, 250, 250, 250]
CPU times: user 22.9 s, sys: 181 ms, total: 23.1 s
Wall time: 7.25 s


In [59]:
%%time
# Single-cutoff run; errors are counted directly as label mismatches.
xlist, ylist = [], []

for marks_per_clust in [250]:

    print("markers per cluster: {}".format(marks_per_clust), flush=True)

    # Per fold: union of the first `marks_per_clust` entries of every
    # per-cluster ranking. Slicing each ranking separately also works for
    # ragged tables (e.g. elastic nets).
    myMarks = []
    for table in pvalMarkerList:
        topPerCluster = [marks[:marks_per_clust] for marks in table]
        myMarks.append(list(set().union(*topPerCluster)))
    ft.markers = myMarks

    foldSizes = [len(a) for a in ft.markers]
    print("Number of markers per fold: {}".format(foldSizes))
    xlist.append(np.array(foldSizes).mean())

    ft.classify(RandomForest)
    trueLabels = np.array(adata.obs['zhengsim_clusters'].values).astype(int).flatten()
    wrong = np.sum((trueLabels != ft.yhat) * 1.0)
    ylist.append(wrong)

markers per cluster: 250
Number of markers per fold: [250, 250, 250, 250, 250]
CPU times: user 22.9 s, sys: 177 ms, total: 23.1 s
Wall time: 7.25 s


In [45]:
ylist

[609.0]

In [53]:
np.sum(  (np.array(adata.obs['zhengsim_clusters'].values).astype(int).flatten() != ft.yhat) * 1.0 )

609.0

## Compute true positive data

In [14]:
import plotly.graph_objs as go
from plotly.offline import plot, iplot, init_notebook_mode

In [15]:
init_notebook_mode(connected=True)

Plot distribution of differential expression factors.

In [133]:
allQ = True
filterQ = False

In [135]:
def factorPlot(test, allQ, filterQ):
    """Build the plotly histogram trace of the DE factors for one test.

    Parameters
    ----------
    test : index of the simulated test (last directory component).
    allQ : if True, read from the "allGenes" directory, else from "filtered".
    filterQ : with ``allQ`` True, read the factors from
        "filtered-splat-deInfo.npz" (key 'facs'); in every other case they
        are the second column of "deGenes.dat".

    Returns
    -------
    A one-element list holding a ``go.Histogram`` of the factors, ready to be
    passed to ``iplot``.
    """
    subdir = "allGenes" if allQ else "filtered"
    path = "/home/ahsvargo/turbo/scData/zheng17/splatter-bCells/" + subdir + "/" + str(test) + "/"

    if allQ and filterQ:
        # Close the archive as soon as the factors have been read.
        with np.load(path + "filtered-splat-deInfo.npz") as filt:
            facs = filt['facs']
    else:
        # The two remaining cases differ only in directory, already in `path`.
        deGenes = np.loadtxt(path + "deGenes.dat")
        facs = deGenes[:,1].flatten()

    print("Making histogram now", flush=True)
    return [go.Histogram(x=facs)]


In [136]:
iplot(factorPlot(0, True, False))

Making histogram now


In [138]:
iplot(factorPlot(0, True, True))

Making histogram now


In [30]:
#marks_list = list(2*np.array(range(1,51))) + list(5*np.array(range(21,61))) + list(10*np.array(range(31,51)))

In [43]:
# Candidate ranking methods; the last entry is the one analysed below.
methods = [
    'wilcoxon',
    't-test_overestim_var',
    'logreg',
]
method = methods[-1]

In [52]:
# Which variant of the simulated data is analysed.
allQ = True
filterQ = False
nCells = 5000

# do we want to look at precision?
precCurve = True

# File suffix and total number of genes for the chosen data variant.
if allQ and filterQ:
    suffix, totGenes = "-allFilt.npz", 4999
elif allQ:
    suffix, totGenes = ".npz", 32738
else:
    suffix, totGenes = "-preFilt.npz", 5000

# for the precision curve: just look at the first 500 genes selected
if precCurve:
    nGenes, numPoints = 500, 250
else:
    nGenes, numPoints = totGenes, 100

# Evenly spaced marker counts; the leading 0 is replaced by 2 and nGenes is
# appended when the grid does not already end on it.
chunk = np.floor(nGenes/numPoints)
marks_list = np.arange(0, nGenes, chunk)
marks_list[0] = 2
if marks_list[-1] != nGenes:
    marks_list = np.array(list(marks_list) + [nGenes])

# For the precision curve the first grid point is dropped.
if precCurve:
    marks_list = marks_list[1:]

print("Loading data from 10tests-" + method + "-valsNmarks" + suffix)
stuff = np.load("10tests-" + method + "-valsNmarks" + suffix)
pvals, marks = stuff['pvals'], stuff['marks']

marks_list

Loading data from 10tests-logreg-valsNmarks.npz


array([  2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.,  20.,  22.,
        24.,  26.,  28.,  30.,  32.,  34.,  36.,  38.,  40.,  42.,  44.,
        46.,  48.,  50.,  52.,  54.,  56.,  58.,  60.,  62.,  64.,  66.,
        68.,  70.,  72.,  74.,  76.,  78.,  80.,  82.,  84.,  86.,  88.,
        90.,  92.,  94.,  96.,  98., 100., 102., 104., 106., 108., 110.,
       112., 114., 116., 118., 120., 122., 124., 126., 128., 130., 132.,
       134., 136., 138., 140., 142., 144., 146., 148., 150., 152., 154.,
       156., 158., 160., 162., 164., 166., 168., 170., 172., 174., 176.,
       178., 180., 182., 184., 186., 188., 190., 192., 194., 196., 198.,
       200., 202., 204., 206., 208., 210., 212., 214., 216., 218., 220.,
       222., 224., 226., 228., 230., 232., 234., 236., 238., 240., 242.,
       244., 246., 248., 250., 252., 254., 256., 258., 260., 262., 264.,
       266., 268., 270., 272., 274., 276., 278., 280., 282., 284., 286.,
       288., 290., 292., 294., 296., 298., 300., 30

In [53]:
# Per-test results: fullTP[i][j] / fullFP[i][j] are the numbers of selected
# markers that are / are not in the list of differentially expressed genes
# when the top marks_list[j] markers of test i are taken.
fullTP = []
fullFP = []
nMarks = []

# The cut-offs are the same for every test, so convert them to integers once.
cutoffs = [int(marks_per_clust) for marks_per_clust in marks_list]

# the test value
for test in range(10):

    print("Collecting true positive info for test {}".format(test))

    # load all of the data from the test
    if allQ:
        path = "/home/ahsvargo/turbo/scData/zheng17/splatter-bCells/allGenes/" + str(test) + "/"
    else:
        path = "/home/ahsvargo/turbo/scData/zheng17/splatter-bCells/filtered/" + str(test) + "/"

    if allQ and filterQ:
        # Close the archive once both arrays have been read into memory.
        with np.load(path + "filtered-splat-deInfo.npz") as filt:
            genes = filt['genes']
            facs = filt['facs']
    else:
        # Every other flag combination reads the same text file.
        deGenes = np.loadtxt(path + "deGenes.dat")
        genes = deGenes[:,0].astype(int)
        facs = deGenes[:,1].flatten()

    nMarks.append(genes.shape[0])

    # for these tests - we are currently assuming rankby_abs = True and thus both
    # lists of markers are the same (I have tested this).  Thus, we just need
    # the top markers from the first list in marks
    currMarks = np.asarray(marks[test][0])

    # Flag every ranked marker once, then read the number of hits among the
    # top k markers from a running sum instead of re-scanning the prefix for
    # each cut-off.  hitsUpTo[k] is the hit count of currMarks[:k].
    isDE = np.isin(currMarks, genes)
    hitsUpTo = np.concatenate(([0], np.cumsum(isDE)))

    # A cut-off beyond the end of the ranked list sees the whole list, which
    # is what slicing currMarks[:k] would give.
    testTP = [int(hitsUpTo[min(k, isDE.shape[0])]) for k in cutoffs]
    testFP = [k - tp for k, tp in zip(cutoffs, testTP)]

    fullTP.append(testTP)
    fullFP.append(testFP)

Collecting true positive info for test 0
Collecting true positive info for test 1
Collecting true positive info for test 2
Collecting true positive info for test 3
Collecting true positive info for test 4
Collecting true positive info for test 5
Collecting true positive info for test 6
Collecting true positive info for test 7
Collecting true positive info for test 8
Collecting true positive info for test 9


Compute the averaged precision, true positive rate and false positive rate curves, then save all of the information.

In [54]:
def avePrec(fullTP, marks_list):
    """Average precision per cut-off.

    The true-positive counts in ``fullTP`` are averaged over its first axis
    and the result is divided element-wise by ``marks_list``.
    """
    meanTP = np.mean(np.array(fullTP), axis=0)
    return meanTP / marks_list

def aveTPR(fullTP, nmarks):
    """Average true positive rate per cut-off.

    Each row of ``fullTP`` is divided by the matching entry of ``nmarks``
    and the resulting rates are averaged over the first axis.
    """
    counts = np.array(fullTP)
    perRow = np.array(nmarks)[:, np.newaxis]
    rates = counts / perRow
    return np.mean(rates, axis=0)

def aveFPR(fullFP, nmarks, ngenes):
    """Average false positive rate per cut-off.

    Each row of ``fullFP`` is divided by ``ngenes`` minus the matching entry
    of ``nmarks`` and the resulting rates are averaged over the first axis.
    """
    negatives = ngenes - np.array(nmarks)
    rates = np.array(fullFP) / negatives[:, np.newaxis]
    return np.mean(rates, axis=0)

In [55]:
# Collapse the per-test counts into curves averaged over the tests.
fpr = aveFPR(fullFP, nMarks, totGenes)
tpr = aveTPR(fullTP, nMarks)
prec = avePrec(fullTP, marks_list)

# Store the raw counts together with the averaged curves in one archive.
fname = "10tests-" + method + "-500statInfo" + suffix
print("Saving data to " + fname)
np.savez(
    fname,
    fullTP=fullTP,
    fullFP=fullFP,
    nmarks=nMarks,
    marks_list=marks_list,
    prec=prec,
    tpr=tpr,
    fpr=fpr,
)

Saving data to 10tests-logreg-500statInfo.npz


In [29]:
nGenes

500

In [85]:
np.array(fullFP)

array([[    0,   114,   342, ..., 29107, 29399, 29432],
       [    0,   120,   342, ..., 29056, 29356, 29389],
       [    0,    93,   322, ..., 29069, 29362, 29396],
       ...,
       [    0,   128,   373, ..., 29158, 29449, 29485],
       [    0,   110,   355, ..., 29087, 29388, 29420],
       [    0,   109,   362, ..., 29199, 29489, 29522]])

In [92]:
(np.array(fullTP)/np.array(nMarks)[:,None]).mean(axis=0)

array([6.07506082e-04, 6.62212668e-02, 9.43885405e-02, 1.14176406e-01,
       1.32290055e-01, 1.48201522e-01, 1.62494475e-01, 1.75281995e-01,
       1.88180483e-01, 2.00879749e-01, 2.13145215e-01, 2.24384205e-01,
       2.36519836e-01, 2.46613847e-01, 2.57221143e-01, 2.67356227e-01,
       2.77836314e-01, 2.88423416e-01, 2.98452307e-01, 3.08540458e-01,
       3.17587666e-01, 3.27798223e-01, 3.36629951e-01, 3.46414426e-01,
       3.55975729e-01, 3.65634491e-01, 3.75066253e-01, 3.84365981e-01,
       3.93568658e-01, 4.02296165e-01, 4.11831800e-01, 4.21452218e-01,
       4.31079680e-01, 4.39840338e-01, 4.49619777e-01, 4.59875944e-01,
       4.69527432e-01, 4.78242121e-01, 4.86439811e-01, 4.94982589e-01,
       5.03149582e-01, 5.11746411e-01, 5.19245572e-01, 5.27470456e-01,
       5.36343220e-01, 5.45940722e-01, 5.54020991e-01, 5.62520443e-01,
       5.70427281e-01, 5.79010324e-01, 5.86687064e-01, 5.95105538e-01,
       6.04253469e-01, 6.11449939e-01, 6.19337300e-01, 6.27529876e-01,
      

In [95]:
# Denominator of the false positive rate: total genes minus the entries of
# nMarks.  totGenes is used, matching the aveFPR call, because nGenes is
# capped at 500 when precCurve is set and would make the denominators negative.
denom = totGenes - np.array(nMarks)
( np.array(fullFP)/denom[:, None] ).mean(axis=0)

array([0.        , 0.00370187, 0.01165545, 0.02054642, 0.02962791,
       0.03895374, 0.04845945, 0.05813514, 0.06779712, 0.07748304,
       0.08721639, 0.09706529, 0.10681591, 0.11679398, 0.12671428,
       0.13668532, 0.14661913, 0.15653918, 0.16652399, 0.176502  ,
       0.18659537, 0.1965598 , 0.20667691, 0.21668887, 0.22672448,
       0.23674998, 0.24679906, 0.25686538, 0.26694182, 0.27707272,
       0.28711179, 0.29714058, 0.30716946, 0.31729323, 0.32730513,
       0.33726258, 0.34728798, 0.35741873, 0.36760721, 0.37775846,
       0.38795033, 0.3980947 , 0.40836129, 0.41854633, 0.42866017,
       0.43869246, 0.44889456, 0.45904905, 0.46927165, 0.47941585,
       0.48966201, 0.49982681, 0.50991011, 0.52021067, 0.53043303,
       0.54062143, 0.55073158, 0.56093023, 0.57121735, 0.5815316 ,
       0.59176757, 0.60202746, 0.6123248 , 0.62248961, 0.63269515,
       0.64295155, 0.65323847, 0.66350859, 0.67372095, 0.68403852,
       0.69423373, 0.70443224, 0.7146445 , 0.72481257, 0.73499

In [88]:
nMarks

[3306, 3349, 3342, 3266, 3385, 3269, 3226, 3253, 3318, 3216]

In [56]:
tpr

array([0.00390903, 0.06244573, 0.08930294, 0.10553032, 0.12048612,
       0.13549339, 0.14748305, 0.16298019, 0.1785971 , 0.19037291,
       0.19976532, 0.21345369, 0.22614069, 0.2358647 , 0.24716136,
       0.25619812, 0.26652738, 0.2776445 , 0.28699712, 0.2981913 ,
       0.30809828, 0.31691157, 0.32870104, 0.3376415 , 0.34683955,
       0.35567694, 0.36608215, 0.37601017, 0.38631326, 0.39527967,
       0.40271204, 0.41136063, 0.42093612, 0.4279904 , 0.43698388,
       0.44767039, 0.45756817, 0.46996303, 0.47893913, 0.48689029,
       0.49764171, 0.50700181, 0.51619789, 0.52593335, 0.53645417,
       0.54681099, 0.55674675, 0.56593665, 0.57433871, 0.58204326,
       0.59238804, 0.60022573, 0.60945096, 0.61690404, 0.62531651,
       0.63328675, 0.64164647, 0.65178524, 0.65881275, 0.66739767,
       0.67582381, 0.68499567, 0.69448803, 0.7038185 , 0.71287412,
       0.72361497, 0.7314136 , 0.73882875, 0.74622571, 0.75384362,
       0.76374776, 0.7705518 , 0.77738841, 0.78694392, 0.79300

In [59]:
fpr

array([0.00044576, 0.0071334 , 0.01021067, 0.01206051, 0.01375306,
       0.01546979, 0.01682846, 0.01858847, 0.02037176, 0.02170839,
       0.02277806, 0.02433812, 0.02578708, 0.0269021 , 0.02819531,
       0.02921995, 0.03040162, 0.03167233, 0.03274256, 0.03401226,
       0.03514974, 0.03615245, 0.0374889 , 0.0385148 , 0.03956217,
       0.04056459, 0.04174523, 0.04288245, 0.04406445, 0.04509   ,
       0.04593689, 0.04691689, 0.048009  , 0.04881115, 0.04983638,
       0.05106304, 0.05220066, 0.05362859, 0.054654  , 0.05556863,
       0.05679446, 0.05786455, 0.05891195, 0.06002684, 0.06123082,
       0.06241209, 0.06354923, 0.0645967 , 0.06555511, 0.06642327,
       0.06760473, 0.06849598, 0.069543  , 0.0703896 , 0.07134789,
       0.07226226, 0.07322123, 0.07438054, 0.07518301, 0.07616388,
       0.07712197, 0.07816969, 0.07926291, 0.0803334 , 0.08135777,
       0.08258375, 0.08347553, 0.08432265, 0.08516999, 0.08603927,
       0.08717679, 0.08795736, 0.08873751, 0.08982991, 0.09052

In [34]:
np.array(fullTP)

array([[  2,  32,  49, ..., 534, 538, 538],
       [  2,  29,  40, ..., 497, 497, 497],
       [  2,  35,  51, ..., 530, 534, 534],
       ...,
       [  2,  22,  35, ..., 494, 496, 496],
       [  2,  34,  45, ..., 500, 507, 507],
       [  2,  27,  40, ..., 485, 490, 490]])

In [60]:
np.array(fullFP)

array([[   0,   17,   49, ..., 4415, 4460, 4461],
       [   0,   20,   58, ..., 4452, 4501, 4502],
       [   0,   14,   47, ..., 4419, 4464, 4465],
       ...,
       [   0,   27,   63, ..., 4455, 4502, 4503],
       [   0,   15,   53, ..., 4449, 4491, 4492],
       [   0,   22,   58, ..., 4464, 4508, 4509]])

In [27]:
np.array(nMarks).mean()

512.2

In [37]:
np.array(fullTP).mean(axis=0)/marks_list

array([1.        , 0.65306122, 0.46734694, 0.36802721, 0.31479592,
       0.28326531, 0.25680272, 0.24314869, 0.23316327, 0.22086168,
       0.20857143, 0.2025974 , 0.19676871, 0.18948195, 0.18440233,
       0.17836735, 0.17397959, 0.17058824, 0.16655329, 0.16390977,
       0.16091837, 0.15762877, 0.15602968, 0.15332742, 0.15093537,
       0.14857143, 0.14701727, 0.14542706, 0.14409621, 0.14236453,
       0.14020408, 0.13857801, 0.13737245, 0.13543599, 0.13421369,
       0.13358601, 0.13276644, 0.13270822, 0.13168636, 0.13045526,
       0.13      , 0.12921852, 0.12842566, 0.12781206, 0.12741187,
       0.12698413, 0.12648625, 0.12583587, 0.12504252, 0.12411495,
       0.12379592, 0.12296919, 0.12244898, 0.12160185, 0.12097506,
       0.12029685, 0.11971574, 0.11947726, 0.11868403, 0.1181944 ,
       0.11768707, 0.11733021, 0.11705069, 0.11674765, 0.11639031,
       0.11632653, 0.11580087, 0.11522997, 0.11467587, 0.11416741,
       0.11402332, 0.1134234 , 0.11284014, 0.11266424, 0.11199

In [53]:
(np.array(fullTP)/np.array(nMarks)[:, None]).mean(axis=0)

array([0.00390903, 0.06244573, 0.08930294, 0.10553032, 0.12048612,
       0.13549339, 0.14748305, 0.16298019, 0.1785971 , 0.19037291,
       0.19976532, 0.21345369, 0.22614069, 0.2358647 , 0.24716136,
       0.25619812, 0.26652738, 0.2776445 , 0.28699712, 0.2981913 ,
       0.30809828, 0.31691157, 0.32870104, 0.3376415 , 0.34683955,
       0.35567694, 0.36608215, 0.37601017, 0.38631326, 0.39527967,
       0.40271204, 0.41136063, 0.42093612, 0.4279904 , 0.43698388,
       0.44767039, 0.45756817, 0.46996303, 0.47893913, 0.48689029,
       0.49764171, 0.50700181, 0.51619789, 0.52593335, 0.53645417,
       0.54681099, 0.55674675, 0.56593665, 0.57433871, 0.58204326,
       0.59238804, 0.60022573, 0.60945096, 0.61690404, 0.62531651,
       0.63328675, 0.64164647, 0.65178524, 0.65881275, 0.66739767,
       0.67582381, 0.68499567, 0.69448803, 0.7038185 , 0.71287412,
       0.72361497, 0.7314136 , 0.73882875, 0.74622571, 0.75384362,
       0.76374776, 0.7705518 , 0.77738841, 0.78694392, 0.79300