# scRFEjun19

In [154]:
# AnnDatasubset

In [42]:
# Imports 
import numpy as np
import pandas as pd
import scanpy as sc
import random
from anndata import read_h5ad
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
import seaborn as sns
import matplotlib.pyplot as plt
import scanpy.external as sce
import logging as logg

In [97]:
adataLiver = read_h5ad('/Users/madelinepark/Downloads/Liver_droplet.h5ad')

In [113]:
def filterNormalize (dataMatrix, classOfInterest):
    np.random.seed(644685)
    sc.logging.print_versions()
    sc.settings.verbosity = 3      
    sc.logging.print_versions()
    dataMatrix.obs['n_counts'] = dataMatrix.X.sum(axis=1).A1
    sc.pp.filter_cells(dataMatrix, min_genes=250)
    sc.pp.filter_genes(dataMatrix, min_cells=3)
    dataMatrix = dataMatrix[dataMatrix.obs['n_counts'] > 1500, :]
    sc.pp.normalize_per_cell(dataMatrix, counts_per_cell_after=1e5)
    sc.pp.log1p(dataMatrix)
    dataMatrix.raw = dataMatrix
    dataMatrix = dataMatrix[dataMatrix.obs[classOfInterest]!='nan']
    return dataMatrix

In [114]:
def labelSplit (dataMatrix, classOfInterest, labelOfInterest):
    dataMatrix = filterNormalize (dataMatrix, classOfInterest)
    dataMatrix.obs['classification_group'] = 'B'
    dataMatrix.obs.loc[dataMatrix.obs[dataMatrix.obs[classOfInterest]==labelOfInterest]
                   .index,'classification_group'] = 'A'
    return dataMatrix

In [115]:
def downsampleToSmallestCategory(dataMatrix, 
        classOfInterest = 'classification_group',
        random_state = None,
        min_cells = 15,
        keep_small_categories = False
) -> sc.AnnData:
    """
    returns an annData object in which all categories in 'classOfInterest' have
    the same size
    classOfInterest
        column with the categories to downsample
    min_cells
        Minimum number of cells to downsample.
        Categories having less than `min_cells` are discarded unless
        keep_small_categories is True
    keep_small_categories
        Be default categories with less than min_cells are discarded.
        Set to true to keep them
    """

    counts = dataMatrix.obs[classOfInterest].value_counts(sort=False)
    if len(counts[counts < min_cells]) > 0 and keep_small_categories is False:
        logg.warning(
            "The following categories have less than {} cells and will be "
            "ignored: {}".format(min_cells, dict(counts[counts < min_cells]))
        )
    min_size = min(counts[counts >= min_cells])
    sample_selection = None
    for sample, num_cells in counts.items():
        if num_cells <= min_cells:
            if keep_small_categories:
                sel = dataMatrix.obs.index.isin(
                    dataMatrix.obs[dataMatrix.obs[classOfInterest] == sample].index)
            else:
                continue
        else:
            sel = dataMatrix.obs.index.isin(
                dataMatrix.obs[dataMatrix.obs[classOfInterest] == sample]
                .sample(min_size, random_state=random_state)
                .index
            )
        if sample_selection is None:
            sample_selection = sel
        else:
            sample_selection |= sel
    logg.info(
        "The cells in category {!r} had been down-sampled to have each {} cells. "
        "The original counts where {}".format(classOfInterest, min_size, dict(counts))
    )
    return dataMatrix[sample_selection].copy()

In [149]:
def makeOneForest (dataMatrix, classOfInterest, labelOfInterest, nEstimators = 5000, 
                   randomState = 0,  nJobs = -1, oobScore = True, Step = 0.2, Cv = 5): 
    """
    Builds and runs a random forest for one label in a class of interest
    
    Parameters
    ----------
    dataMatrix : anndata object
        The data file of interest
    classOfInterest : str
        The class you will split the data by in the set of dataMatrix.obs
    labelOfInterest : str
        The specific label within the class that the random forezt will run a 
        "one vs all" classification on
    nEstimators : int
        The number of trees in the forest
    randomState : int
        Controls random number being used
    nJobs : int
        The number of jobs to run in parallel
    oobScore : bool
        Whether to use out-of-bag samples to estimate the generalization accuracy
    Step : float
        Corresponds to percentage of features to remove at each iteration
    Cv : int
        Determines the cross-validation splitting strategy
        
    Returns
    -------
    feature_selected : list
        list of top features from random forest
    selector.estimator_.feature_importances_ : list
        list of top ginis corresponding to to features
    
    """
    splitDataMatrix = labelSplit (dataMatrix, classOfInterest, labelOfInterest)
    downsampledMatrix = downsampleToSmallestCategory (dataMatrix = splitDataMatrix, 
        classOfInterest = 'classification_group',
        random_state = None, min_cells = 15, keep_small_categories = False)
    print('downsampled Matrix')
    
    display (downsampledMatrix.obs['classification_group'])
    print(set(downsampledMatrix.obs['classification_group']))
    
    feat_labels = downsampledMatrix.var_names 
    X = downsampledMatrix.X
    y = downsampledMatrix.obs['classification_group']
    
    clf = RandomForestClassifier(n_estimators = nEstimators, random_state = randomState, 
                                 n_jobs = nJobs, oob_score = oobScore)
    selector = RFECV(clf, step = Step, cv = Cv)
    
    clf.fit(X, y)
    selector.fit(X, y)
    feature_selected = feat_labels[selector.support_] 
#     display(downsampledMatrix.obs)

    dataMatrix.obs['classification_group'] = 'B'
#     print('corresponding')
#     display(downsampledMatrix.obs)

    return feature_selected, selector.estimator_.feature_importances_

In [150]:
def resultWrite (classOfInterest, results_df, labelOfInterest,
                feature_selected, feature_importance):

    column_headings = [] 
    column_headings.append(labelOfInterest)
    column_headings.append(labelOfInterest + '_gini')
    resaux = pd.DataFrame(columns = column_headings)
    resaux[labelOfInterest] = feature_selected
    resaux[labelOfInterest + '_gini'] = feature_importance
    resaux = resaux.sort_values(by = [labelOfInterest + '_gini'], ascending = False)
    resaux.reset_index(drop = True, inplace = True)

    results_df = pd.concat([results_df, resaux], axis=1)
    return results_df 

In [151]:
def scRFE(dataMatrix, classOfInterest, nEstimators = 5000, randomState = 0,  
                  nJobs = -1, oobScore = True, Step = 0.2, Cv = 5):
    """
    Builds and runs a random forest with one vs all classification for each label 
    for one class of interest
    
    Parameters
    ----------
    dataMatrix : anndata object
        The data file of interest
    classOfInterest : str
        The class you will split the data by in the set of dataMatrix.obs
    labelOfInterest : str
        The specific label within the class that the random forezt will run a 
        "one vs all" classification on
    nEstimators : int
        The number of trees in the forest
    randomState : int
        Controls random number being used
    nJobs : int
        The number of jobs to run in parallel
    oobScore : bool
        Whether to use out-of-bag samples to estimate the generalization accuracy
    Step : float
        Corresponds to percentage of features to remove at each iteration
    Cv : int
        Determines the cross-validation splitting strategy
        
    Returns
    -------
    results_df : pd.DataFrame
        Dataframe with results for each label in the class, formatted as 
        "label" for one column, then "label + gini" for the corresponding column
    
    """
    
    dataMatrix = filterNormalize (dataMatrix, classOfInterest)
    results_df = pd.DataFrame()
    for labelOfInterest in np.unique(dataMatrix.obs[classOfInterest]): 
        
        feature_selected, feature_importance = makeOneForest(dataMatrix, 
                                                             classOfInterest, 
                          labelOfInterest = labelOfInterest)
    
        results_df = resultWrite (classOfInterest, results_df, 
                            labelOfInterest = labelOfInterest, 
                    feature_selected = feature_selected,  
                    feature_importance = feature_importance)
        
#     finaldf = makeOneForest (dataMatrix, classOfInterest, labelOfInterest = labelOfInterest)[2]

    return results_df

In [152]:
liverAgeSmall = scRFE (dataMatrix = adataLiver, classOfInterest = 'age', 
                       nEstimators = 10, randomState = 0,  
                  nJobs = -1, oobScore = True, Step = 0.2, Cv = 3)

scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
filtered out 1 genes that are detected in less than 3 cells


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


downsampled Matrix


index
AAAGCAATCGGAAATA-1-9-0-0       A
AAAGTAGAGGCCCTTG-1-9-0-0       A
AACCGCGAGAAACCGC-1-9-0-0       A
AACTCCCAGTTGTCGT-1-9-0-0       A
AACTCCCGTAACGTTC-1-9-0-0       A
AACTCTTCAAAGTCAA-1-9-0-0       A
AAGGAGCGTTAAAGAC-1-9-0-0       A
AAGGCAGTCTTTACAC-1-9-0-0       A
AAGGTTCAGTGGGCTA-1-9-0-0       A
ACACTGAAGTCGATAA-1-9-0-0       A
ACATCAGCAAGCGATG-1-9-0-0       A
ACATGGTAGTCCCACG-1-9-0-0       A
ACCAGTACATTGGGCC-1-9-0-0       A
ACCCACTAGCCCAATT-1-9-0-0       A
ACCTTTAAGTTATCGC-1-9-0-0       A
ACGAGCCCAGCCAATT-1-9-0-0       A
ACGATACCACATTAGC-1-9-0-0       A
ACGATACGTCTAAACC-1-9-0-0       A
ACGATGTAGTTTGCGT-1-9-0-0       A
ACGCAGCAGGACGAAA-1-9-0-0       A
ACGCCAGGTTACAGAA-1-9-0-0       A
ACGGGTCCATCGGACC-1-9-0-0       A
ACGTCAATCGCTTGTC-1-9-0-0       A
ACGTCAATCTGCCCTA-1-9-0-0       A
ACTATCTCATCAGTAC-1-9-0-0       A
ACTATCTGTGCCTTGG-1-9-0-0       A
ACTATCTGTGGTGTAG-1-9-0-0       A
ACTATCTTCATAGCAC-1-9-0-0       A
ACTGAACGTTCCGGCA-1-9-0-0       A
ACTGATGGTCAGGACA-1-9-0-0       A
    

{'A', 'B'}


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


downsampled Matrix


index
AAAGTAGAGGCCCTTG-1-9-0-0       B
AACCGCGAGAAACCGC-1-9-0-0       B
AACTCTTCAAAGTCAA-1-9-0-0       B
AAGGCAGTCTTTACAC-1-9-0-0       B
AAGGTTCAGTGGGCTA-1-9-0-0       B
ACATCAGCAAGCGATG-1-9-0-0       B
ACCAGTACATTGGGCC-1-9-0-0       B
ACCCACTAGCCCAATT-1-9-0-0       B
ACGCAGCAGGACGAAA-1-9-0-0       B
ACGCCAGGTTACAGAA-1-9-0-0       B
ACGGGTCCATCGGACC-1-9-0-0       B
ACGTCAATCGCTTGTC-1-9-0-0       B
ACTATCTCATCAGTAC-1-9-0-0       B
ACTATCTGTGGTGTAG-1-9-0-0       B
ACTATCTTCATAGCAC-1-9-0-0       B
ACTGAACGTTCCGGCA-1-9-0-0       B
ACTGATGGTCAGGACA-1-9-0-0       B
ACTGCTCCACTACAGT-1-9-0-0       B
AGAATAGAGAACAATC-1-9-0-0       B
AGATCTGCACACTGCG-1-9-0-0       B
ATCATCTAGATGCGAC-1-9-0-0       B
ATCATGGTCAAGCCTA-1-9-0-0       B
ATCCACCGTAGAAGGA-1-9-0-0       B
ATCTGCCCATCAGTAC-1-9-0-0       B
ATCTGCCGTGCTCTTC-1-9-0-0       B
ATGAGGGAGTGAACAT-1-9-0-0       B
ATTGGACTCCCAAGTA-1-9-0-0       B
CAACTAGCAACTGCGC-1-9-0-0       B
CAAGATCTCAAGAAGT-1-9-0-0       B
CAAGATCTCGCATGGC-1-9-0-0       B
    

{'A', 'B'}


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


downsampled Matrix


index
AACCGCGAGAAACCGC-1-9-0-0       B
ACTATCTGTGGTGTAG-1-9-0-0       B
ACTGAACGTTCCGGCA-1-9-0-0       B
CAGTCCTCAAGCTGTT-1-9-0-0       B
CCCAGTTGTAGCGTAG-1-9-0-0       B
CCTAGCTAGAGACTAT-1-9-0-0       B
CGATTGATCCGGGTGT-1-9-0-0       B
CGGAGCTGTAAGAGGA-1-9-0-0       B
CTACATTGTTGTCGCG-1-9-0-0       B
GCACATAAGAGAGCTC-1-9-0-0       B
GTCTCGTAGTGGAGTC-1-9-0-0       B
GTGCATAGTAGCACGA-1-9-0-0       B
TGGCGCAGTCCGCTGA-1-9-0-0       B
AACCATGAGCGCTCCA-1-10-0-0      B
ACATCAGTCCAAACTG-1-10-0-0      B
AGAGTGGGTCTGCGGT-1-10-0-0      B
AGATCTGAGAAGGACA-1-10-0-0      B
AGCGTCGGTCTGATTG-1-10-0-0      B
AGCTTGAAGACTACAA-1-10-0-0      B
ATTTCTGCACTTCGAA-1-10-0-0      B
CACAAACGTAGCGATG-1-10-0-0      B
CATATTCAGTGCTGCC-1-10-0-0      B
CATCAGACAGCCAGAA-1-10-0-0      B
CCTATTACAGTAAGCG-1-10-0-0      B
CGTCAGGCACGGCCAT-1-10-0-0      B
CTCATTATCCATGAAC-1-10-0-0      B
CTCGAAACATCCCATC-1-10-0-0      B
CTTTGCGCATGAACCT-1-10-0-0      B
GATCTAGAGTAATCCC-1-10-0-0      B
GCCAAATCACGTAAGG-1-10-0-0      B
    

{'A', 'B'}


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


downsampled Matrix


index
CCCAGTTGTAGCGTAG-1-9-0-0       B
GTCTCGTAGTGGAGTC-1-9-0-0       B
GTGCATAGTAGCACGA-1-9-0-0       B
ACATCAGTCCAAACTG-1-10-0-0      B
AGAGTGGGTCTGCGGT-1-10-0-0      B
CATATTCAGTGCTGCC-1-10-0-0      B
GGGAGATGTGAGTATA-1-10-0-0      B
TGTTCCGTCAACGGGA-1-10-0-0      B
CGACTTCTCAACGGCC-1-49-0-0      B
GTCGTAATCCGCAAGC-1-49-0-0      B
TCTCTAAAGTTCGCGC-1-49-0-0      B
AAACCTGCATCATCCC-1-76-0-0      A
AACCGCGGTCTAGGTT-1-76-0-0      A
AACGTTGCAATAAGCA-1-76-0-0      A
AACTCTTAGCCACTAT-1-76-0-0      A
AACTCTTGTCTTGCGG-1-76-0-0      A
AAGCCGCTCAGTGTTG-1-76-0-0      A
AAGGAGCAGACAAAGG-1-76-0-0      A
AAGGTTCGTTCTGAAC-1-76-0-0      A
ACACTGACAGAAGCAC-1-76-0-0      A
ACAGCTATCAGCATGT-1-76-0-0      A
ACTGCTCCATGATCCA-1-76-0-0      A
ACTTTCACACCAGGCT-1-76-0-0      A
AGGCCACAGCGTTCCG-1-76-0-0      A
AGTGGGATCATGTCCC-1-76-0-0      A
ATCGAGTGTTTGGCGC-1-76-0-0      A
ATCTGCCCATAGAAAC-1-76-0-0      A
ATGCGATTCTGGGCCA-1-76-0-0      A
CAACCAAGTGAACCTT-1-76-0-0      A
CAACCTCCAATAGCGG-1-76-0-0      A
    

{'A', 'B'}


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


downsampled Matrix


index
AACCGCGAGAAACCGC-1-9-0-0       B
AACTCTTCAAAGTCAA-1-9-0-0       B
AAGGCAGTCTTTACAC-1-9-0-0       B
AAGGTTCAGTGGGCTA-1-9-0-0       B
ACATCAGCAAGCGATG-1-9-0-0       B
ACCAGTACATTGGGCC-1-9-0-0       B
ACCCACTAGCCCAATT-1-9-0-0       B
ACGCAGCAGGACGAAA-1-9-0-0       B
ACGGGTCCATCGGACC-1-9-0-0       B
ACGTCAATCGCTTGTC-1-9-0-0       B
ACTATCTCATCAGTAC-1-9-0-0       B
ACTATCTGTGGTGTAG-1-9-0-0       B
ACTGAACGTTCCGGCA-1-9-0-0       B
ACTGATGGTCAGGACA-1-9-0-0       B
ACTGCTCCACTACAGT-1-9-0-0       B
AGAATAGAGAACAATC-1-9-0-0       B
AGATCTGCACACTGCG-1-9-0-0       B
ATCATCTAGATGCGAC-1-9-0-0       B
ATCTGCCCATCAGTAC-1-9-0-0       B
ATCTGCCGTGCTCTTC-1-9-0-0       B
ATGAGGGAGTGAACAT-1-9-0-0       B
ATTGGACTCCCAAGTA-1-9-0-0       B
CAACTAGCAACTGCGC-1-9-0-0       B
CAAGATCTCAAGAAGT-1-9-0-0       B
CAAGATCTCGCATGGC-1-9-0-0       B
CACAAACTCTTTACAC-1-9-0-0       B
CACACTCGTCAATACC-1-9-0-0       B
CACAGTAGTAATCGTC-1-9-0-0       B
CACCAGGAGTTGAGTA-1-9-0-0       B
CAGAATCTCATGTCTT-1-9-0-0       B
    

{'A', 'B'}


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 
scanpy==1.4.3 anndata==0.6.21 umap==0.3.9 numpy==1.16.4 scipy==1.2.1 pandas==0.24.2 scikit-learn==0.21.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


Trying to set attribute `.obs` of view, making a copy.
Trying to set attribute `.obs` of view, making a copy.


downsampled Matrix


index
AACCGCGAGAAACCGC-1-9-0-0       B
ACATCAGCAAGCGATG-1-9-0-0       B
ACGCAGCAGGACGAAA-1-9-0-0       B
ACTATCTCATCAGTAC-1-9-0-0       B
ACTATCTGTGGTGTAG-1-9-0-0       B
ACTGAACGTTCCGGCA-1-9-0-0       B
AGAATAGTCCTGTAGA-1-9-0-0       B
ATCATCTAGATGCGAC-1-9-0-0       B
ATGAGGGAGTGAACAT-1-9-0-0       B
CAAGATCTCGCATGGC-1-9-0-0       B
CACAAACTCTTTACAC-1-9-0-0       B
CAGTCCTCAAGCTGTT-1-9-0-0       B
CCCAGTTGTAGCGTAG-1-9-0-0       B
CCGGGATGTAAGTGGC-1-9-0-0       B
CCGTACTGTGGCAAAC-1-9-0-0       B
CCTAGCTAGAGACTAT-1-9-0-0       B
CGATCGGAGAGGTACC-1-9-0-0       B
CGATGTAGTAGCGATG-1-9-0-0       B
CGATTGATCCGGGTGT-1-9-0-0       B
CGGAGCTGTAAGAGGA-1-9-0-0       B
CTACATTGTTGTCGCG-1-9-0-0       B
CTCGAAAAGCCACTAT-1-9-0-0       B
GATGCTAAGCTACCGC-1-9-0-0       B
GCACATAAGAGAGCTC-1-9-0-0       B
GGGCACTAGAGCCCAA-1-9-0-0       B
GTCTCGTAGTGGAGTC-1-9-0-0       B
GTGCATAGTAGCACGA-1-9-0-0       B
GTTCATTTCCGCAGTG-1-9-0-0       B
TGATTTCCATAGAAAC-1-9-0-0       B
TGGCGCAGTCCGCTGA-1-9-0-0       B
    

{'A', 'B'}


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
