 # Cell Selection Summary
 In this notebook I am summarizing the cell selection analysis to add to the
 docs.

In [2]:
SAMPLES = ['testis1', 'testis2', 'testis3', 'testis4']
ORDER = ['cellranger-wf', 'cellranger-force-wf', 'cellranger3-wf', 'droputils']


In [3]:
from itertools import combinations

import numpy as np
import pandas as pd
from sklearn.metrics import jaccard_similarity_score

from tabulate import tabulate


In [4]:
def get_calls(sample):
    df = pd.read_feather(f'../output/cellselection-wf/{sample}_combined_cell_calls.feather').set_index('cell_id')
    return df

def jaccard(sample):
    df = get_calls(sample)
    res = []
    for c1, c2 in combinations(df.columns, 2):
        jc = np.round(jaccard_similarity_score(df[c1], df[c2]), 4)
        res.append([c1, c2, jc])
        res.append([c2, c1, jc])

    dfJ = pd.DataFrame(res, columns = ['method 1', 'method 2', 'Jaccard']).set_index(['method 1', 'method 2']).unstack().fillna(1)
    dfJ.columns = dfJ.columns.droplevel(0)
    dfJ = dfJ.loc[ORDER, ORDER]
    return dfJ


In [5]:
for sample in SAMPLES:
    print(sample)
    print(tabulate(jaccard(sample), headers='keys', tablefmt='github'))
    print('\n\n')


testis1
| method 1            |   cellranger-wf |   cellranger-force-wf |   cellranger3-wf |   droputils |
|---------------------|-----------------|-----------------------|------------------|-------------|
| cellranger-wf       |          1      |                0.8219 |           0.8342 |      0.0537 |
| cellranger-force-wf |          0.8219 |                1      |           0.9771 |      0.1919 |
| cellranger3-wf      |          0.8342 |                0.9771 |           1      |      0.2146 |
| droputils           |          0.0537 |                0.1919 |           0.2146 |      1      |



testis2
| method 1            |   cellranger-wf |   cellranger-force-wf |   cellranger3-wf |   droputils |
|---------------------|-----------------|-----------------------|------------------|-------------|
| cellranger-wf       |          1      |                0.6678 |           0.2083 |      0.2933 |
| cellranger-force-wf |          0.6678 |                1      |           0.5335 |      

 I am thinking about using a consensus measure. If I include cell ranger v2 with defaults then the consensus will be the same.

In [6]:
def consensus(sample):
    print(sample)
    df = get_calls(sample)

    # Consensus of all measure: cell ranger defaults, cell ranger force, cell ranger v3, and droplet utils
    flag_all = df.sum(axis=1) == 4

    # Consensus of cell ranger force, cell ranger v3, and droplet utils
    flag_three = df.iloc[:, 1:].sum(axis=1) == 3

    # Consensus of cell ranger v3, and droplet utils
    flag_two = df.iloc[:, 2:].sum(axis=1) == 2

    print('Number of cells with 4-way consensus: ', sum(flag_all))
    print('Number of cells with 3-way consensus: ', sum(flag_three))
    print('Number of cells with 2-way consensus: ', sum(flag_two))
    print('Jaccard cellranger-wf defaults vs full consensus: ', jaccard_similarity_score(df['cellranger-wf'], flag_all))
    print('Jaccard similarity of consensus with 3 vs 2 measures: ', jaccard_similarity_score(flag_three, flag_two))
    print('Number of different calls between consensus with 3 vs 2 measures: ', sum(flag_three != flag_two))
    print('\n\n')


In [7]:
for sample in SAMPLES:
    consensus(sample)


testis1
Number of cells with 4-way consensus:  476
Number of cells with 3-way consensus:  2717
Number of cells with 2-way consensus:  2790
Jaccard cellranger-wf defaults vs full consensus:  1.0
Jaccard similarity of consensus with 3 vs 2 measures:  0.9948479074034865
Number of different calls between consensus with 3 vs 2 measures:  73



testis2
Number of cells with 4-way consensus:  548
Number of cells with 3-way consensus:  2936
Number of cells with 2-way consensus:  4801
Jaccard cellranger-wf defaults vs full consensus:  1.0
Jaccard similarity of consensus with 3 vs 2 measures:  0.7473584394473043
Number of different calls between consensus with 3 vs 2 measures:  1865



testis3
Number of cells with 4-way consensus:  426
Number of cells with 3-way consensus:  7245
Number of cells with 2-way consensus:  12515
Jaccard cellranger-wf defaults vs full consensus:  1.0
Jaccard similarity of consensus with 3 vs 2 measures:  0.8678270465489567
Number of different calls between consensus wit