In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
import scanpy.api as sc

# --- scanpy configuration ---
sc.settings.verbosity = 1  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures
sc.logging.print_versions()  # print the package versions used for this run

# --- input / output locations ---
tenx_run = '10X_P4_3'  # name of the 10x run analysed in this notebook
path = f'/mnt/data/{tenx_run}/'  # directory holding matrix.mtx, genes.tsv and barcodes.tsv; ends with '/'

# `path` already ends with '/', so concatenate directly to avoid a doubled
# separator ('/mnt/data/10X_P4_3//10X_P4_3.h5ad') in the results path.
results_file = f'{path}{tenx_run}.h5ad'

# --- load the count matrix ---
# The matrix is transposed right after reading; afterwards the observations
# are labelled with barcodes.tsv and the variables with genes.tsv.
adata = sc.read(path + 'matrix.mtx', cache=True).T  # transpose the data
# Second column (index 1) of the header-less, tab-separated genes.tsv.
adata.var_names = pd.read_csv(path + 'genes.tsv', header=None, sep='\t')[1]
# First (only) column of the header-less barcodes.tsv.
adata.obs_names = pd.read_csv(path + 'barcodes.tsv', header=None)[0]

scanpy==1.3.1 anndata==0.6.10 numpy==1.14.3 scipy==1.1.0 pandas==0.23.4 scikit-learn==0.19.2 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


In [64]:
# Observation labels of adata (assigned from barcodes.tsv when the matrix was loaded)
adata.obs_names

Index(['AAAGTAGAGATGCCAG-1', 'AACCGCGTCCAACCAA-1', 'AACTCCCGTCGGGTCT-1',
       'AACTCTTAGTTGCAGG-1', 'AACTCTTTCATAACCG-1', 'AAGACCTAGATCCGAG-1',
       'AAGACCTAGGTGGGTT-1', 'AAGGAGCGTGCAACTT-1', 'AAGTCTGAGATAGTCA-1',
       'ACAGCTATCATACGGT-1',
       ...
       'TTCTACAAGGCAGTCA-1', 'TTGAACGTCTCTTATG-1', 'TTGCCGTCACGCCAGT-1',
       'TTGGAACGTTAGGGTG-1', 'TTGGCAACATGATCCA-1', 'TTGGCAAGTCCGTTAA-1',
       'TTGTAGGCATCCGGGT-1', 'TTTACTGCACACGCTG-1', 'TTTACTGCAGGACGTA-1',
       'TTTGTCAGTTGCGTTA-1'],
      dtype='object', name=0, length=149)

In [63]:
# Variable labels of adata (assigned from the second column of genes.tsv when the matrix was loaded)
adata.var_names

Index(['Xkr4', 'Rp1', 'Sox17', 'Mrpl15', 'Lypla1', 'Tcea1', 'Rgs20', 'Atp6v1h',
       'Oprk1', 'Npbwr1',
       ...
       'ERCC-00163', 'ERCC-00164', 'ERCC-00165', 'ERCC-00168', 'ERCC-00170',
       'ERCC-00171', 'Gfp_transgene', 'Cre_transgene', 'Tdtom_transgene',
       'zsGreen_transgene'],
      dtype='object', name=1, length=23433)

In [32]:
# Summary repr of the loaded AnnData object (n_obs × n_vars)
adata

AnnData object with n_obs × n_vars = 149 × 23433 

In [33]:
# List the objects stored under the S3 prefix that holds the per-plate count tables
! aws s3 ls s3://czbiohub-maca/gc_table_by_plates_processed_remux_redux/

2017-10-17 10:34:24   17952446 B000126.htseq-count-by-cell.csv
2017-10-17 10:34:24   17472437 B000127.htseq-count-by-cell.csv
2017-10-17 10:34:24    9342072 B000166.htseq-count-by-cell.csv
2017-10-17 10:34:24   12556466 B000167.htseq-count-by-cell.csv
2017-10-17 10:34:24   13003437 B000168.htseq-count-by-cell.csv
2017-10-17 10:34:39    3436244 B000404.htseq-count-by-cell.csv
2017-10-17 10:34:41   10815682 B000412.htseq-count-by-cell.csv
2017-10-17 10:34:49   10653949 B000610.htseq-count-by-cell.csv
2017-10-17 10:34:50    1922093 B000621.htseq-count-by-cell.csv
2017-10-17 10:34:51   14258117 B000633.htseq-count-by-cell.csv
2017-10-17 10:34:52    9405394 B000634.htseq-count-by-cell.csv
2017-10-17 10:34:59   16489061 B000636.htseq-count-by-cell.csv
2017-10-17 10:35:01   15289684 B000825.htseq-count-by-cell.csv
2017-10-17 10:35:13   17537394 B000826.htseq-count-by-cell.csv
2017-10-17 10:35:14   17030910 B000827.htseq-count-by-cell.csv
2017-10-17 10:35:16   15897793 B000971.htseq-count-by-c

In [46]:
# Plate barcode used below to download the FACS count table and to filter the FACS annotations
bladder_plate_to_use = 'D041914'

In [47]:
# Copy only the objects whose key starts with the chosen plate barcode
# (exclude everything, then re-include "<plate>*") into /mnt/data/facs_counts/.
# IPython substitutes $bladder_plate_to_use before the shell command runs.
! aws s3 cp --exclude "*" --include "$bladder_plate_to_use*" --recursive s3://czbiohub-maca/gc_table_by_plates_processed_remux_redux/ /mnt/data/facs_counts/

download: s3://czbiohub-maca/gc_table_by_plates_processed_remux_redux/D041914.htseq-count-by-cell.csv to ../../../../mnt/data/facs_counts/D041914.htseq-count-by-cell.csv


### Read droplet annotations

In [36]:
# Load the droplet annotation table; its first CSV column becomes the index.
droplet_annotations = pd.read_csv(
    '/home/ubuntu/tabula-muris/00_data_ingest/03_tissue_annotation_csv/Bladder_droplet_annotation.csv',
    index_col=0,
)

# Report (rows, columns), then preview the first rows.
print(droplet_annotations.shape)
droplet_annotations.head()

(2500, 11)


Unnamed: 0_level_0,tissue,subtissue,cell_ontology_class,cell_ontology_id,free_annotation,cluster.ids,mouse.sex,mouse.id,tSNE_1,tSNE_2,channel
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10X_P4_3_AAAGTAGAGATGCCAG,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,0,M,3-M-8,-28.034252,-13.407488,10X_P4_3
10X_P4_3_AACCGCGTCCAACCAA,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3-M-8,-15.890827,7.453387,10X_P4_3
10X_P4_3_AACTCCCGTCGGGTCT,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,M,3-M-8,-11.131739,8.426802,10X_P4_3
10X_P4_3_AACTCTTAGTTGCAGG,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,3,M,3-M-8,21.559201,6.817286,10X_P4_3
10X_P4_3_AACTCTTTCATAACCG,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3-M-8,-5.939946,19.630456,10X_P4_3


### subset droplet annotations to just this run

In [42]:
# Keep only the annotated cells whose `channel` equals the run loaded into
# `adata`. Referencing `tenx_run` (rather than repeating the literal) keeps
# this filter in sync with the count matrix if the run is ever changed.
droplet_annotations_subset = droplet_annotations.query('channel == @tenx_run')

# Report (rows, columns), then preview the first rows.
print(droplet_annotations_subset.shape)
droplet_annotations_subset.head()

(149, 11)


Unnamed: 0_level_0,tissue,subtissue,cell_ontology_class,cell_ontology_id,free_annotation,cluster.ids,mouse.sex,mouse.id,tSNE_1,tSNE_2,channel
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10X_P4_3_AAAGTAGAGATGCCAG,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,0,M,3-M-8,-28.034252,-13.407488,10X_P4_3
10X_P4_3_AACCGCGTCCAACCAA,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3-M-8,-15.890827,7.453387,10X_P4_3
10X_P4_3_AACTCCCGTCGGGTCT,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,M,3-M-8,-11.131739,8.426802,10X_P4_3
10X_P4_3_AACTCTTAGTTGCAGG,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,3,M,3-M-8,21.559201,6.817286,10X_P4_3
10X_P4_3_AACTCTTTCATAACCG,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3-M-8,-5.939946,19.630456,10X_P4_3


### Read FACS annotations

In [40]:
# Load the FACS annotation table; its first CSV column becomes the index.
facs_annotations = pd.read_csv('/home/ubuntu/tabula-muris/00_data_ingest/03_tissue_annotation_csv/Bladder_facs_annotation.csv', index_col=0)

# Replace literal dots in the column names ('.' -> '_') and in the cell ids
# ('.' -> '-'). As a regular expression '.' matches any character, and pandas
# versions differ in how they treat a single-character pattern, so
# regex=False states the literal replacement explicitly.
facs_annotations.columns = facs_annotations.columns.str.replace('.', '_', regex=False)
facs_annotations.index = facs_annotations.index.str.replace('.', '-', regex=False)

# Report (rows, columns), then preview the first rows.
print(facs_annotations.shape)
facs_annotations.head()

(1378, 11)


Unnamed: 0_level_0,tissue,subtissue,cell_ontology_class,cell_ontology_id,free_annotation,cluster_ids,mouse_sex,mouse_id,tSNE_1,tSNE_2,plate_barcode
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A1-B000610-3_56_F-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,F,3_56_F,-7.182841,-10.895094,B000610
A1-B002764-3_38_F-1-1,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,3,F,3_38_F,4.199059,-11.05024,B002764
A1-B002771-3_39_F-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,F,3_39_F,-11.995435,-7.325534,B002771
A1-D041914-3_8_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,M,3_8_M,-6.820022,-14.174246,D041914
A1-D042253-3_9_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3_9_M,-24.163538,-7.499349,D042253


In [49]:
# Restrict the FACS annotations to mouse "3_8_M" on the selected plate,
# using an explicit boolean row mask.
facs_annotations_subset = facs_annotations.loc[
    (facs_annotations['mouse_id'] == "3_8_M")
    & (facs_annotations['plate_barcode'] == bladder_plate_to_use)
]

# Report (rows, columns), then preview the first rows.
print(facs_annotations_subset.shape)
facs_annotations_subset.head()

(317, 11)


Unnamed: 0_level_0,tissue,subtissue,cell_ontology_class,cell_ontology_id,free_annotation,cluster_ids,mouse_sex,mouse_id,tSNE_1,tSNE_2,plate_barcode
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A1-D041914-3_8_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,M,3_8_M,-6.820022,-14.174246,D041914
A10-D041914-3_8_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3_8_M,-16.643703,-2.434115,D041914
A11-D041914-3_8_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,0,M,3_8_M,-8.102068,-17.87361,D041914
A12-D041914-3_8_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3_8_M,-21.30714,2.633208,D041914
A13-D041914-3_8_M-1-1,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,0,M,3_8_M,5.418347,-26.099985,D041914


In [101]:
# Preview the cells of the FACS subset that are annotated as "bladder urothelial cell"
facs_annotations_subset.query('cell_ontology_class == "bladder urothelial cell"').head()

Unnamed: 0_level_0,tissue,subtissue,cell_ontology_class,cell_ontology_id,free_annotation,cluster_ids,mouse_sex,mouse_id,tSNE_1,tSNE_2,plate_barcode
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A15-D041914-3_8_M-1-1,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,3,M,3_8_M,12.824701,22.522847,D041914
A16-D041914-3_8_M-1-1,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,4,M,3_8_M,21.264186,23.671775,D041914
A17-D041914-3_8_M-1-1,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,4,M,3_8_M,21.559859,21.387896,D041914
A18-D041914-3_8_M-1-1,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,4,M,3_8_M,24.46413,19.392865,D041914
A20-D041914-3_8_M-1-1,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,3,M,3_8_M,14.796384,24.363092,D041914


In [50]:
# Load the count table downloaded for the chosen plate; the first CSV column
# becomes the index.
facs_counts = pd.read_csv(
    f'/mnt/data/facs_counts/{bladder_plate_to_use}.htseq-count-by-cell.csv',
    index_col=0,
)

# Discard every column whose name contains an underscore, plus 'TAXON'.
columns_to_drop = [name for name in facs_counts.columns if '_' in name]
columns_to_drop.append('TAXON')
facs_counts = facs_counts.drop(columns_to_drop, axis='columns')

facs_counts.head()

Unnamed: 0_level_0,0610005C13Rik,0610007C21Rik,0610007L01Rik,0610007N19Rik,0610007P08Rik,0610007P14Rik,0610007P22Rik,0610008F07Rik,0610009B14Rik,0610009B22Rik,...,Zxda,Zxdb,Zxdc,Zyg11a,Zyg11b,Zyx,Zzef1,Zzz3,a,l7Rn6
SAMPLE_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A12-D041914-3_8_M-1-1,0,194,28,57,0,2,3,0,0,0,...,0,0,0,0,0,299,0,2,0,41
B16-D041914-3_8_M-1-1,0,460,0,0,0,0,230,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C18-D041914-3_8_M-1-1,0,692,62,73,113,5,0,0,0,3,...,0,0,13,0,30,6,0,378,0,196
D22-D041914-3_8_M-1-1,0,90,16,7,0,0,1,0,0,22,...,0,1,0,0,0,0,0,0,0,0
F4-D041914-3_8_M-1-1,0,59,8,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,4,0,0


## Dot products of facs data


Pick two cells that are in the same cluster and share the same annotation, plus a third cell from a different cluster with a different annotation for contrast

In [102]:
# Cells a and b: both in cluster 2 and annotated as "bladder cell"
facs_cell_a = 'A10-D041914-3_8_M-1-1'
facs_cell_b = 'A12-D041914-3_8_M-1-1'

# Cell c: contrasting cell from cluster 3, annotated as "bladder urothelial cell"
facs_cell_c = 'A15-D041914-3_8_M-1-1'

In [104]:
# Look up the count row of each selected FACS cell (a, b, c -> x, y, z).
facs_x, facs_y, facs_z = (
    facs_counts.loc[cell_id]
    for cell_id in (facs_cell_a, facs_cell_b, facs_cell_c)
)

# Dot product of the two same-cluster cells' count vectors.
np.dot(facs_x, facs_y)

41737785706

In [105]:
# Pearson correlation (the pandas default) of the counts of FACS cells a and b (same cluster, same annotation)
facs_x.corr(facs_y)

0.9516412126034777

In [106]:
# Pearson correlation of the counts of FACS cells a and c (different cluster, different annotation)
facs_x.corr(facs_z)

0.7178551838364595

In [107]:
# Spearman rank correlation of the counts of FACS cells a and b
facs_x.corr(facs_y, method='spearman')

0.5767937913621816

In [108]:
# Pearson correlation of the boolean "count > 0" indicators of FACS cells a and b
(facs_x > 0).corr(facs_y > 0)

0.5379428064333963

In [109]:
# Pearson correlation of the boolean "count > 0" indicators of FACS cells a and c
(facs_x > 0).corr(facs_z > 0)

0.44517106600194273

## Dot products of droplet data

In [60]:
# Show the first rows of the droplet annotations for this run again, to pick example cells from
droplet_annotations_subset.head()

Unnamed: 0_level_0,tissue,subtissue,cell_ontology_class,cell_ontology_id,free_annotation,cluster.ids,mouse.sex,mouse.id,tSNE_1,tSNE_2,channel
cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10X_P4_3_AAAGTAGAGATGCCAG,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,0,M,3-M-8,-28.034252,-13.407488,10X_P4_3
10X_P4_3_AACCGCGTCCAACCAA,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3-M-8,-15.890827,7.453387,10X_P4_3
10X_P4_3_AACTCCCGTCGGGTCT,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,5,M,3-M-8,-11.131739,8.426802,10X_P4_3
10X_P4_3_AACTCTTAGTTGCAGG,Bladder,,bladder urothelial cell,CL:1001428,Luminal bladder epithelial cell,3,M,3-M-8,21.559201,6.817286,10X_P4_3
10X_P4_3_AACTCTTTCATAACCG,Bladder,,bladder cell,CL:1001319,Bladder mesenchymal cell,2,M,3-M-8,-5.939946,19.630456,10X_P4_3


Pick some cells

In [92]:

# Cells a and b: both in cluster 2 and annotated as "bladder cell"
droplet_cell_a = 'AACCGCGTCCAACCAA-1'
droplet_cell_b = 'AACTCTTTCATAACCG-1'

# Cell c: contrasting cell from cluster 3, annotated as "bladder urothelial cell"
droplet_cell_c = 'AACTCTTAGTTGCAGG-1'

In [110]:
index = adata.var_names


def _droplet_expression(cell_barcode):
    """Return one droplet cell's expression values as a Series indexed by gene.

    Parameters
    ----------
    cell_barcode : str
        Observation name (entry of ``adata.obs_names``) of the cell to extract.

    Returns
    -------
    pandas.Series
        One-dimensional Series of the cell's values, indexed by ``adata.var_names``.
    """
    row = adata[cell_barcode, :].X
    # NOTE(review): depending on the anndata version, a single-observation
    # selection appears to come back either as a sparse 1 x n_vars matrix or
    # as an already-dense array -- confirm against the installed version.
    # Densify only when needed, then always flatten to 1-D, which is the
    # shape pd.Series requires.
    if hasattr(row, 'toarray'):
        row = row.toarray()
    return pd.Series(np.asarray(row).ravel(), index=adata.var_names)


droplet_x = _droplet_expression(droplet_cell_a)
droplet_y = _droplet_expression(droplet_cell_b)
droplet_z = _droplet_expression(droplet_cell_c)

# Dot product of the two same-cluster cells' expression vectors.
np.dot(droplet_x, droplet_y)

625094.0

In [111]:
# Pearson correlation (the pandas default) of the values of droplet cells a and b (same cluster, same annotation)
droplet_x.corr(droplet_y)

0.8671407957916231

In [112]:
# Pearson correlation of the values of droplet cells a and c (different cluster, different annotation)
droplet_x.corr(droplet_z)

0.5792142448792879

In [113]:
# Spearman rank correlation of the values of droplet cells a and b
droplet_x.corr(droplet_y, method='spearman')

0.5313184924613816

In [114]:
# Pearson correlation of the boolean "value > 0" indicators of droplet cells a and b
(droplet_x > 0).corr(droplet_y > 0)

0.48979496737274747

In [115]:
# Pearson correlation of the boolean "value > 0" indicators of droplet cells a and c
(droplet_x > 0).corr(droplet_z > 0)

0.4310073646429292

## Facs vs droplet

###  x vs x - same cell types

In [116]:
# Cross-technology Pearson correlation: droplet cell a vs. FACS cell a (both annotated "bladder cell").
# Series.corr aligns the two Series on their index labels (gene names) before correlating.
droplet_x.corr(facs_x)

0.19092797757213742

In [117]:
# Same cell pair, correlating the boolean "value > 0" indicators instead of the raw values
(droplet_x > 0).corr(facs_x > 0)

0.5023687519063417

### x vs z - different cell types

In [119]:
# Cross-technology Pearson correlation: droplet cell a ("bladder cell") vs. FACS cell c ("bladder urothelial cell")
droplet_x.corr(facs_z)

0.18566616322026358

In [118]:
# Droplet cell a vs. FACS cell c, correlating the boolean "value > 0" indicators instead of the raw values
(droplet_x > 0).corr(facs_z > 0)

0.4268088649421203

### TODO: i.e., does this make a cell's nearest neighbor more likely to be of the same cell type?