# Imports and settings

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import anndata
import scanpy as sc
import scanpy_scripts as ss
import bbknn
import phate

# Notebook-wide display settings.
# Widen NumPy's printed line width so wide arrays are not wrapped early.
np.set_printoptions(linewidth=180)
# NOTE(review): scanpy verbosity 1 presumably limits logging to errors and warnings — confirm against the scanpy settings docs.
sc.settings.verbosity = 1
# Colormap reused by the gene-expression scatter plots below (passed as `color_map=expr_cmap`).
expr_cmap = ss.lib.expression_colormap()
# Default figure size; individual plotting cells below call set_figsize again to override it.
ss.lib.set_figsize((4, 4))

# Processing

In [2]:
organoid_ad = sc.read('organoid.cellxgene.h5ad')

In [6]:
organoid_ad

AnnData object with n_obs × n_vars = 130811 × 27320 
    obs: 'sample_id', 'version', 'week', 'day', 'strain', 'batch', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_bk_split5', 'nh3_annot1', 'nh3_broad_annot1'
    var: 'gene_ids', 'gene_symbols', 'highly_variable'
    obsm: 'X_umap_hm', 'X_umap_bk'

In [None]:
ss.lib.plot_embedding(organoid_ad, basis='umap_bk', groupby='nh3_broad_annot1')

In [171]:
ss.lib.cross_table(organoid_ad, 'nh3_annot1', 'strain')

strain,DSP,WA25
nh3_annot1,Unnamed: 1_level_1,Unnamed: 2_level_1
Endothelium,63,67
Merkel,56,83
Melanocytes,605,785
CNCC-like / Schwann,810,1612
CNCC-like,1415,685
Neuron progenitors,562,931
Immature neurons,761,754
Peridermal KC,465,689
Basal stem-like KC,1183,1526
Basal KC,5351,7076


In [173]:
organoid_ad.obs['week'].value_counts()

17-20_fetal_wks    72164
7-10_fetal_wks     21803
14-16_fetal_wks    18440
4-7_fetal_wks      18404
Name: week, dtype: int64

In [172]:
organoid_ad.obs['day'].value_counts()

day-133    72164
day-48     21803
day-85     18440
day-29     18404
Name: day, dtype: int64

In [128]:
organoid_ad.obs['nh3_broad_annot1'].value_counts()

Stroma           98067
Keratinocytes    23555
Neuronal          7530
Melanocytes       1390
Merkel             139
Endothelium        130
Name: nh3_broad_annot1, dtype: int64

In [3]:
ads = ss.lib.split_by_group(organoid_ad, groupby='nh3_broad_annot1')

In [8]:
ads

{'Endothelium': AnnData object with n_obs × n_vars = 130 × 27320 
     obs: 'sample_id', 'version', 'week', 'day', 'strain', 'batch', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_bk_split5', 'nh3_annot1', 'nh3_broad_annot1'
     var: 'gene_ids', 'gene_symbols', 'highly_variable'
     obsm: 'X_umap_hm', 'X_umap_bk',
 'Keratinocytes': AnnData object with n_obs × n_vars = 23555 × 27320 
     obs: 'sample_id', 'version', 'week', 'day', 'strain', 'batch', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_bk_split5', 'nh3_annot1', 'nh3_broad_annot1'
     var: 'gene_ids', 'gene_symbols', 'highly_variable'
     obsm: 'X_umap_hm', 'X_umap_bk',
 'Melanocytes': AnnData object with n_obs × n_vars = 1390 × 27320 
     obs: 'sample_id', 'version', 'week', 'day', 'strain', 'batch', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_bk_split5', 'nh3_annot1', 'nh3_broa

### Keratinocytes

In [9]:
fKC_ad = sc.read('../20200403_post_annot3_cleanup/fetal_skin.keratinocytes.doublet_removed_processed.20200403.h5ad')

Only considering the two last: ['.20200403', '.h5ad'].
Only considering the two last: ['.20200403', '.h5ad'].


In [55]:
fKC_ad.X = fKC_ad.raw.X

In [62]:
fKC_ad

AnnData object with n_obs × n_vars = 1469 × 17905 
    obs: 'bh_doublet_pval', 'cell_caller', 'cluster_scrublet_score', 'doublet_pval', 'mt_prop', 'n_counts', 'n_genes', 'sanger_id', 'scrublet_score', 'chemistry', 'donor', 'gender', 'pcw', 'sorting', 'sample', 'chemistry_sorting', 'annot', 'hierarchy1', 'rachel_annot1', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_hm_r0_1', 'leiden_hm_r0_2', 'leiden_hm_r0_3', 'leiden_hm_r0_4', 'leiden_hm_r0_5', 'leiden_bk_r0_1', 'leiden_bk_r0_2', 'leiden_bk_r0_3', 'leiden_bk_r0_4', 'leiden_bk_r0_5', 'rachel_annot2', 'rachel_annot3'
    var: 'gene_ids', 'cc', 'mito', 'ribo', 'hb', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'hvg_full'
    uns: 'leiden', 'neighbors', 'neighbors_bk', 'neighbors_hm', 'pca'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm', 'X_umap_bk'
    varm: 'PCs'

In [66]:
# Trim the fetal-skin keratinocyte metadata to the columns needed for pooling,
# exposing `pcw` under the column name `week` used by the organoid data.
# `sanger_id` is kept as-is because a later cell copies it into `batch`.
# The rename is applied *before* the column selection so that re-running this
# cell is a no-op instead of raising KeyError('pcw'): DataFrame.rename silently
# ignores labels that are no longer present.
fKC_ad.obs = fKC_ad.obs.rename(columns={'pcw': 'week'})[[
    'sanger_id', 'chemistry_sorting', 'donor', 'gender', 'week', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'rachel_annot3',
]]

In [70]:
# Organoid keratinocytes: keep only the metadata columns carried into the pooled object.
organoid_kc_obs_columns = [
    'sample_id', 'batch', 'day', 'week', 'strain',
    'n_counts', 'n_genes',
    'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50',
    'nh3_annot1',
]
ads['Keratinocytes'].obs = ads['Keratinocytes'].obs.loc[:, organoid_kc_obs_columns]

In [85]:
# Sub-sample the organoid keratinocytes per `nh3_annot1` group (fraction=0.1, min_n=200)
# before pooling them with the fetal-skin keratinocytes.
# Seed the global NumPy RNG first so the drawn subset is reproducible across
# Restart & Run All.
# NOTE(review): assumes ss.lib.subsample draws from NumPy's global RNG — confirm;
# if it accepts its own seed/random_state argument, prefer passing that instead.
np.random.seed(0)
oKC_ad = ss.lib.subsample(ads['Keratinocytes'], groupby='nh3_annot1', fraction=0.1, min_n=200)

In [86]:
# Pool fetal-skin and organoid keratinocytes; the source of each cell is recorded in obs['dataset'].
kc_pooled = fKC_ad.concatenate(
    oKC_ad,
    batch_key='dataset',
    batch_categories=['fetal_skin', 'organoid'],
)

In [91]:
# Boolean mask selecting the organoid cells of the pooled keratinocyte object.
k_org = kc_pooled.obs['dataset'] == 'organoid'

In [92]:
# Unified annotation: fetal cells are labelled 'fsk_' + rachel_annot3,
# organoid cells 'org_' + nh3_annot1; stored as a categorical column.
fetal_kc_labels = 'fsk_' + kc_pooled.obs['rachel_annot3'].astype(str)
organoid_kc_labels = 'org_' + kc_pooled.obs['nh3_annot1'].astype(str)
kc_pooled.obs['annot'] = fetal_kc_labels.where(~k_org, organoid_kc_labels).astype('category')

In [93]:
# Fetal cells have no organoid-style batch label, so their sanger_id is used as the batch.
is_fetal_kc = ~k_org
fetal_kc_batches = kc_pooled.obs.loc[is_fetal_kc, 'sanger_id'].values
kc_pooled.obs.loc[is_fetal_kc, 'batch'] = fetal_kc_batches
# Store the completed batch column as a categorical.
kc_pooled.obs['batch'] = kc_pooled.obs['batch'].astype('category')

In [94]:
kc_pooled.obs.batch.value_counts()

v3_WA25           1000
v3_DSP             914
v2_WA25            413
FCAImmP7964510     298
v2_DSP             240
FCAImmP7803042     231
FCAImmP7803043     180
FCAImmP7803026     148
FCAImmP7803027     119
FCAImmP7316897      94
FCAImmP7964508      93
FCAImmP7964509      89
FCAImmP7555848      52
FCAImmP7352191      43
FCAImmP7316888      36
FCAImmP7352190      28
FCAImmP7803034      14
FCAImmP7803024      13
FCAImmP7964505      11
FCAImmP7862096      11
FCAImmP7862095       9
Name: batch, dtype: int64

In [74]:
kc_pooled.obs.head()

Unnamed: 0,batch,chemistry_sorting,dataset,day,donor,gender,n_counts,n_genes,nh3_annot1,percent_hb,percent_mito,percent_ribo,percent_top50,rachel_annot3,sample_id,sanger_id,strain,week,annot
AAAGTAGAGTATCGAA-1-FCAImmP7316888-fetal_skin,,SC3Pv2_CD45N,fetal_skin,,F33,female,15891.0,2870,,0.018879,2.045183,54.080929,44.509471,Periderm,,FCAImmP7316888,,9,fsk_Periderm
AACCATGAGATTACCC-1-FCAImmP7316888-fetal_skin,,SC3Pv2_CD45N,fetal_skin,,F33,female,9631.0,2614,,0.020766,2.959194,35.01194,34.575849,Periderm,,FCAImmP7316888,,9,fsk_Periderm
ACCTTTAGTTCCACAA-1-FCAImmP7316888-fetal_skin,,SC3Pv2_CD45N,fetal_skin,,F33,female,5422.0,1885,,0.018443,3.430469,30.616009,34.267798,Periderm,,FCAImmP7316888,,9,fsk_Periderm
AGAGTGGTCGAACTGT-1-FCAImmP7316888-fetal_skin,,SC3Pv2_CD45N,fetal_skin,,F33,female,3173.0,1198,,0.0,1.922471,38.985188,33.217775,Periderm,,FCAImmP7316888,,9,fsk_Periderm
AGCGTCGTCAGAGCTT-1-FCAImmP7316888-fetal_skin,,SC3Pv2_CD45N,fetal_skin,,F33,female,5515.0,1528,,0.0,1.831369,47.978241,41.233001,Periderm,,FCAImmP7316888,,9,fsk_Periderm


In [95]:
# Run the scanpy_scripts default pipeline on the pooled keratinocytes with
# obs['batch'] as the batch variable; the returned AnnData is displayed below.
# NOTE(review): post_norm_only=True presumably starts from already-normalised
# data and skips the earlier QC/normalisation steps — confirm in scanpy_scripts.
ss.lib.simple_default_pipeline(kc_pooled, post_norm_only=True, batch='batch')

AnnData object with n_obs × n_vars = 4036 × 15277 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot3', 'sample_id', 'sanger_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'pca', 'neighbors_hm', 'neighbors', 'leiden'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PCs'

In [103]:
# Second pipeline pass on the pooled keratinocytes, now passing both 'dataset'
# and 'batch' as batch variables.
# NOTE(review): post_pca_only=True presumably reuses the existing PCA and only
# redoes the downstream steps — confirm in scanpy_scripts.
ss.lib.simple_default_pipeline(kc_pooled, post_pca_only=True, batch=['dataset', 'batch'])

AnnData object with n_obs × n_vars = 4036 × 15277 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot3', 'sample_id', 'sanger_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'pca', 'neighbors_hm', 'leiden', 'annot_colors', 'neighbors'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PCs'

In [None]:
ss.lib.plot_embedding(kc_pooled, basis='umap_hm', groupby='annot', figsize=(6,6))

In [118]:
kc_pooled.obs['annot'].unique()

[fsk_Periderm, fsk_Early KC (stem cell?), fsk_Suprabasal, fsk_Hair follicle?, fsk_Basal KC, org_HF Basal KC, org_Suprabasal KC, org_Basal KC, org_Peridermal KC, org_Basal stem-like KC]
Categories (10, object): [fsk_Periderm, fsk_Early KC (stem cell?), fsk_Suprabasal, fsk_Hair follicle?, ..., org_Suprabasal KC, org_Basal KC, org_Peridermal KC, org_Basal stem-like KC]

In [None]:
# Highlight the fetal-skin and organoid keratinocyte annotations in separate panels.
kc_highlight_groups = {
    'fetal skin': [
        'fsk_Periderm',
        'fsk_Early KC (stem cell?)',
        'fsk_Basal KC',
        'fsk_Hair follicle?',
        'fsk_Suprabasal',
    ],
    'organoid': [
        'org_Peridermal KC',
        'org_Basal stem-like KC',
        'org_Basal KC',
        'org_HF Basal KC',
        'org_Suprabasal KC',
    ],
}
ss.lib.highlight(
    kc_pooled,
    basis='umap_hm',
    groupby='annot',
    groups=kc_highlight_groups,
    wspace=0.6,
    figsize=(4, 4),
)

In [None]:
ss.lib.set_figsize((4,4))
ss.lib.plot_scatter(kc_pooled, basis='umap_hm', color=['dataset', 'batch'], ncols=6, wspace=0.5)

In [None]:
ss.lib.set_figsize((4,4))
ss.lib.plot_scatter(kc_pooled, basis='umap_hm', color=['week'], ncols=6, wspace=0.5, palette='viridis')

In [None]:
ss.lib.set_figsize((3.5,3.5))
ss.lib.plot_scatter(kc_pooled, basis='umap_hm', color=['KRT4', 'SOX6', 'KRT14', 'KRT85', 'KRT1', 'CDK1', 'PRRX1'], ncols=7, color_map=expr_cmap)

In [169]:
# Resume point: load the previously saved pooled keratinocyte object only when
# no `kc_pooled` exists in memory (e.g. a fresh kernel where the processing
# cells above were skipped). When the notebook is run top to bottom, the freshly
# computed `kc_pooled` is kept, so it is not replaced by a stale copy from disk
# and the read does not fail if the file has not been written yet.
if 'kc_pooled' not in globals():
    kc_pooled = sc.read('pooled_keratinocytes.processed.h5ad')

In [176]:
# NOTE(review): clear_colors presumably strips stored palettes such as the
# 'annot_colors' entry seen in .uns above — confirm in scanpy_scripts.
ss.lib.clear_colors(kc_pooled)
# Save the processed pooled keratinocytes as an lzf-compressed h5ad file.
kc_pooled.write('pooled_keratinocytes.processed.h5ad', compression='lzf')

### Melanocytes

In [18]:
fML_ad = sc.read('../20200403_post_annot3_cleanup/fetal_skin.melanocytes.doublet_removed_processed.20200403.h5ad')

Only considering the two last: ['.20200403', '.h5ad'].
Only considering the two last: ['.20200403', '.h5ad'].


In [19]:
fML_ad

AnnData object with n_obs × n_vars = 794 × 15082 
    obs: 'bh_doublet_pval', 'cell_caller', 'cluster_scrublet_score', 'doublet_pval', 'mt_prop', 'n_counts', 'n_genes', 'sanger_id', 'scrublet_score', 'chemistry', 'donor', 'gender', 'pcw', 'sorting', 'sample', 'chemistry_sorting', 'annot', 'hierarchy1', 'rachel_annot1', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_hm_r0_1', 'leiden_hm_r0_2', 'leiden_hm_r0_3', 'leiden_hm_r0_4', 'leiden_hm_r0_5', 'leiden_bk_r0_1', 'leiden_bk_r0_2', 'leiden_bk_r0_3', 'leiden_bk_r0_4', 'leiden_bk_r0_5', 'rachel_annot2', 'rachel_annot3'
    var: 'gene_ids', 'cc', 'mito', 'ribo', 'hb', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'hvg_full'
    uns: 'leiden', 'neighbors', 'neighbors_bk', 'neighbors_hm', 'pca'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm', 'X_umap_bk'
    varm: 'PCs'

In [20]:
fML_ad.X = fML_ad.raw.X

In [21]:
# Trim the fetal-skin melanocyte metadata to the columns needed for pooling,
# exposing `pcw` as `week` and `sanger_id` as `batch` to match the organoid columns.
# The rename is applied *before* the column selection so that re-running this
# cell is a no-op instead of raising KeyError on the already-renamed columns:
# DataFrame.rename silently ignores labels that are no longer present.
fML_ad.obs = fML_ad.obs.rename(columns={'pcw': 'week', 'sanger_id': 'batch'})[[
    'batch', 'chemistry_sorting', 'donor', 'gender', 'week', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'rachel_annot3',
]]

In [22]:
# Organoid melanocytes: keep only the metadata columns carried into the pooled object.
organoid_ml_obs_columns = [
    'sample_id', 'batch', 'day', 'week', 'strain',
    'n_counts', 'n_genes',
    'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50',
    'nh3_annot1',
]
ads['Melanocytes'].obs = ads['Melanocytes'].obs.loc[:, organoid_ml_obs_columns]

In [23]:
oML_ad = ads['Melanocytes'].copy()

In [37]:
# Pool fetal-skin and organoid melanocytes; the source of each cell is recorded in obs['dataset'].
ml_pooled = fML_ad.concatenate(
    oML_ad,
    batch_key='dataset',
    batch_categories=['fetal_skin', 'organoid'],
)

In [38]:
# Boolean mask selecting the organoid cells of the pooled melanocyte object.
k_org = ml_pooled.obs['dataset'] == 'organoid'

In [39]:
# Unified annotation: fetal cells are labelled 'fsk_' + rachel_annot3,
# organoid cells 'org_' + nh3_annot1; stored as a categorical column.
fetal_ml_labels = 'fsk_' + ml_pooled.obs['rachel_annot3'].astype(str)
organoid_ml_labels = 'org_' + ml_pooled.obs['nh3_annot1'].astype(str)
ml_pooled.obs['annot'] = fetal_ml_labels.where(~k_org, organoid_ml_labels).astype('category')

In [40]:
ss.lib.simple_default_pipeline(ml_pooled, post_norm_only=True, batch=['dataset', 'batch'], hvg_kw={'by_batch': ('dataset', 2)})

AnnData object with n_obs × n_vars = 2184 × 13449 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot3', 'sample_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable'
    uns: 'pca', 'neighbors_hm', 'neighbors', 'leiden'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PCs'

In [None]:
ss.lib.plot_embedding(ml_pooled, basis='umap_hm', groupby='annot', figsize=(6,6))

In [None]:
ss.lib.set_figsize((4,4))
ss.lib.plot_scatter(ml_pooled, basis='umap_hm', color=['dataset', 'week', 'batch'], ncols=6, wspace=0.5)

In [None]:
# Highlight the fetal-skin and organoid melanocyte annotations in separate panels.
ml_highlight_groups = {
    'fetal skin': ['fsk_Melanoblast', 'fsk_Melanocyte'],
    'organoid': ['org_Melanocytes'],
}
ss.lib.set_figsize((4, 4))
ss.lib.highlight(
    ml_pooled,
    basis='umap_hm',
    groupby='annot',
    groups=ml_highlight_groups,
    wspace=0.5,
)

In [None]:
ss.lib.set_figsize((4,4))
ss.lib.plot_scatter(ml_pooled, basis='umap_hm', color=['week'], palette='viridis')

In [None]:
ss.lib.set_figsize((4,4))
ss.lib.plot_scatter(ml_pooled, basis='umap_hm', color=['leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7'])

In [None]:
ss.lib.set_figsize((3,3))
ss.lib.plot_scatter(ml_pooled, basis='umap_hm', color=['PMEL', 'CDK1', 'OSTM1', 'PRRX1', 'TMEM215', 'OLFML2A'], color_map=expr_cmap, ncols=6, size=25)

In [71]:
ml_deg = ss.lib.diffexp(ml_pooled, groupby='leiden_hm_r0_1', filter_params='sc_default')

In [72]:
ml_deg.groupby('cluster').head(4)

Unnamed: 0,cluster,ref,rank,genes,scores,logfoldchanges,pvals,pvals_adj
13454,1,rest,5,LUM,15.743942,4.272083,4.444483000000001e-43,8.539121e-40
13457,1,rest,8,PRRX1,13.705588,4.619967,1.301944e-34,1.750985e-31
13464,1,rest,15,CXCL14,11.68604,3.888844,5.699996e-27,4.034697e-24
13466,1,rest,17,MFAP4,11.629888,4.046122,7.321066e-27,4.923051e-24
26905,2,rest,7,BIRC5,16.60408,6.613349,1.9535669999999998e-38,2.18946e-35
26906,2,rest,8,TK1,16.358027,6.284443,4.5298249999999993e-38,4.686278e-35
26910,2,rest,12,MAD2L1,15.76741,4.576672,1.499786e-37,1.440759e-34
26912,2,rest,14,CENPF,15.379494,5.651021,3.508167e-35,2.775373e-32
40349,3,rest,2,TMEM215,5.143225,3.375116,2.229221e-06,0.0007687383
40355,3,rest,8,COL2A1,4.59574,3.154106,1.785317e-05,0.00428763


In [58]:
ml_deg_paired = ss.lib.diffexp_paired(ml_pooled, groupby='leiden_hm_r0_1', pair=('2', '0'), filter_params='sc_default')

In [47]:
ss.lib.clear_colors(ml_pooled)

In [48]:
ml_pooled.write('pooled_melanocytes.processed.h5ad', compression='lzf')

... storing 'gene_symbols-organoid' as categorical


### Endothelium

In [4]:
fET_ad = sc.read('../20200403_post_annot3_cleanup/fetal_skin.endothelium.doublet_removed_processed.20200403.h5ad')

Only considering the two last: ['.20200403', '.h5ad'].
Only considering the two last: ['.20200403', '.h5ad'].


In [162]:
fET_ad

AnnData object with n_obs × n_vars = 2173 × 18354 
    obs: 'bh_doublet_pval', 'cell_caller', 'cluster_scrublet_score', 'doublet_pval', 'mt_prop', 'n_counts', 'n_genes', 'sanger_id', 'scrublet_score', 'chemistry', 'donor', 'gender', 'pcw', 'sorting', 'sample', 'chemistry_sorting', 'annot', 'hierarchy1', 'rachel_annot1', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_hm_r0_1', 'leiden_hm_r0_2', 'leiden_hm_r0_3', 'leiden_hm_r0_4', 'leiden_hm_r0_5', 'leiden_bk_r0_1', 'leiden_bk_r0_2', 'leiden_bk_r0_3', 'leiden_bk_r0_4', 'leiden_bk_r0_5', 'rachel_annot2', 'rachel_annot3'
    var: 'gene_ids', 'cc', 'mito', 'ribo', 'hb', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'hvg_full'
    uns: 'leiden', 'neighbors', 'neighbors_bk', 'neighbors_hm', 'pca', 'rachel_annot3_colors'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm', 'X_umap_bk'
    varm: 'PCs'

In [5]:
fET_ad.X = fET_ad.raw.X

In [6]:
# Trim the fetal-skin endothelium metadata to the columns needed for pooling,
# exposing `pcw` as `week` and `sanger_id` as `batch` to match the organoid columns.
# The rename is applied *before* the column selection so that re-running this
# cell is a no-op instead of raising KeyError on the already-renamed columns:
# DataFrame.rename silently ignores labels that are no longer present.
fET_ad.obs = fET_ad.obs.rename(columns={'pcw': 'week', 'sanger_id': 'batch'})[[
    'batch', 'chemistry_sorting', 'donor', 'gender', 'week', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'rachel_annot3',
]]

In [7]:
# Organoid endothelium: keep only the metadata columns carried into the pooled object.
organoid_et_obs_columns = [
    'sample_id', 'batch', 'day', 'week', 'strain',
    'n_counts', 'n_genes',
    'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50',
    'nh3_annot1',
]
ads['Endothelium'].obs = ads['Endothelium'].obs.loc[:, organoid_et_obs_columns]

In [8]:
oET_ad = ads['Endothelium'].copy()

In [9]:
# Pool fetal-skin and organoid endothelium; the source of each cell is recorded in obs['dataset'].
et_pooled = fET_ad.concatenate(
    oET_ad,
    batch_key='dataset',
    batch_categories=['fetal_skin', 'organoid'],
)

In [10]:
# Boolean mask selecting the organoid cells of the pooled endothelium object.
k_org = et_pooled.obs['dataset'] == 'organoid'

In [11]:
# Unified annotation: fetal cells are labelled 'fsk_' + rachel_annot3,
# organoid cells 'org_' + nh3_annot1; stored as a categorical column.
fetal_et_labels = 'fsk_' + et_pooled.obs['rachel_annot3'].astype(str)
organoid_et_labels = 'org_' + et_pooled.obs['nh3_annot1'].astype(str)
et_pooled.obs['annot'] = fetal_et_labels.where(~k_org, organoid_et_labels).astype('category')

In [12]:
ss.lib.simple_default_pipeline(et_pooled, post_norm_only=True, batch=['dataset', 'batch'])

AnnData object with n_obs × n_vars = 2303 × 15445 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot3', 'sample_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'pca', 'neighbors_hm', 'neighbors', 'leiden'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PCs'

In [None]:
ss.lib.plot_embedding(et_pooled, basis='umap_hm', groupby='annot', figsize=(6,6))

In [None]:
# Highlight the fetal-skin and organoid endothelial annotations in separate panels.
et_highlight_groups = {
    'fetal skin': [
        'fsk_Early endothelial cell',
        'fsk_Arterial',
        'fsk_Tip cell (arterial?)',
        'fsk_Capillary/postcapillary venule',
        'fsk_Postcapillary venule',
        'fsk_Capillary (venular tip?)',
        'fsk_Early LE',
        'fsk_LE',
    ],
    'organoid': ['org_Endothelium'],
}
ss.lib.set_figsize((4, 4))
ss.lib.highlight(
    et_pooled,
    basis='umap_hm',
    groupby='annot',
    groups=et_highlight_groups,
    wspace=0.7,
)

In [None]:
ss.lib.set_figsize((5,5))
ss.lib.plot_scatter(et_pooled, basis='umap_hm', color=['week'], ncols=6, wspace=0.5, size=20, palette='viridis')

In [None]:
ss.lib.set_figsize((4,4))
ss.lib.plot_scatter(et_pooled, basis='umap_hm', color=['dataset', 'batch'], ncols=6, wspace=0.5, palette='tab20')

In [None]:
ss.lib.set_figsize((3,3))
ss.lib.plot_scatter(et_pooled, basis='umap_hm', color_map=expr_cmap, ncols=5, size=25, color=['PLVAP', 'CCL21', 'CDK1'])

In [17]:
# Clear stored colour palettes before saving, as the keratinocyte and melanocyte
# sections do before writing their pooled objects.
# NOTE(review): clear_colors presumably removes the '*_colors' entries that the
# plotting calls add to .uns — confirm in scanpy_scripts.
ss.lib.clear_colors(et_pooled)
# Save the processed pooled endothelium as an lzf-compressed h5ad file.
et_pooled.write('pooled_endothelium.processed.h5ad', compression='lzf')

... storing 'gene_symbols-organoid' as categorical


### Neuronal (Schwann cells, neuronal cells and Merkel cells)

In [81]:
fST_ad = sc.read('../data/h5ad/20200114/fetal_skin.stroma.doublet_removed_processed.20200114.h5ad')

Only considering the two last: ['.20200114', '.h5ad'].
Only considering the two last: ['.20200114', '.h5ad'].


In [82]:
fST_ad

AnnData object with n_obs × n_vars = 134005 × 27117 
    obs: 'bh_doublet_pval', 'cell_caller', 'cluster_scrublet_score', 'doublet_pval', 'mt_prop', 'n_counts', 'n_genes', 'sanger_id', 'scrublet_score', 'chemistry', 'donor', 'gender', 'pcw', 'sorting', 'sample', 'chemistry_sorting', 'annot', 'hierarchy1', 'rachel_annot1', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_hm_r0_1', 'leiden_hm_r0_2', 'leiden_hm_r0_3', 'leiden_hm_r0_4', 'leiden_hm_r0_5', 'leiden_bk_r0_1', 'leiden_bk_r0_2', 'leiden_bk_r0_3', 'leiden_bk_r0_4', 'leiden_bk_r0_5', 'rachel_annot2'
    var: 'gene_ids', 'cc', 'mito', 'ribo', 'hb', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'hvg_full'
    uns: 'leiden', 'neighbors', 'neighbors_bk', 'neighbors_hm', 'pca', 'rachel_annot2_colors'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm', 'X_umap_bk'
    varm: 'PCs'

In [83]:
fST_ad.obs['rachel_annot2'].value_counts()

Fibroblast                102089
Smooth muscle/pericyte     10265
Muscle/myofibroblast        6622
MSC                         6288
Schwann cells               5305
Adipocyte                   2200
Unknown                     1236
Name: rachel_annot2, dtype: int64

In [85]:
fST_ad.obs['annot'].value_counts()

nan                         81299
fs_Fibroblast               41052
fs_Fibroblast_prolif         3556
fs_unknown                   2422
fs_Smooth muscle             2077
fs_Schwann cell               916
fs_Skeletal muscle            743
fs_Adipocyte?                 485
fs_Fibroblast_HF?             427
fs_HF bud                     423
fs_Neuronal                   377
fs_Fibroblast?                188
fs_Fibro Mac                   35
fs_Lymphatic endothelium        2
fs_Macrophage                   1
fs_Keratinocyte                 1
fs_Pro Pre B cell               1
Name: annot, dtype: int64

In [87]:
# From the fetal stroma object, keep cells labelled 'Schwann cells' in
# rachel_annot2 or 'fs_Neuronal' in annot, as an independent copy.
schwann_mask = fST_ad.obs['rachel_annot2'] == 'Schwann cells'
neuronal_mask = fST_ad.obs['annot'] == 'fs_Neuronal'
fNR_ad = fST_ad[schwann_mask | neuronal_mask, :].copy()

In [89]:
del fST_ad

In [88]:
fNR_ad

AnnData object with n_obs × n_vars = 5681 × 27117 
    obs: 'bh_doublet_pval', 'cell_caller', 'cluster_scrublet_score', 'doublet_pval', 'mt_prop', 'n_counts', 'n_genes', 'sanger_id', 'scrublet_score', 'chemistry', 'donor', 'gender', 'pcw', 'sorting', 'sample', 'chemistry_sorting', 'annot', 'hierarchy1', 'rachel_annot1', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'leiden_hm_r0_1', 'leiden_hm_r0_2', 'leiden_hm_r0_3', 'leiden_hm_r0_4', 'leiden_hm_r0_5', 'leiden_bk_r0_1', 'leiden_bk_r0_2', 'leiden_bk_r0_3', 'leiden_bk_r0_4', 'leiden_bk_r0_5', 'rachel_annot2'
    var: 'gene_ids', 'cc', 'mito', 'ribo', 'hb', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'hvg_full'
    uns: 'leiden', 'neighbors', 'neighbors_bk', 'neighbors_hm', 'pca', 'rachel_annot2_colors'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm', 'X_umap_bk'
    varm: 'PCs'

In [91]:
fNR_ad.obs['rachel_annot2'].value_counts()

Schwann cells    5305
Fibroblast        374
Unknown             2
Name: rachel_annot2, dtype: int64

In [100]:
# The subset also contains cells selected via annot == 'fs_Neuronal'; in
# rachel_annot2 those carry 'Fibroblast' / 'Unknown' labels (see the value
# counts above), so they are regrouped under a single 'Neuronal' label while
# 'Schwann cells' is kept unchanged.
fNR_ad.obs['rachel_annot2'] = ss.lib.regroup(fNR_ad, groupby='rachel_annot2', regroups={'Schwann cells': 'Schwann cells', 'Neuronal': ['Fibroblast', 'Unknown']})

In [90]:
fNR_ad.X = fNR_ad.raw.X

In [101]:
# Trim the fetal neuronal/Schwann metadata to the columns needed for pooling,
# exposing `pcw` as `week` and `sanger_id` as `batch` to match the organoid columns.
# The rename is applied *before* the column selection so that re-running this
# cell is a no-op instead of raising KeyError on the already-renamed columns:
# DataFrame.rename silently ignores labels that are no longer present.
fNR_ad.obs = fNR_ad.obs.rename(columns={'pcw': 'week', 'sanger_id': 'batch'})[[
    'batch', 'chemistry_sorting', 'donor', 'gender', 'week', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'rachel_annot2',
]]

In [104]:
# Apply the same metadata trimming to both organoid subsets that are concatenated below.
organoid_nr_obs_columns = [
    'sample_id', 'batch', 'day', 'week', 'strain',
    'n_counts', 'n_genes',
    'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50',
    'nh3_annot1',
]
for organoid_group in ('Neuronal', 'Merkel'):
    ads[organoid_group].obs = ads[organoid_group].obs.loc[:, organoid_nr_obs_columns]

In [103]:
ads['Neuronal']

AnnData object with n_obs × n_vars = 7530 × 27320 
    obs: 'sample_id', 'batch', 'day', 'week', 'strain', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'nh3_annot1'
    var: 'gene_ids', 'gene_symbols', 'highly_variable'
    obsm: 'X_umap_hm', 'X_umap_bk'

In [105]:
# Combine the organoid 'Neuronal' and 'Merkel' subsets into one object; the
# temporary 'tmp' batch column created here is deleted in the next cell.
oNR_ad = ads['Neuronal'].concatenate(ads['Merkel'], batch_key='tmp')

In [107]:
del oNR_ad.obs['tmp']

In [108]:
# Pool fetal-skin and organoid neuronal cells; the source of each cell is recorded in obs['dataset'].
nr_pooled = fNR_ad.concatenate(
    oNR_ad,
    batch_key='dataset',
    batch_categories=['fetal_skin', 'organoid'],
)

In [109]:
# Boolean mask selecting the organoid cells of the pooled neuronal object.
k_org = nr_pooled.obs['dataset'] == 'organoid'

In [110]:
# Unified annotation: fetal cells are labelled 'fsk_' + rachel_annot2,
# organoid cells 'org_' + nh3_annot1; stored as a categorical column.
fetal_nr_labels = 'fsk_' + nr_pooled.obs['rachel_annot2'].astype(str)
organoid_nr_labels = 'org_' + nr_pooled.obs['nh3_annot1'].astype(str)
nr_pooled.obs['annot'] = fetal_nr_labels.where(~k_org, organoid_nr_labels).astype('category')

In [111]:
ss.lib.simple_default_pipeline(nr_pooled, post_norm_only=True, batch=['dataset', 'batch'], hvg_kw={'by_batch': ('dataset', 1)})

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../miniconda/envs/scrna/lib/python3.6/site-packages/umap/rp_tree.py", line 135:
@numba.njit(fastmath=True, nogil=True, parallel=True)
def euclidean_random_projection_split(data, indices, rng_state):
^

  self.func_ir.loc))
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../miniconda/envs/scrna/lib/python3.6/site-packages/umap/utils.py", line 409:
@numba.njit(parallel=True)
def build_candidates(current_graph, n_vertices, n_neighbors, max_candidates, rng_state):
^

  current_graph, n_vertices, n_neighbors, max_candid

AnnData object with n_obs × n_vars = 13350 × 18812 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot2', 'sample_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-0-organoid', 'gene_symbols-0-organoid', 'highly_variable-0-organoid', 'gene_ids-1-organoid', 'gene_symbols-1-organoid', 'highly_variable-1-organoid', 'highly_variable'
    uns: 'pca', 'neighbors_hm', 'neighbors', 'leiden'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PC

In [222]:
# Second pipeline pass over the pooled neuronal object, this time with
# post_pca_only=True; the displayed result additionally lists leiden_r0_*
# columns in obs.
# NOTE(review): which steps post_pca_only skips is defined in ss.lib -- confirm.
ss.lib.simple_default_pipeline(
    nr_pooled,
    post_pca_only=True,
    batch=['dataset', 'batch'],
)

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../miniconda/envs/scrna/lib/python3.6/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  self.func_ir.loc))


AnnData object with n_obs × n_vars = 12835 × 18812 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot2', 'sample_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9', 'leiden_r0_1', 'leiden_r0_3', 'leiden_r0_5', 'leiden_r0_7', 'leiden_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'pca', 'neighbors_hm', 'leiden', 'dataset_colors', 'week_colors', 'b

In [None]:
# Plot the 'umap_hm' embedding of the pooled neuronal cells, grouped by the
# combined 'annot' label.
ss.lib.plot_embedding(
    nr_pooled,
    basis='umap_hm',
    groupby='annot',
    figsize=(6, 6),
)

In [291]:
# Number of cells per combined annotation label in the pooled neuronal object.
nr_pooled.obs.annot.value_counts()

fsk_Schwann cells          5305
org_CNCC-like / Schwann    2422
org_CNCC-like              2100
org_Immature neurons       1515
org_Neuron progenitors     1493
Name: annot, dtype: int64

In [None]:
# Highlight fetal-skin vs organoid annotation labels on the 'umap_hm' embedding,
# one named group per dataset.
# NOTE(review): the value_counts output for obs['annot'] recorded in this
# notebook lists no 'org_Merkel' label -- confirm that ss.lib.highlight
# tolerates a label that is absent from the data, or drop it from the list.
ss.lib.set_figsize((4,4))
ss.lib.highlight(nr_pooled, basis='umap_hm', groupby='annot', groups={
    'fetal skin': ['fsk_Schwann cells',],
    'organoid': ['org_CNCC-like / Schwann', 'org_CNCC-like', 'org_Immature neurons', 'org_Neuron progenitors', 'org_Merkel'],
}, wspace=0.5)

In [None]:
# Colour the pooled neuronal 'umap_hm' embedding by the 'week' column.
ss.lib.set_figsize((5, 5))
ss.lib.plot_scatter(
    nr_pooled,
    basis='umap_hm',
    color=['week'],
    ncols=6,
    wspace=0.5,
    palette='viridis',
)

In [None]:
# Expression of selected genes on the pooled neuronal 'umap_hm' embedding,
# five panels per row, using the notebook-wide expression colour map.
ss.lib.set_figsize((3, 3))
ss.lib.plot_scatter(
    nr_pooled,
    basis='umap_hm',
    color=[
        'NRXN1', 'MBP', 'POSTN', 'CDK1', 'PIFO',
        'STMN2', 'PRRX1', 'CHGA', 'KRT8', 'MYF5',
    ],
    ncols=5,
    wspace=0.3,
    color_map=expr_cmap,
)

In [None]:
# Colour the pooled neuronal embedding by dataset of origin and by batch to
# inspect how the two mix.
ss.lib.set_figsize((4, 4))
ss.lib.plot_scatter(
    nr_pooled,
    basis='umap_hm',
    color=['dataset', 'batch'],
    ncols=6,
    wspace=0.5,
    palette='tab20',
)

In [None]:
# Compare four of the leiden_hm_* clusterings side by side on the embedding.
ss.lib.set_figsize((4, 4))
ss.lib.plot_scatter(
    nr_pooled,
    basis='umap_hm',
    color=[
        'leiden_hm_r0_1',
        'leiden_hm_r0_3',
        'leiden_hm_r0_5',
        'leiden_hm_r0_7',
    ],
    ncols=6,
    wspace=0.3,
)

In [119]:
# Differential expression between the 'leiden_hm_r0_1' clusters of the pooled
# neuronal object; the returned table is kept in nr_deg.
# NOTE(review): what the 'sc_default' filter preset does is defined in ss.lib -- confirm.
nr_deg = ss.lib.diffexp(nr_pooled, groupby='leiden_hm_r0_1', filter_params='sc_default')

  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


In [131]:
# First four rows per cluster of the DE table built from
# nr_pooled.uns['rank_genes_groups'].
(
    ss.lib.extract_de_table(nr_pooled.uns['rank_genes_groups'])
    .groupby('cluster')
    .head(4)
)

Unnamed: 0,cluster,ref,rank,genes,scores,logfoldchanges,pvals,pvals_adj
0,0,rest,0,S100A10,64.679207,2.706211,0.0,0.0
1,0,rest,1,S100A4,64.652817,3.752243,0.0,0.0
2,0,rest,2,S100A6,59.995148,2.772817,0.0,0.0
3,0,rest,3,NRXN1,54.102886,2.584646,0.0,0.0
18812,1,rest,0,POSTN,37.648357,2.957242,3.147506e-264,5.921087e-260
18813,1,rest,1,BCHE,36.811153,2.207459,6.074679999999999e-258,5.713844e-254
18814,1,rest,2,CDH6,36.456551,2.617402,2.660345e-249,1.6682139999999998e-245
18815,1,rest,3,PLS3,33.933086,2.191653,1.824546e-222,8.580841000000001e-219
37624,2,rest,0,CLU,39.514156,4.202529,1.7131740000000001e-261,2.929839e-258
37625,2,rest,1,MSX1,32.69017,5.206134,9.400142000000001e-188,4.912096e-185


In [123]:
# First four rows per cluster of the table returned by ss.lib.diffexp.
(
    nr_deg
    .groupby('cluster')
    .head(4)
)

Unnamed: 0,cluster,ref,rank,genes,scores,logfoldchanges,pvals,pvals_adj
37625,2,rest,1,MSX1,32.69017,5.206134,9.400142000000001e-188,4.912096e-185
37626,2,rest,2,SPARCL1,31.325298,4.746702,7.883896e-177,3.617362e-174
37627,2,rest,3,PIFO,30.762283,6.17272,6.156738000000001e-169,2.3164109999999998e-166
37628,2,rest,4,KRT18,28.965515,4.33095,1.251995e-157,3.991954e-155
56441,3,rest,5,MAD2L1,50.786125,4.730845,0.0,0.0
56444,3,rest,8,TOP2A,47.296158,5.692601,3.755575e-309,7.064987e-306
56445,3,rest,9,CENPF,46.479389,5.158944,1.586005e-307,2.7123569999999997e-304
56447,3,rest,11,UBE2C,45.247597,5.607773,9.682585e-291,1.517907e-287
75248,4,rest,0,STMN2,117.706276,7.507607,0.0,0.0
75258,4,rest,10,DCX,59.350899,5.884221,0.0,0.0


In [174]:
# Clear stored colour settings on the object before it is written to disk.
# NOTE(review): presumably removes the '*_colors' entries from .uns -- confirm in ss.lib.
ss.lib.clear_colors(nr_pooled)

In [175]:
# Save the processed pooled neuronal object as an lzf-compressed h5ad file
# (path is relative to the notebook's working directory).
nr_pooled.write('pooled_neuronal.processed.h5ad', compression='lzf')

... storing 'gene_symbols-0-organoid' as categorical
... storing 'gene_symbols-1-organoid' as categorical
  .format(key, e)


### Stroma

In [228]:
# Subsample the fetal stroma object per 'rachel_annot2' group.
# NOTE(review): fraction/min_n are presumably "take 2% of each group but at
# least 200 cells" -- confirm against ss.lib.subsample.
fST1_ad = ss.lib.subsample(
    fST_ad,
    groupby='rachel_annot2',
    fraction=0.02,
    min_n=200,
)

In [248]:
# Keep every subsampled fetal cell except those annotated 'Unknown' or
# 'Schwann cells', and materialise the selection as an independent copy.
fST2_ad = fST1_ad[
    ~fST1_ad.obs['rachel_annot2'].isin(['Unknown', 'Schwann cells']),
    :,
].copy()

In [249]:
# Replace the main matrix with the one stored in .raw.
# NOTE(review): assumes .raw covers the same genes as the object, and that it
# holds the pre-normalisation values needed for pooling -- confirm.
fST2_ad.X = fST2_ad.raw.X

In [250]:
# Trim the fetal obs table to the listed columns and rename 'pcw' -> 'week'
# and 'sanger_id' -> 'batch'.
fST2_ad.obs = fST2_ad.obs.loc[:, [
    'sanger_id',
    'chemistry_sorting',
    'donor',
    'gender',
    'pcw',
    'n_counts',
    'n_genes',
    'percent_mito',
    'percent_ribo',
    'percent_hb',
    'percent_top50',
    'rachel_annot2',
]].rename(columns={'pcw': 'week', 'sanger_id': 'batch'})

In [251]:
# Show the prepared fetal stroma subset (shape and available fields).
fST2_ad

AnnData object with n_obs × n_vars = 2848 × 27117 
    obs: 'batch', 'chemistry_sorting', 'donor', 'gender', 'week', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'rachel_annot2'
    var: 'gene_ids', 'cc', 'mito', 'ribo', 'hb', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'hvg_full'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm', 'X_umap_bk'
    varm: 'PCs'

In [232]:
# Restrict the organoid 'Stroma' obs table to the listed metadata columns.
ads['Stroma'].obs = ads['Stroma'].obs.loc[:, [
    'sample_id',
    'batch',
    'day',
    'week',
    'strain',
    'n_counts',
    'n_genes',
    'percent_mito',
    'percent_ribo',
    'percent_hb',
    'percent_top50',
    'nh3_annot1',
]]

In [265]:
# Show the organoid 'Stroma' subset after its obs columns were trimmed.
ads['Stroma']

AnnData object with n_obs × n_vars = 98067 × 27320 
    obs: 'sample_id', 'batch', 'day', 'week', 'strain', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'nh3_annot1'
    var: 'gene_ids', 'gene_symbols', 'highly_variable'
    obsm: 'X_umap_hm', 'X_umap_bk'

In [235]:
# Subsample the organoid stroma per 'nh3_annot1' group.
# NOTE(review): fraction/min_n semantics are defined in ss.lib.subsample --
# confirm. The repr shown further down reports the result as a view, not a copy.
oST_ad = ss.lib.subsample(
    ads['Stroma'],
    groupby='nh3_annot1',
    fraction=0.03,
    min_n=200,
)

In [268]:
# Number of fetal stroma cells per 'chemistry_sorting' value.
fST2_ad.obs.chemistry_sorting.value_counts()

SC5P-R2_CD45N     1432
SC3Pv2_CD45N      1019
SC5P-R2_CD45en     190
SC3Pv2_CD45P       144
SC5P-R2_CD45P       42
SC3Pv2_Total        21
Name: chemistry_sorting, dtype: int64

In [237]:
# Show the subsampled organoid stroma object (shape and available fields).
oST_ad

View of AnnData object with n_obs × n_vars = 3365 × 27320 
    obs: 'sample_id', 'batch', 'day', 'week', 'strain', 'n_counts', 'n_genes', 'percent_mito', 'percent_ribo', 'percent_hb', 'percent_top50', 'nh3_annot1'
    var: 'gene_ids', 'gene_symbols', 'highly_variable'
    obsm: 'X_umap_hm', 'X_umap_bk'

In [252]:
# Pool fetal-skin and organoid stroma cells; obs['dataset'] records which input
# ('fetal_skin' or 'organoid') each cell came from.
st_pooled = fST2_ad.concatenate(
    oST_ad,
    batch_key='dataset',
    batch_categories=['fetal_skin', 'organoid'],
)

In [253]:
# Boolean mask over cells: True for rows whose 'dataset' label is 'organoid'.
k_org = st_pooled.obs['dataset'] == 'organoid'

In [254]:
# Build one combined label per cell: 'fsk_<rachel_annot2>' by default, replaced
# by 'org_<nh3_annot1>' for the organoid rows selected by k_org, then stored as
# a categorical column.
st_pooled.obs['annot'] = (
    ('fsk_' + st_pooled.obs['rachel_annot2'].astype(str))
    .mask(k_org, 'org_' + st_pooled.obs['nh3_annot1'].astype(str))
    .astype('category')
)

In [255]:
# Run the shared scanpy_scripts pipeline on the pooled stroma object with both
# 'dataset' and 'batch' given as batch keys. The returned object (shown as the
# cell output) carries PCA, '_hm'-suffixed PCA/UMAP embeddings and leiden_hm_*
# clusterings.
# NOTE(review): the exact meaning of post_norm_only is defined in ss.lib -- confirm.
ss.lib.simple_default_pipeline(
    st_pooled,
    post_norm_only=True,
    batch=['dataset', 'batch'],
)

AnnData object with n_obs × n_vars = 6213 × 18812 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot2', 'sample_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'pca', 'neighbors_hm', 'neighbors', 'leiden'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PCs'

In [269]:
# Second pipeline pass over the pooled stroma object with post_pca_only=True.
# NOTE(review): which steps post_pca_only skips is defined in ss.lib -- confirm.
ss.lib.simple_default_pipeline(
    st_pooled,
    post_pca_only=True,
    batch=['dataset', 'batch'],
)

AnnData object with n_obs × n_vars = 6213 × 18812 
    obs: 'batch', 'chemistry_sorting', 'dataset', 'day', 'donor', 'gender', 'n_counts', 'n_genes', 'nh3_annot1', 'percent_hb', 'percent_mito', 'percent_ribo', 'percent_top50', 'rachel_annot2', 'sample_id', 'strain', 'week', 'annot', 'leiden_hm_r0_1', 'leiden_hm_r0_3', 'leiden_hm_r0_5', 'leiden_hm_r0_7', 'leiden_hm_r0_9'
    var: 'gene_ids-fetal_skin', 'cc-fetal_skin', 'mito-fetal_skin', 'ribo-fetal_skin', 'hb-fetal_skin', 'n_cells-fetal_skin', 'highly_variable-fetal_skin', 'means-fetal_skin', 'dispersions-fetal_skin', 'dispersions_norm-fetal_skin', 'highly_variable_nbatches-fetal_skin', 'highly_variable_intersection-fetal_skin', 'hvg_full-fetal_skin', 'gene_ids-organoid', 'gene_symbols-organoid', 'highly_variable-organoid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'pca', 'neighbors_hm', 'leiden', 'annot_colors', 'neighbors'
    obsm: 'X_pca', 'X_pca_hm', 'X_umap_hm'
    varm: 'PCs'

In [None]:
# Plot the 'umap_hm' embedding of the pooled stroma cells, grouped by the
# combined 'annot' label.
ss.lib.plot_embedding(
    st_pooled,
    basis='umap_hm',
    groupby='annot',
    figsize=(6, 6),
    palette='tab20',
    legend_fontsize=12,
)

In [None]:
# Highlight fetal-skin vs organoid stroma labels on the 'umap_hm' embedding,
# one named group per dataset.
ss.lib.set_figsize((4,4))
# NOTE(review): set_figure_params may reset other matplotlib settings besides
# dpi (possibly the figure size set just above) -- confirm the call order is intended.
sc.settings.set_figure_params(dpi=67)
ss.lib.highlight(st_pooled, basis='umap_hm', groupby='annot', groups={
    'fetal skin': ['fsk_MSC', 'fsk_Smooth muscle/pericyte', 'fsk_Muscle/myofibroblast', 'fsk_Fibroblast', 'fsk_Adipocyte'],
    'organoid': ['org_MSC / HF bud', 'org_Pericyte-like', 'org_Myocyte-like', 'org_Mesenchymal', 'org_Proliferating mesenchymal'],
}, wspace=0.7)

In [None]:
# Colour the pooled stroma 'umap_hm' embedding by the 'week' column.
ss.lib.set_figsize((5, 5))
ss.lib.plot_scatter(
    st_pooled,
    basis='umap_hm',
    color=['week'],
    ncols=6,
    wspace=0.5,
    palette='viridis',
)

In [None]:
# Colour the pooled stroma embedding by dataset of origin, week and batch.
ss.lib.set_figsize((4, 4))
ss.lib.plot_scatter(
    st_pooled,
    basis='umap_hm',
    color=['dataset', 'week', 'batch'],
    ncols=6,
    wspace=0.5,
    palette='tab20',
    size=10,
)

In [None]:
# Plot the pooled stroma embedding grouped by the 'leiden_hm_r0_3' clustering.
ss.lib.plot_embedding(
    st_pooled,
    basis='umap_hm',
    groupby='leiden_hm_r0_3',
    wspace=0.5,
    palette='tab20',
    size=10,
    figsize=(6, 6),
    legend_fontsize=14,
)

In [150]:
# Differential expression between the 'leiden_hm_r0_3' clusters of the pooled
# stroma object; the returned value is kept in st_deg.
# NOTE(review): what the 'sc_default' filter preset does is defined in ss.lib -- confirm.
st_deg = ss.lib.diffexp(st_pooled, groupby='leiden_hm_r0_3', filter_params='sc_default')

  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


In [None]:
# Expression of selected genes on the pooled stroma 'umap_hm' embedding,
# five panels per row, using the notebook-wide expression colour map.
ss.lib.set_figsize((3, 3))
ss.lib.plot_scatter(
    st_pooled,
    basis='umap_hm',
    color=[
        'COL3A1', 'CDK1', 'RGS5', 'ACTA2', 'MYOD1',
        'MYF5', 'LPL', 'RSPO3', 'SLC6A1', 'COL2A1',
    ],
    ncols=5,
    wspace=0.3,
    color_map=expr_cmap,
)

In [155]:
# First four rows per cluster of the DE table built from
# st_pooled.uns['rank_genes_groups'].
(
    ss.lib.extract_de_table(st_pooled.uns['rank_genes_groups'])
    .groupby('cluster')
    .head(4)
)

Unnamed: 0,cluster,ref,rank,genes,scores,logfoldchanges,pvals,pvals_adj
0,0,rest,0,VIM,24.648363,1.184374,2.25167e-116,4.235842e-112
1,0,rest,1,DPT,24.471739,2.584664,9.340036999999999e-115,8.785239e-111
2,0,rest,2,FTL,21.696331,1.078214,4.999481e-93,2.351256e-89
3,0,rest,3,COL6A2,21.343414,1.445403,4.035992e-90,1.518502e-86
18812,1,rest,0,MGP,34.275532,3.519251,9.4029e-199,1.768874e-194
18813,1,rest,1,OGN,33.042774,2.869116,4.642446e-187,4.366685e-183
18814,1,rest,2,ASPN,32.290695,3.85922,4.527943e-172,2.839322e-168
18815,1,rest,3,EGFL6,27.131275,2.94181,8.618068e-135,4.053078e-131
37624,2,rest,0,APCDD1,33.974319,3.200689,1.819807e-189,3.423422e-185
37625,2,rest,1,TWIST2,28.637514,2.619818,6.547346e-145,6.158434e-141


In [151]:
# First four rows per cluster of the DE table built from
# st_pooled.uns['rank_genes_groups_filtered'].
(
    ss.lib.extract_de_table(st_pooled.uns['rank_genes_groups_filtered'])
    .groupby('cluster')
    .head(4)
)

Unnamed: 0,cluster,ref,rank,genes,scores,logfoldchanges,pvals,pvals_adj
37639,2,rest,15,ADGRV1,19.714033,3.578923,1.7821600000000002e-73,1.862555e-70
37641,2,rest,17,F13A1,19.31019,4.476186,9.859539e-70,8.064245e-67
37647,2,rest,23,NELL2,18.355057,3.358448,1.860676e-65,1.3462710000000001e-62
37650,2,rest,26,SMPDL3A,18.053085,3.014866,3.988811e-64,2.7791670000000002e-61
56441,3,rest,5,NUSAP1,31.415335,6.472603,4.905724e-121,1.025405e-117
56442,3,rest,6,CDK1,31.327385,6.240442,6.49136e-122,1.5264430000000002e-118
56444,3,rest,8,MAD2L1,30.285755,5.061568,3.574461e-122,9.606109000000001e-119
56445,3,rest,9,UBE2C,29.523567,6.252518,6.459272e-113,1.104653e-109
75250,4,rest,2,TPD52,25.819702,5.932156,1.306109e-93,4.914104e-90
75253,4,rest,5,CRYM,24.161327,5.490643,5.802349e-87,1.8192300000000002e-83


In [None]:
# Second gene panel on the pooled stroma 'umap_hm' embedding, five per row.
ss.lib.set_figsize((3, 3))
ss.lib.plot_scatter(
    st_pooled,
    basis='umap_hm',
    color=[
        'CDK1', 'RGS5', 'ACTA2', 'MYOD1', 'MYF5',
        'LPL', 'SOX2', 'CRYM', 'RSPO3', 'POSTN',
    ],
    ncols=5,
    wspace=0.5,
    color_map=expr_cmap,
)

In [296]:
# Clear stored colour settings on the object before it is written to disk.
# NOTE(review): presumably removes the '*_colors' entries from .uns -- confirm in ss.lib.
ss.lib.clear_colors(st_pooled)

In [298]:
# Save the processed pooled stroma object as an lzf-compressed h5ad file
# (path is relative to the notebook's working directory).
st_pooled.write('pooled_stroma.processed.h5ad', compression='lzf')

... storing 'gene_symbols-organoid' as categorical


In [147]:
# Load the saved pooled stroma object from disk, replacing the in-memory st_pooled.
st_pooled = sc.read('pooled_stroma.processed.h5ad')

In [276]:
# Gene symbols used as the MSC marker panel.
msc_markers = [
    'NT5E',
    'THY1',
    'MCAM',
    'NGFR',
    'PDGFRA',
    'PDGFRB',
    'ITGA1',
]