In [1]:
import FlowGrid as fg
import scanpy as sc
import sys

In [2]:
# Load the merged GSE102827 dataset from its .h5ad file.
raw_h5ad_path = 'GSE102827_merged_all_raw.h5ad'
adata = sc.read(raw_h5ad_path)

In [3]:
# Display the summary of the freshly loaded AnnData object (dimensions and obs fields).
adata

AnnData object with n_obs × n_vars = 65539 × 25187
    obs: 'stim', 'sample', 'maintype', 'celltype', 'subtype'

In [4]:
# Normalization: scale total counts per cell, then log1p-transform.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
# Keep the normalized, log-transformed matrix with all genes in .raw
# before the gene subset below.
adata.raw = adata

# Highly variable gene selection
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Slicing an AnnData returns a view; take an explicit copy so the later
# in-place steps (PCA, neighbors, clustering, UMAP) write to a real object
# instead of forcing an implicit copy of the view with a warning.
adata = adata[:, adata.var['highly_variable']].copy()

In [5]:
# Reduce the data to 5 principal components.
n_components = 5
sc.tl.pca(adata, n_comps=n_components)

In [6]:
# Display the AnnData summary after highly-variable-gene selection and PCA.
adata

AnnData object with n_obs × n_vars = 65539 × 3242
    obs: 'stim', 'sample', 'maintype', 'celltype', 'subtype'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'log1p', 'hvg', 'pca'
    obsm: 'X_pca'
    varm: 'PCs'

In [7]:
# Run the automatic FlowGrid parameter search.
# NOTE(review): the second argument is presumably the number of parameter
# sets to recommend (the run reports "3 sets of parameters are recommended")
# - confirm against the FlowGrid documentation.
n_recommended = 3
recomm_parameters, CHI_report = fg.autoFlowGrid(adata, n_recommended)

autoFlowGrid Starts!
autoFlowGrid completed in : 27.686 seconds.
29 sets of parameters are stored.
3 sets of parameters are recommended.



In [8]:
# Display the report returned by autoFlowGrid (iCHI, CHI and nobs per parameter set).
CHI_report

Unnamed: 0,iCHI,CHI,nobs
binN_10_eps_1.0_FlowGrid,882791,19618,45
binN_5_eps_1.0_FlowGrid,855735,45039,19
binN_7_eps_1.0_FlowGrid,678935,18350,37
binN_11_eps_1.0_FlowGrid,678430,14748,46
binN_15_eps_1.6_FlowGrid,642825,20736,31
binN_9_eps_1.0_FlowGrid,622054,16812,37
binN_11_eps_1.6_FlowGrid,615403,29305,21
binN_12_eps_1.0_FlowGrid,528115,10156,52
binN_6_eps_1.0_FlowGrid,516769,17226,30
binN_8_eps_1.0_FlowGrid,513460,15559,33


In [None]:

# Scanpy Leiden clustering for comparison
# (the clustering call below is sc.tl.leiden, not Louvain).
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=5)
sc.tl.leiden(adata)

# UMAP embedding
sc.tl.umap(adata)

# Plot the recommended FlowGrid labelings on the UMAP.
sc.settings.set_figure_params(dpi=100, facecolor="#a3b2ab")
sc.pl.umap(
    adata,
    color=recomm_parameters,
    frameon=False,
    palette='gist_ncar',
    legend_fontsize=6,
    size=3,
)

In [None]:
# Plot the 'maintype' annotation next to the Leiden clusters on the UMAP.
sc.settings.set_figure_params(dpi=100, facecolor="#a3b2ab")
label_columns = ['maintype', 'leiden']
sc.pl.umap(
    adata,
    color=label_columns,
    frameon=False,
    palette='gist_ncar',
    legend_fontsize=6,
    size=3,
)

In [12]:
# Adjusted Rand index of each recommended FlowGrid labeling against 'maintype'.
reference_labels = ['maintype']
fg.AdjustedRandScore(adata, recomm_parameters, reference_labels)

binN_10_eps_1.0_FlowGrid vs.maintype ARI:0.5748
binN_5_eps_1.0_FlowGrid vs.maintype ARI:0.5006
binN_7_eps_1.0_FlowGrid vs.maintype ARI:0.4564


In [13]:
# Adjusted Rand index of the Leiden clustering against 'maintype'.
leiden_labels = ['leiden']
maintype_labels = ['maintype']
fg.AdjustedRandScore(adata, leiden_labels, maintype_labels)

leiden vs.maintype ARI:0.2555


In [21]:
# Inspect adata before pruning: obs holds one column per stored FlowGrid parameter set, plus 'leiden'.
adata

AnnData object with n_obs × n_vars = 65539 × 3242
    obs: 'stim', 'sample', 'maintype', 'celltype', 'subtype', 'binN_14_eps_0.6_FlowGrid', 'binN_14_eps_0.8_FlowGrid', 'binN_5_eps_1.0_FlowGrid', 'binN_6_eps_1.0_FlowGrid', 'binN_7_eps_1.0_FlowGrid', 'binN_8_eps_1.0_FlowGrid', 'binN_9_eps_1.0_FlowGrid', 'binN_10_eps_1.0_FlowGrid', 'binN_11_eps_1.0_FlowGrid', 'binN_12_eps_1.0_FlowGrid', 'binN_5_eps_1.2_FlowGrid', 'binN_6_eps_1.2_FlowGrid', 'binN_7_eps_1.2_FlowGrid', 'binN_8_eps_1.2_FlowGrid', 'binN_9_eps_1.2_FlowGrid', 'binN_10_eps_1.2_FlowGrid', 'binN_11_eps_1.2_FlowGrid', 'binN_12_eps_1.2_FlowGrid', 'binN_5_eps_1.4_FlowGrid', 'binN_6_eps_1.4_FlowGrid', 'binN_7_eps_1.4_FlowGrid', 'binN_8_eps_1.4_FlowGrid', 'binN_9_eps_1.4_FlowGrid', 'binN_10_eps_1.4_FlowGrid', 'binN_11_eps_1.4_FlowGrid', 'binN_12_eps_1.4_FlowGrid', 'binN_7_eps_1.6_FlowGrid', 'binN_8_eps_1.6_FlowGrid', 'binN_11_eps_1.6_FlowGrid', 'binN_13_eps_1.6_FlowGrid', 'binN_15_eps_1.6_FlowGrid', 'leiden'
    var: 'highly_variable', 

In [22]:
# Keep only the recommended FlowGrid label columns in adata.obs; the other
# stored FlowGrid labelings are dropped (compare the obs listings shown
# before and after this cell).
fg.keep_labels(adata,recomm_parameters)

In [23]:
# Confirm that only the recommended FlowGrid label columns (and 'leiden') remain in adata.obs.
adata

AnnData object with n_obs × n_vars = 65539 × 3242
    obs: 'stim', 'sample', 'maintype', 'celltype', 'subtype', 'binN_5_eps_1.0_FlowGrid', 'binN_7_eps_1.0_FlowGrid', 'binN_10_eps_1.0_FlowGrid', 'leiden'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'leiden', 'umap', 'binN_10_eps_1.0_FlowGrid_colors', 'binN_5_eps_1.0_FlowGrid_colors', 'binN_7_eps_1.0_FlowGrid_colors', 'leiden_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'