In [None]:
import seaborn as sns
import anndata
import scanpy as sc
from ALLCools.clustering import cluster_enriched_features, significant_pc_test, log_scale


In [None]:
# ---------------------------------------------------------------------------
# Input files
# ---------------------------------------------------------------------------
# AnnData files for the mCH and mCG matrices. They are read at the start of
# the notebook and written back to the same paths in the final cell.
mch_adata_path = 'mCH.HVF.h5ad'
mcg_adata_path = 'mCG.HVF.h5ad'

# ---------------------------------------------------------------------------
# Cluster Enriched Features (CEF) analysis
# ---------------------------------------------------------------------------
top_n_enriched_features = 200  # forwarded as `top_n`
alpha = 0.05                   # forwarded as `alpha`
stat_plot = True               # stat_plot option of the CEF step

# ---------------------------------------------------------------------------
# Clustering
# ---------------------------------------------------------------------------
# Name of a pre-calculated cluster column to use for CEF.
# If None, a basic clustering is performed with the parameters below.
cluster_col = None

# The following parameters are only used when cluster_col is None.
n_neighbors = 25
leiden_resolution = 1
min_cluster_size = 25  # enters the cap on the number of PCA components
cluster_plot = True    # draw a UMAP of the Leiden clusters

In [None]:
# Load the mCH and mCG AnnData objects from the configured paths.
mch_adata, mcg_adata = (
    anndata.read_h5ad(h5ad_path)
    for h5ad_path in (mch_adata_path, mcg_adata_path)
)


In [None]:
def _basic_leiden_clustering(adata, title, n_neighbors, resolution,
                             min_cluster_size, plot):
    """Run the basic clustering pipeline on one AnnData object.

    The same sequence is applied to the mCH and the mCG object: keep the
    current matrix in ``adata.raw``, log-scale, PCA, significant-PC test,
    neighbor graph and Leiden clustering, with an optional UMAP plot.

    Parameters
    ----------
    adata
        AnnData object to cluster; it is modified in place by the steps above.
    title
        Title of the UMAP plot (only used when ``plot`` is true).
    n_neighbors
        Passed to ``sc.pp.neighbors``.
    resolution
        Passed to ``sc.tl.leiden``.
    min_cluster_size
        Used only to round the cell count down when capping ``n_comps``.
    plot
        Whether to compute and draw a UMAP colored by the 'leiden' labels.

    Returns
    -------
    The object rebuilt from ``adata.raw`` via ``to_adata()``. The CEF step
    relies on the 'leiden' labels still being present on it.
    """
    # IMPORTANT
    # put the unscaled matrix in adata.raw before scaling
    adata.raw = adata

    # Return value is ignored, so this presumably scales adata in place.
    log_scale(adata)

    # Cap the number of components at 100, at the cell count rounded down to
    # a multiple of min_cluster_size, and at (cell count - 1). Every bound is
    # taken from this object itself, never from the other modality.
    n_obs = len(adata)
    n_comps = min(100, n_obs // min_cluster_size * min_cluster_size, n_obs - 1)
    sc.tl.pca(adata, n_comps=n_comps)
    # NOTE(review): with update=True this presumably keeps only the
    # significant PCs on adata - confirm against ALLCools.
    significant_pc_test(adata, p_cutoff=0.1, update=True)

    sc.pp.neighbors(adata, n_neighbors=n_neighbors)
    sc.tl.leiden(adata, resolution=resolution)

    if plot:
        sc.tl.umap(adata)
        sc.pl.umap(adata, color='leiden', title=title)

    # return to unscaled X, CEF need to use the unscaled matrix
    return adata.raw.to_adata()


if cluster_col is None:
    # No pre-calculated clusters were provided: cluster each modality on its
    # own and use the resulting 'leiden' column for the CEF analysis.
    mch_adata = _basic_leiden_clustering(mch_adata,
                                         title='mch',
                                         n_neighbors=n_neighbors,
                                         resolution=leiden_resolution,
                                         min_cluster_size=min_cluster_size,
                                         plot=cluster_plot)
    mcg_adata = _basic_leiden_clustering(mcg_adata,
                                         title='mcg',
                                         n_neighbors=n_neighbors,
                                         resolution=leiden_resolution,
                                         min_cluster_size=min_cluster_size,
                                         plot=cluster_plot)

    cluster_col = 'leiden'

In [None]:
# Run the Cluster Enriched Features (CEF) analysis on both modalities, using
# the cluster labels in `cluster_col`. All options, including `stat_plot`,
# come from the parameter cell so that changing them there takes effect here.
cluster_enriched_features(mch_adata,
                          cluster_col=cluster_col,
                          top_n=top_n_enriched_features,
                          alpha=alpha,
                          stat_plot=stat_plot)
cluster_enriched_features(mcg_adata,
                          cluster_col=cluster_col,
                          top_n=top_n_enriched_features,
                          alpha=alpha,
                          stat_plot=stat_plot)

In [None]:
# Save both AnnData objects.
# NOTE: the output paths are the same as the input paths, so the files that
# were loaded at the start of the notebook are overwritten.
mch_adata.write_h5ad(filename=mch_adata_path)
mcg_adata.write_h5ad(filename=mcg_adata_path)
