In [1]:
import pandas as pd
import numpy as np
import sklearnex
sklearnex.patch_sklearn()
import sklearn
from sklearn.metrics import adjusted_rand_score as ari
from sklearn.metrics import adjusted_mutual_info_score as ami

import scanpy as sc
import os

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


# K-means

In [3]:
from Spanve import *
from tqdm import tqdm
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides warnings raised by later cells.
warnings.filterwarnings('ignore')

# One row per (dataset, embedding) pair; the next cell writes this table to CSV.
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

data_dir = './recode/IMPUTEDLIBD/data/'
data_ids = [i for i in os.listdir(data_dir) if i.endswith('.h5ad')]

# Embeddings to cluster. 'raw' is a PCA computed in this cell on the
# preprocessed data; every other entry is a key read from `adata_raw.obsm`.
# Not evaluated here: 'spanve-k.imputated.pca', 'spanve-k.pca'
pre_types = ['raw', 'cell_ranger.pca', 'p.spanve-k.imputated.pca', 'p.spanve-k.pca']

for data_id in tqdm(data_ids):
    adata_raw = sc.read_h5ad(os.path.join(data_dir, data_id))

    ground = adata_raw.obs['ground_truth']
    adata_pre = adata_preprocess(adata_raw)

    for pre_type in pre_types:
        # Keep the observation names next to the matrix they belong to, so the
        # label index never depends on an `adata` left over from a previous
        # iteration (or from a previous dataset).
        if pre_type == 'raw':
            adata = adata_pre.copy()
            sc.pp.pca(adata)
            X = adata.obsm['X_pca']
            obs_names = adata.obs_names
        else:
            X = adata_raw.obsm[pre_type]
            obs_names = adata_raw.obs_names

        # AutoCluster comes from the `Spanve` star import; results are recorded
        # under clust_method 'KMeans' (presumably K-means based — TODO confirm).
        cluster = AutoCluster()
        label = pd.Series(cluster.fit_predict(X), index=obs_names)
        recoder.loc[len(recoder),:] = data_id,ari(ground,label),ami(ground,label),pre_type,'KMeans'

100%|██████████| 12/12 [01:59<00:00,  9.96s/it]


In [5]:
# Write the K-means benchmark table (`recoder`, filled by the cell above) to disk.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/KM.res.csv')

# Leiden

In [3]:
import leidenalg
import numpy as np
import pandas as pd
from scanpy import _utils
from natsort import natsorted

def search_resolution_leiden(adata, use_weights=True, resolution_search=np.linspace(0.5,1.5,10), iterations=-1, seed=0):
    """Pick the Leiden resolution whose partition has the highest quality.

    A graph is built from ``adata.obsp['connectivities']`` and partitioned once
    per candidate resolution with ``leidenalg.RBConfigurationVertexPartition``.
    The candidate whose ``partition.quality()`` is largest is returned; on ties
    the earliest candidate wins.

    Parameters
    ----------
    adata
        Object exposing ``obsp['connectivities']`` (e.g. an AnnData on which
        ``sc.pp.neighbors`` has been run).
    use_weights
        If true, the graph's ``"weight"`` edge attribute is passed to leidenalg
        as float64 edge weights; otherwise the partition is unweighted.
    resolution_search
        Iterable of candidate resolution values. Must not be empty.
    iterations
        Forwarded to ``leidenalg.find_partition`` as ``n_iterations``.
    seed
        Forwarded to ``leidenalg.find_partition`` as ``seed`` so repeated calls
        give the same answer. Pass ``None`` for leidenalg's unseeded behaviour.

    Returns
    -------
    The element of ``resolution_search`` with the highest partition quality.

    Raises
    ------
    ValueError
        If ``resolution_search`` is empty.
    """
    resolutions = list(resolution_search)
    if not resolutions:
        raise ValueError("resolution_search must contain at least one resolution")

    g = _utils.get_igraph_from_adjacency(adata.obsp['connectivities'], directed=True)
    weights = None
    if use_weights:
        weights = np.array(g.es["weight"]).astype(np.float64)

    # Start without a winner instead of assuming quality > 0: the first
    # candidate is always accepted, so a result exists even when every
    # partition scores zero or below.
    best_r = None
    best_quality = -np.inf
    for r in resolutions:
        part = leidenalg.find_partition(
            g, leidenalg.RBConfigurationVertexPartition,
            resolution_parameter = r, weights = weights,
            n_iterations = iterations, seed = seed,
        )
        q = part.quality()
        if best_r is None or q > best_quality:
            best_quality = q
            best_r = r
    return best_r

In [4]:
from Spanve import *
from tqdm import tqdm
import leidenalg

# Leiden benchmark: for every dataset and every embedding, build a neighbour
# graph, pick a resolution with `search_resolution_leiden`, cluster with
# `sc.tl.leiden`, and score the labels against `ground_truth`.
data_dir = './recode/IMPUTEDLIBD/data/'
data_ids = [fname for fname in os.listdir(data_dir) if fname.endswith('.h5ad')]
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

# 'raw' means a PCA computed here; the other entries are passed to
# `sc.pp.neighbors` as `use_rep`.
# Not evaluated here: 'spanve-k.imputated.pca', 'spanve-k.pca'
pre_types = ['raw', 'cell_ranger.pca', 'p.spanve-k.imputated.pca', 'p.spanve-k.pca']

for data_id in tqdm(data_ids):
    h5ad_path = os.path.join(data_dir, data_id)
    adata = sc.read_h5ad(h5ad_path)
    ground = adata.obs['ground_truth']
    adata_pre = adata_preprocess(adata)

    for pre_type in pre_types:
        # Fresh copy per embedding so neighbours/labels never carry over.
        adata = adata_pre.copy()

        if pre_type != 'raw':
            sc.pp.neighbors(adata, use_rep=pre_type)
        else:
            sc.pp.pca(adata)
            sc.pp.neighbors(adata)

        best_r = search_resolution_leiden(adata)
        sc.tl.leiden(adata, resolution = best_r)
        label = adata.obs['leiden']

        scores = (ari(ground, label), ami(ground, label))
        recoder.loc[len(recoder),:] = (data_id, *scores, pre_type, 'Leiden')

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit(
  @numba.jit(
  @numba.jit()
2024-02-20 12:43:26.679054: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  g = _utils.get_igraph_from_adjacency(adata.uns['neighbors']['connectivities'], directed=True)
  g = _utils.get_igraph_from_adjacency(adata.uns['neighbors']['connectivities'], directed=True)
  g = _utils.get_igraph_from_adjacency(adata.uns['neighbors']['connectivities'], directed=True)
  g = _utils.get_igraph_from_adjacency(adata.uns['neighbors']['connectivities'], directed=True)
  g = _utils.get_igraph_from_adjacency(adata.uns['neighbors']['connectivities'], directed=True)
  g = _utils.get_igrap

In [6]:
# Write the Leiden benchmark table (`recoder`, filled by the cell above) to disk.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/LD.res.csv')

# BayesSpace
New Statistics Method

In [9]:
# Score BayesSpace cluster labels that were computed outside this notebook and
# saved as CSV files under ./recode/IMPUTEDLIBD/BS/.
data_dir = './recode/IMPUTEDLIBD/data/'
bs_dir = './recode/IMPUTEDLIBD/BS/'
data_ids = [i for i in os.listdir(data_dir) if i.endswith('.h5ad')]
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

for data_id in data_ids:
    sample = data_id.replace('.h5ad', '')
    adata = sc.read_h5ad(os.path.join(data_dir, data_id))

    # Only spots with a ground-truth annotation are scored.
    ground = adata.obs['ground_truth'].dropna()

    # Main BayesSpace output: one row per spot, re-indexed positionally with
    # the AnnData observation names (assumes the CSV rows are in the same
    # order as `adata` — TODO confirm against the script that wrote them).
    labels = pd.read_csv(f"{bs_dir}{sample}_BS.out.csv", index_col=0)
    labels.index = adata.obs_names

    for column, pre_type in [('spanve_i', 'spanve_impute'), ('base', 'raw')]:
        predicted = labels.loc[ground.index, column]
        recoder.loc[len(recoder),:] = data_id,ari(ground,predicted),ami(ground,predicted),pre_type,'bayes_space'

    for method in ['topK', 'spanve-k.imputated.pca', 'p.spanve-k.pca', 'spanve-k.pca']:
        method_labels = pd.read_csv(f'{bs_dir}{method}_{sample}.csv', index_col=0).iloc[:,0]

        # When the file has one label per spot, restrict it to the annotated
        # spots exactly as done for `labels` above (same positional-order
        # assumption). Otherwise `ari`/`ami` would fail on a length mismatch
        # whenever `dropna()` removed a spot. Files of any other length are
        # passed through unchanged.
        if len(method_labels) == len(adata.obs_names):
            method_labels = pd.Series(method_labels.to_numpy(), index=adata.obs_names)
            method_labels = method_labels.loc[ground.index]

        recoder.loc[len(recoder),:] = data_id,ari(ground,method_labels),ami(ground,method_labels),method,'bayes_space'

In [11]:
# Write the BayesSpace benchmark table (`recoder`, filled by the cell above) to disk.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/BS.res.csv')