In [None]:
import pandas as pd
import numpy as np
import sklearn

from sklearn.metrics import adjusted_rand_score as ari
from sklearn.metrics import adjusted_mutual_info_score as ami

import scanpy as sc
import os

# Shared results table: the benchmark loops below append one row per
# (dataset, preprocessing variant, clustering method).
recoder = pd.DataFrame(
    columns=['data', 'ari', 'ami', 'pre_type', 'clust_method'],
)

# K-means

In [None]:
from Softs.Spanve import *
from tqdm import tqdm

# Benchmark the clustering produced by AutoCluster (recorded as 'KMeans') on
# every .h5ad file in `data_dir`, once per embedding variant, scoring each
# result against the 'ground_truth' annotation with ARI and AMI.
data_dir = './recode/IMPUTEDLIBD/processed/'
data_ids = [i for i in os.listdir(data_dir) if i.endswith('.h5ad')]

# Start from an empty table so re-running this cell never duplicates rows and
# the CSV written afterwards only contains rows produced here (the BayesSpace
# cell resets `recoder` the same way).
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

# pre_type -> key in `adata.obsm` holding the embedding to cluster.
# 'raw' has no stored embedding; PCA is computed on the fly into 'X_pca'.
embedding_keys = {
    'raw': 'X_pca',
    's.impute': 'spanve.imputed.pca',
    's.impute_p': 'spanve.imputed.pca.pre',
}

for data_id in tqdm(data_ids):
    adata_raw = sc.read_h5ad(
        os.path.join(data_dir,data_id)
    )
    ground = adata_raw.obs['ground_truth']
    # Spots without a ground-truth label cannot be scored: drop them, as the
    # BayesSpace cell does. A positional mask is used so the filter does not
    # depend on index alignment between `ground` and the predicted labels.
    has_truth = ground.notna().to_numpy()
    ground_scored = ground[has_truth]

    adata_pre = adata_preprocess(adata_raw)

    for pre_type, obsm_key in embedding_keys.items():
        # Work on a copy so PCA for 'raw' never leaks into the other variants.
        adata = adata_pre.copy()
        if pre_type == 'raw':
            sc.pp.pca(adata)
        X = adata.obsm[obsm_key]

        cluster = AutoCluster()
        label = pd.Series(cluster.fit_predict(X),index=adata.obs_names)
        label_scored = label[has_truth]

        recoder.loc[len(recoder),:] = (
            data_id,
            ari(ground_scored,label_scored),
            ami(ground_scored,label_scored),
            pre_type,
            'KMeans',
        )

In [None]:
# Persist the current contents of `recoder` (row index included) as CSV.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/KM.res.csv')

# Leiden

In [None]:
from Softs.Spanve import *
from tqdm import tqdm

# Benchmark Leiden clustering on every .h5ad file in `data_dir`, once per
# neighbour-graph representation, scoring each result against the
# 'ground_truth' annotation with ARI and AMI.
data_dir = './recode/IMPUTEDLIBD/processed/'
data_ids = [i for i in os.listdir(data_dir) if i.endswith('.h5ad')]

# Start from an empty table: otherwise rows from earlier cells (or from a
# previous run of this one) would end up in LD.res.csv and in the plot below.
# The BayesSpace cell resets `recoder` the same way.
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

# (pre_type, use_rep) pairs. use_rep=None is scanpy's default for
# sc.pp.neighbors; for that variant PCA is run first.
leiden_variants = [
    ('raw', None),
    ('s.impute_p', 'spanve.imputed.pre'),
    ('s.impute', 'spanve.imputed'),
]

for data_id in tqdm(data_ids):
    adata = sc.read_h5ad(
        os.path.join(data_dir,data_id)
    )
    ground = adata.obs['ground_truth']
    # Spots without a ground-truth label cannot be scored: drop them, as the
    # BayesSpace cell does. The mask is positional so it can be applied to the
    # predicted labels without relying on index alignment.
    has_truth = ground.notna().to_numpy()
    ground_scored = ground[has_truth]

    # NOTE(review): `model` is not used anywhere in this cell. It is kept
    # because Spanve(...) may touch `adata`; confirm and remove if it does not.
    model = Spanve(adata)
    adata_pre = adata_preprocess(adata)

    for pre_type, use_rep in leiden_variants:
        # Fresh copy per variant so graphs/labels never leak between variants.
        adata = adata_pre.copy()
        if use_rep is None:
            sc.pp.pca(adata)
        sc.pp.neighbors(adata,use_rep=use_rep)
        sc.tl.leiden(adata)
        label = adata.obs['leiden']
        label_scored = label[has_truth]

        recoder.loc[len(recoder),:] = (
            data_id,
            ari(ground_scored,label_scored),
            ami(ground_scored,label_scored),
            pre_type,
            'Leiden',
        )

In [None]:
# Persist the current contents of `recoder` (row index included) as CSV.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/LD.res.csv')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Box plot of ARI grouped by preprocessing variant, for whatever rows
# `recoder` currently holds.
f = plt.figure(dpi=150)
sns.boxplot(x='pre_type', y='ari', data=recoder, ax=f.gca())

# Louvain

In [None]:
from Softs.Spanve import *
from tqdm import tqdm

# Benchmark Louvain clustering on every .h5ad file in `data_dir`, once per
# neighbour-graph representation, scoring each result against the
# 'ground_truth' annotation with ARI and AMI.
data_dir = './recode/IMPUTEDLIBD/processed/'
data_ids = [i for i in os.listdir(data_dir) if i.endswith('.h5ad')]

# Start from an empty table: otherwise rows from earlier cells (or from a
# previous run of this one) would end up in LV.res.csv and in the plot below.
# The BayesSpace cell resets `recoder` the same way.
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

# (pre_type, use_rep) pairs. use_rep=None is scanpy's default for
# sc.pp.neighbors; for that variant PCA is run first.
louvain_variants = [
    ('raw', None),
    ('s.impute', 'spanve.imputed'),
    ('s.impute_p', 'spanve.imputed.pre'),
]

for data_id in tqdm(data_ids):
    adata = sc.read_h5ad(
        os.path.join(data_dir,data_id)
    )
    ground = adata.obs['ground_truth']
    # Spots without a ground-truth label cannot be scored: drop them, as the
    # BayesSpace cell does. The mask is positional so it can be applied to the
    # predicted labels without relying on index alignment.
    has_truth = ground.notna().to_numpy()
    ground_scored = ground[has_truth]

    # NOTE(review): `model` is not used anywhere in this cell. It is kept
    # because Spanve(...) may touch `adata`; confirm and remove if it does not.
    model = Spanve(adata)
    adata_pre = adata_preprocess(adata)

    for pre_type, use_rep in louvain_variants:
        # Fresh copy per variant so graphs/labels never leak between variants.
        adata = adata_pre.copy()
        if use_rep is None:
            sc.pp.pca(adata)
        sc.pp.neighbors(adata,use_rep=use_rep)
        sc.tl.louvain(adata)
        label = adata.obs['louvain']
        label_scored = label[has_truth]

        recoder.loc[len(recoder),:] = (
            data_id,
            ari(ground_scored,label_scored),
            ami(ground_scored,label_scored),
            pre_type,
            'louvain',
        )

In [None]:
# Persist the current contents of `recoder` (row index included) as CSV.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/LV.res.csv')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Box plot of ARI grouped by preprocessing variant, for whatever rows
# `recoder` currently holds.
f = plt.figure(dpi=150)
sns.boxplot(x='pre_type', y='ari', data=recoder, ax=f.gca())

# BayesSpace
New statistics method: spots without a ground-truth label are dropped before ARI/AMI are computed.

In [None]:
# Score pre-computed BayesSpace label files against the 'ground_truth'
# annotation of each .h5ad file in `data_dir`. The results table is reset
# first, so it only holds BayesSpace rows afterwards.
data_dir = './recode/IMPUTEDLIBD/processed/'
data_ids = [fname for fname in os.listdir(data_dir) if fname.endswith('.h5ad')]
recoder = pd.DataFrame(columns = ['data','ari','ami','pre_type','clust_method'])

for data_id in data_ids:
    labels = pd.read_csv(f"./recode/IMPUTEDLIBD/BS/{data_id.replace('.h5ad','')}_BS.out.csv",index_col=0)
    adata = sc.read_h5ad(os.path.join(data_dir,data_id))
    # Ground truth is read from 'ground_truth' (an earlier alternative was
    # 'layer_guess_reordered').
    ground = adata.obs['ground_truth']
    # NOTE(review): this relabels the CSV rows positionally with the spot
    # names of `adata`; it presumes both are in the same order -- confirm.
    labels.index = adata.obs_names
    # Only spots that carry a ground-truth label are scored.
    ground = ground.dropna()

    # One row per label column: (column in the CSV, pre_type tag to record).
    for bs_column, bs_tag in (('spanve_i','spanve_impute'), ('base','raw')):
        predicted = labels.loc[ground.index,bs_column]
        recoder.loc[len(recoder),:] = (
            data_id,
            ari(ground,predicted),
            ami(ground,predicted),
            bs_tag,
            'bayes_space',
        )


In [None]:
# Persist the current contents of `recoder` (row index included) as CSV.
recoder.to_csv(path_or_buf='./recode/IMPUTEDLIBD/BS.res.csv')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# recoder = pd.read_csv('./recode/IMPUTEDLIBD/LD.res.csv',index_col=0)

# Box plot of ARI grouped by preprocessing variant, for whatever rows
# `recoder` currently holds.
f = plt.figure(dpi=150)
sns.boxplot(x='pre_type', y='ari', data=recoder, ax=f.gca())