In [2]:
import numpy as np
import pandas as pd
import scanpy as sc
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, adjusted_rand_score, normalized_mutual_info_score

from scDMFK.io import get_embedding, draw_umap

In [None]:
def calculate_cluster_results(data, true_labels, seed):
    """Cluster the 2-D embedding of ``data`` with K-means and score the result.

    The embedding is obtained from ``get_embedding`` and plotted with
    ``draw_umap`` (coloured by ``true_labels``). K-means is then run on the
    embedding with as many clusters as there are distinct true labels.

    Parameters
    ----------
    data : array-like
        Input matrix; converted to ``np.ndarray`` when it is not one already.
    true_labels : array-like
        Reference labels, one per row of ``data``. The number of unique
        values determines ``n_clusters``.
    seed : int or None
        Seed applied to NumPy's global RNG and passed to K-means as
        ``random_state`` so repeated calls give the same clustering.

    Returns
    -------
    tuple of float
        ``(ari, nmi, silhouette)``: adjusted Rand index and normalized mutual
        information of the K-means labels against ``true_labels``, and the
        silhouette coefficient of those labels evaluated on ``data``.
    """
    if not isinstance(data, np.ndarray):
        data = np.array(data)

    n_clusters = len(np.unique(true_labels))
    # Seed the global NumPy RNG for reproducible results.
    # NOTE(review): whether get_embedding draws from this RNG is not visible here.
    np.random.seed(seed)

    # Get the 2-D embedding and plot it coloured by the true labels.
    embedding = get_embedding(data)
    draw_umap(embedding, true_labels)

    # K-means on the embedding. random_state is passed explicitly so the
    # clustering does not depend on how much of the global RNG stream was
    # consumed before this point.
    kmeans = KMeans(n_clusters=n_clusters, random_state=seed).fit(embedding)
    labels = kmeans.labels_

    # Calculate metrics. The silhouette is computed on `data` (not on the
    # embedding). The result is named `silhouette` rather than `sc` so it does
    # not shadow the `sc` (scanpy) module alias inside this function.
    ari = adjusted_rand_score(true_labels, labels)
    nmi = normalized_mutual_info_score(true_labels, labels)
    silhouette = silhouette_score(data, labels)

    print(f'Adjusted Rand Index: {ari}')
    print(f'Normalized Mutual Information: {nmi}')
    print(f'Silhouette Coefficient: {silhouette}')

    return ari, nmi, silhouette


## 1. Endoderm Cells

In [None]:
# Names of the datasets whose results live under results/<name>/.
data_files = [
    'endoderm',
    'Quake_10x_Bladder',
    'Quake_10x_Limb_Muscle',
    'Quake_10x_Spleen',
    'Young',
]
# Build the results path for each dataset.
# NOTE: the loop body only rebinds `file_path`; once the loop finishes,
# `data` and `file_path` stay bound to the last entry of `data_files`.
for data in data_files:
    file_path = f'results/{data}/results.h5ad'

In [8]:
# Select the endoderm dataset explicitly so this cell gives the same result
# on a fresh kernel, instead of relying on whatever value `data` was last
# left with by a previously executed cell.
data = 'endoderm'

# Read the four result files stored for this dataset
# (multinomial, zinb, weight mse, mask mse).
end_multinom = sc.read('results/' + data + '/results-multinomial.h5ad')
end_zinb = sc.read('results/' + data + '/results-zinb.h5ad')
end_wmse = sc.read('results/' + data + '/results-weight mse.h5ad')
end_mmse = sc.read('results/' + data + '/results-mask mse.h5ad')