In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.spatial import distance_matrix
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, homogeneity_completeness_v_measure, fowlkes_mallows_score, silhouette_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.preprocessing import StandardScaler

In [None]:
def calculate_ARI(groundtruth, cluster):
    """Return the adjusted Rand index between reference and predicted labels.

    Thin wrapper around ``sklearn.metrics.adjusted_rand_score``.
    """
    return adjusted_rand_score(groundtruth, cluster)
def calculate_NMI(groundtruth, cluster):
    """Return the normalized mutual information between reference and predicted labels.

    Thin wrapper around ``sklearn.metrics.normalized_mutual_info_score``.
    """
    return normalized_mutual_info_score(groundtruth, cluster)
def calculate_FMI(groundtruth, cluster):
    """Return the Fowlkes-Mallows score between reference and predicted labels.

    Thin wrapper around ``sklearn.metrics.fowlkes_mallows_score``.
    """
    return fowlkes_mallows_score(groundtruth, cluster)
def calculate_Purity(groundtruth, cluster):
    """Return the purity of ``cluster`` with respect to ``groundtruth``.

    The contingency matrix is built with ``groundtruth`` as the first argument
    and ``cluster`` as the second. Purity is the sum of the column-wise maxima
    divided by the total number of counted observations.
    """
    counts = contingency_matrix(groundtruth, cluster)
    # Largest entry of each column of the contingency matrix.
    majority_per_cluster = counts.max(axis=0)
    return majority_per_cluster.sum() / counts.sum()
def calculate_v_measure(groundtruth, cluster):
    """Return ``(homogeneity, completeness, v_measure)`` for a clustering.

    Thin wrapper around
    ``sklearn.metrics.homogeneity_completeness_v_measure``; the three values
    are returned in the same order that function yields them.
    """
    homog, compl, v_score = homogeneity_completeness_v_measure(groundtruth, cluster)
    return homog, compl, v_score

In [None]:
def fx_1NN(i, location_in):
    """Return the distance from point ``i`` to its nearest other point.

    Parameters
    ----------
    i : int
        Row index of the query point in ``location_in``.
    location_in : array-like
        Coordinates, one point per row.

    Returns
    -------
    float
        Smallest distance between row ``i`` and any other row. If there is no
        other row the result is ``inf``.
    """
    coords = np.array(location_in)
    query = coords[i, :][None, :]
    dists = distance_matrix(query, coords)[0, :]
    # Exclude the query point itself so it cannot be its own nearest neighbour.
    dists[i] = np.inf
    return np.min(dists)
    
def fx_kNN(i, location_in, k, cluster_in):
    """Flag whether point ``i`` disagrees with most of its ``k`` nearest neighbours.

    Parameters
    ----------
    i : int
        Row index of the query point.
    location_in : array-like
        Coordinates, one point per row.
    k : int
        Number of nearest neighbours to inspect.
    cluster_in : array-like
        Label of every point, aligned with the rows of ``location_in``.

    Returns
    -------
    int
        ``1`` when strictly more than ``k / 2`` of the ``k`` nearest
        neighbours carry a label different from point ``i``'s, else ``0``.
    """
    coords = np.array(location_in)
    labels = np.array(cluster_in)
    dists = distance_matrix(coords[i, :][None, :], coords)[0, :]
    # Push the query point to the end of the ordering so it is not picked
    # as one of its own neighbours.
    dists[i] = np.inf
    nearest = np.argsort(dists)[:k]
    n_different = np.sum(labels[nearest] != labels[i])
    return 1 if n_different > (k / 2) else 0
def calculate_PAS(label, location, k=10):
    """Return the fraction of points whose label disagrees with most of their neighbours.

    For every point, ``fx_kNN`` reports ``1`` when strictly more than
    ``k / 2`` of its ``k`` nearest neighbours carry a different label. The
    result is the sum of those flags divided by the number of labels.

    Parameters
    ----------
    label : array-like
        One label per point.
    location : array-like
        Coordinates, one point per row, aligned with ``label``.
    k : int, optional
        Neighbourhood size handed to ``fx_kNN``. Defaults to 10, the value
        that was previously hard-coded.

    Returns
    -------
    float
        Proportion of flagged points.
    """
    label = np.array(label)
    location = np.array(location)
    flags = [
        fx_kNN(i, location, k=k, cluster_in=label)
        for i in range(location.shape[0])
    ]
    return np.sum(flags) / len(label)
def calculate_CHAOS(label, location):
    """Return the mean nearest-neighbour distance within clusters.

    Coordinates are standardised with ``StandardScaler``. For each distinct
    label, the distance from every member to its nearest fellow member
    (via ``fx_1NN``) is summed. Clusters with two or fewer members contribute
    nothing. The grand total is divided by the number of labels.

    Parameters
    ----------
    label : array-like
        One label per point.
    location : array-like
        Coordinates, one point per row, aligned with ``label``.

    Returns
    -------
    float
        Summed within-cluster nearest-neighbour distances divided by
        ``len(label)``.
    """
    labels = np.array(label)
    scaled_xy = StandardScaler().fit_transform(np.array(location))

    groups = np.unique(labels)
    # One slot per distinct label; slots of skipped clusters stay at zero.
    per_cluster = np.zeros(len(groups))
    slot = 0
    for group in groups:
        members = scaled_xy[labels == group, :]
        n_members = len(members)
        if n_members > 2:
            per_cluster[slot] = np.sum([fx_1NN(j, members) for j in range(n_members)])
            slot += 1

    return np.sum(per_cluster) / len(labels)
def calculate_ASW(adata, pred_key, spatial_key='spatial'):
    """Return the silhouette score of a labelling over pairwise spatial distances.

    Parameters
    ----------
    adata
        Object exposing ``obsm[spatial_key]`` (coordinates) and
        ``obs[pred_key]`` (labels).
    pred_key : str
        Key in ``adata.obs`` holding the labels to score.
    spatial_key : str, optional
        Key in ``adata.obsm`` holding the coordinates.

    Returns
    -------
    float
        ``silhouette_score`` computed on the precomputed pairwise distance
        matrix of the coordinates.
    """
    coords = adata.obsm[spatial_key]
    # Full square distance matrix, as required by metric='precomputed'.
    pairwise = squareform(pdist(coords))
    cluster_labels = adata.obs[pred_key]
    return silhouette_score(X=pairwise, labels=cluster_labels, metric='precomputed')

In [None]:
def evaluate_cluster_matrix(adata, tech, tissue, dataset, sample, method):
    """Score one clustering result and return the metrics as a one-row table.

    Parameters
    ----------
    adata
        Object exposing ``obs['ground_truth']``, ``obs['predicted']`` and
        ``obsm['spatial']`` (presumably an AnnData object; confirm with the
        caller).
    tech, tissue, dataset, sample, method
        Identifiers copied verbatim into the columns of the same name.

    Returns
    -------
    pandas.DataFrame
        A single row with the identifier columns followed by ``ARI``,
        ``NMI``, ``FMI``, ``Purity``, ``Homogeneity``, ``Completeness``,
        ``V_measure`` (predicted vs. ground truth) and the spatial metrics
        ``pas_*``, ``chaos_*`` and ``asw_*``, each computed once for the
        ground-truth labels (``_gt``) and once for the predicted labels
        (``_cluster``).
    """
    gt = adata.obs['ground_truth']
    cluster = adata.obs['predicted']
    spatial = adata.obsm['spatial']

    homogeneity, completeness, v_measure = calculate_v_measure(gt, cluster)

    # Insertion order of this dict defines the column order of the result.
    record = {
        "Tech": tech,
        "Tissue": tissue,
        "Dataset": dataset,
        "Sample": sample,
        "Method": method,
        "ARI": calculate_ARI(gt, cluster),
        "NMI": calculate_NMI(gt, cluster),
        "FMI": calculate_FMI(gt, cluster),
        "Purity": calculate_Purity(gt, cluster),
        "Homogeneity": homogeneity,
        "Completeness": completeness,
        "V_measure": v_measure,
        "pas_gt": calculate_PAS(gt, spatial),
        "pas_cluster": calculate_PAS(cluster, spatial),
        "chaos_gt": calculate_CHAOS(gt, spatial),
        "chaos_cluster": calculate_CHAOS(cluster, spatial),
        "asw_gt": calculate_ASW(adata, 'ground_truth'),
        # The predicted labels live under the same 'predicted' key read above.
        "asw_cluster": calculate_ASW(adata, 'predicted'),
    }

    # Build the row in one step rather than appending to an empty frame, so
    # the metric columns get numeric dtypes and no private pandas API is used.
    return pd.DataFrame([record], columns=list(record))

In [None]:
def calculate_and_rank(df, value_column, index_column='Method', columns_column='Tissue'):
    """Rank the per-group medians of one metric, best (largest) first.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format results containing ``value_column``, ``index_column`` and
        ``columns_column``.
    value_column : str
        Metric column to aggregate.
    index_column : str, optional
        Column whose values become the rows of the result.
    columns_column : str, optional
        Column whose values become the columns of the result.

    Returns
    -------
    pandas.DataFrame
        Integer ranks of the median ``value_column``. Ranking is done within
        each output column, in descending order, with ties sharing the
        smallest rank.
    """
    medians = pd.pivot_table(
        df,
        values=value_column,
        index=index_column,
        columns=columns_column,
        aggfunc='median',
    )
    ranks = medians.rank(method='min', ascending=False)
    return ranks.astype(int)

# Metrics for which a Method x Tissue rank table is produced.
columns_to_calculate = [
    'ARI', 'NMI', 'FMI', 'Purity',
    'Homogeneity', 'Completeness', 'V_measure',
]
# One rank table per metric, keyed by the metric's column name.
ranked_tables = {
    column: calculate_and_rank(df_10X, value_column=column)
    for column in columns_to_calculate
}

In [None]:
# Mean rank over six of the metrics; V_measure is not part of this average.
accuracy_metrics = ['ARI', 'NMI', 'FMI', 'Purity', 'Homogeneity', 'Completeness']
ranked_tables['Accuracy_Score'] = (
    sum(ranked_tables[metric] for metric in accuracy_metrics) / len(accuracy_metrics)
)