# Evaluation

> Utilities for evaluating how well latent representations cluster against one or more sets of ground-truth labels

In [1]:
#| default_exp evaluation

In [2]:
#| export
#| hide
import numpy as np
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, homogeneity_score, completeness_score, v_measure_score, fowlkes_mallows_score, silhouette_score
from sklearn.metrics.cluster import contingency_matrix

In [3]:
#| export
def evaluate_clustering_multiple_labels(latent_representations: np.ndarray,  # The latent space data.
                                        list_of_labels: list,                # List of true labels or a single true labels array.
                                        clustering_method: str = 'kmeans',   # The clustering algorithm to use ('kmeans', 'gmm', 'dbscan').
                                        label_names: list = None,            # Optional names for the label sets.
                                        **kwargs                             # Additional arguments for the clustering algorithm.
                                       ) -> dict:                            # Returns a dictionary with clustering metrics.
    """
    Evaluates the clustering quality of the latent representations for one or multiple sets of labels.

    For every label set the latent representations are clustered (with as many
    clusters as the set has distinct labels for 'kmeans' and 'gmm'; DBSCAN chooses
    its own number) and the predicted partition is compared against the true labels.

    The returned dictionary contains, per label set, the keys
    `<name>_ARI`, `<name>_NMI`, `<name>_Homogeneity`, `<name>_Completeness`,
    `<name>_V-Measure`, `<name>_FMI`, `<name>_Purity` and `<name>_Silhouette Score`.
    When more than one label set is given, `Average_<metric>` keys holding the mean
    over all sets are added as well. An empty `list_of_labels` yields an empty dict.

    `kwargs` are forwarded to the clustering estimator. For 'kmeans' the defaults
    `random_state=42, n_init=10` and for 'gmm' the default `random_state=42` apply
    unless overridden through `kwargs`.

    The silhouette score is reported as NaN when it is undefined, i.e. when the
    predicted partition has fewer than 2 clusters or one cluster per sample
    (for example DBSCAN marking every point as noise).

    Raises `ValueError` for an unsupported `clustering_method` or when fewer
    `label_names` than label sets are supplied.
    """
    # Fail fast on a bad method name, before any (potentially expensive) clustering.
    if clustering_method not in ('kmeans', 'gmm', 'dbscan'):
        raise ValueError("Unsupported clustering method. Choose from 'kmeans', 'gmm', 'dbscan'.")

    # Normalise the labels argument to a list of label sets.
    if isinstance(list_of_labels, np.ndarray):
        list_of_labels = [list_of_labels]
    else:
        list_of_labels = list(list_of_labels)
        # A flat sequence of scalar labels is a single label set, not many sets.
        if list_of_labels and np.ndim(list_of_labels[0]) == 0:
            list_of_labels = [np.asarray(list_of_labels)]
    num_label_sets = len(list_of_labels)

    # Use default names if label_names are not provided
    if label_names is None:
        label_names = [f'Set_{i+1}' for i in range(num_label_sets)]
    elif len(label_names) < num_label_sets:
        raise ValueError(
            f"label_names has {len(label_names)} entries but {num_label_sets} label sets were given."
        )

    metric_names = ('ARI', 'NMI', 'Homogeneity', 'Completeness',
                    'V-Measure', 'FMI', 'Purity', 'Silhouette Score')
    combined_metrics = {}
    metric_totals = {metric: 0 for metric in metric_names}

    # The predicted partition (and its silhouette) depends only on the number of
    # clusters requested -- and for DBSCAN on nothing set-specific at all -- so it
    # is computed once per distinct cluster count and reused across label sets.
    clustering_cache = {}

    for i, true_labels in enumerate(list_of_labels):
        # Determine the number of clusters
        n_clusters = len(np.unique(true_labels))
        cache_key = None if clustering_method == 'dbscan' else n_clusters

        if cache_key not in clustering_cache:
            # Apply the selected clustering algorithm; caller kwargs take
            # precedence over the reproducibility defaults.
            if clustering_method == 'kmeans':
                params = {'random_state': 42, 'n_init': 10, **kwargs}
                clusterer = KMeans(n_clusters=n_clusters, **params)
            elif clustering_method == 'gmm':
                params = {'random_state': 42, **kwargs}
                clusterer = GaussianMixture(n_components=n_clusters, **params)
            else:  # 'dbscan'
                clusterer = DBSCAN(**kwargs)
            pred_labels = clusterer.fit_predict(latent_representations)

            # silhouette_score is only defined for 2 <= n_labels <= n_samples - 1.
            # Note that DBSCAN's noise label (-1) is counted as a cluster of its own here.
            n_found = len(np.unique(pred_labels))
            if 2 <= n_found <= len(pred_labels) - 1:
                silhouette = silhouette_score(latent_representations, pred_labels)
            else:
                silhouette = float('nan')
            clustering_cache[cache_key] = (pred_labels, silhouette)

        pred_labels, silhouette = clustering_cache[cache_key]

        # Purity: fraction of samples that belong to the majority true class of
        # their predicted cluster (rows = true labels, columns = predicted).
        cont_matrix = contingency_matrix(true_labels, pred_labels)
        purity = np.sum(np.amax(cont_matrix, axis=0)) / np.sum(cont_matrix)

        scores = {
            'ARI': adjusted_rand_score(true_labels, pred_labels),
            'NMI': normalized_mutual_info_score(true_labels, pred_labels),
            'Homogeneity': homogeneity_score(true_labels, pred_labels),
            'Completeness': completeness_score(true_labels, pred_labels),
            'V-Measure': v_measure_score(true_labels, pred_labels),
            'FMI': fowlkes_mallows_score(true_labels, pred_labels),
            'Purity': purity,
            'Silhouette Score': silhouette,
        }

        # Store the results for this set of labels and accumulate for averaging
        for metric, value in scores.items():
            combined_metrics[f'{label_names[i]}_{metric}'] = value
            metric_totals[metric] += value

    # Compute the average metrics if there are multiple sets of labels
    if num_label_sets > 1:
        for metric in metric_names:
            combined_metrics[f'Average_{metric}'] = metric_totals[metric] / num_label_sets

    return combined_metrics

In [4]:
#| hide
# Run nbdev's export step (presumably writes this notebook's `#| export` cells
# to the module named by `#| default_exp` -- confirm against the project's nbdev setup).
import nbdev

nbdev.nbdev_export()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)