# Import Dependencies

In [1]:
import pandas as pd
from sklearn.metrics import pair_confusion_matrix
import numpy as np

In [2]:
def modified_rand_score(labels_true, labels_pred):
    """
    Computes the Modified Rand Index (MRI), a Rand-Index-style agreement
    score between two labelings with an extra "missing pairs" term.

    The score is

        MRI = (n_00 + n_11 + n_xx) / (n_00 + n_01 + n_10 + n_11 + n_xx)

    where n_00 / n_11 are the pairs on which both labelings agree
    (separated in both / grouped in both), n_01 / n_10 are the pairs on
    which they disagree, and n_xx is an extra term that is counted as
    agreement. Every term is expressed in *ordered* pairs, which is the
    convention used by ``sklearn.metrics.pair_confusion_matrix`` (each
    unordered pair is counted twice).

    Parameters:
    - labels_true: array-like, shape (n_samples,)
      Ground truth labels (e.g., KEGG or Cho dataset).

    - labels_pred: array-like, shape (n_samples,)
      Predicted labels from a clustering algorithm.

    Returns:
    - MRI: float
      A similarity score between 0.0 and 1.0, where higher values indicate
      better alignment. Returns 1.0 when there are no pairs to compare.
    """

    # 2x2 pair confusion matrix, flattened row by row.
    pair_counts = pair_confusion_matrix(labels_true, labels_pred)
    n_00, n_01, n_10, n_11 = pair_counts.ravel()

    # NOTE(review): n_xx is derived from the difference in the number of
    # distinct labels of the two labelings, not from samples that are
    # actually absent from one of them -- confirm this matches the
    # intended definition of "missing" pairs.
    cluster_gap = abs(len(set(labels_true)) - len(set(labels_pred)))

    # Ordered-pair count k * (k - 1), not k * (k - 1) / 2, so that n_xx is
    # on the same scale as the pair confusion matrix entries above.
    n_xx = cluster_gap * (cluster_gap - 1)

    total_pairs = n_00 + n_01 + n_10 + n_11 + n_xx

    # Fewer than two samples and no missing pairs: nothing to disagree on.
    if total_pairs == 0:
        return 1.0

    # Cast so both branches return a built-in float.
    return float((n_00 + n_11 + n_xx) / total_pairs)