# Import Dependencies

In [1]:
import pandas as pd
from sklearn.metrics import rand_score, adjusted_rand_score
import numpy as np

# Formulation of the Modified Rand Index (MRI)

In [None]:
import itertools

# TODO: Reimplement the current MRI function without forming a network
def modified_rand_score(labels_true, labels_pred, nodes_true_list, nodes_pred_list):
    """
    Compute the Modified Rand Index (MRI), an extension of the Rand Index
    that accounts for missing elements between partitions.

    Every unordered pair of positions in the true partition falls into
    exactly one of these categories:

    * ``n_xx`` -- at least one node of the pair is absent from the
      predicted partition (counted as an agreement);
    * ``n_11`` -- both nodes are present and share a cluster in both
      partitions;
    * ``n_00`` -- both nodes are present and are in different clusters in
      both partitions;
    * otherwise the pair is a disagreement and is not counted.

    ``MRI = (n_00 + n_11 + n_xx) / total_pairs``

    Predicted labels are looked up by node identity rather than by list
    position, so ``nodes_pred_list`` may be in any order and may omit nodes
    anywhere in ``nodes_true_list`` (not only at its tail). Nodes that
    appear only in ``nodes_pred_list`` are ignored.

    Parameters
    ----------
    labels_true : list
        True labels corresponding to the nodes.
    labels_pred : list
        Predicted labels corresponding to the nodes.
    nodes_true_list : list
        An ordered list of (hashable) nodes corresponding to labels_true.
    nodes_pred_list : list
        An ordered list of (hashable) nodes corresponding to labels_pred.

    Returns
    -------
    mri : float
        The Modified Rand Index score. Returns 1.0 when the true partition
        has fewer than two nodes, i.e. there are no pairs to compare.

    Raises
    ------
    ValueError
        If a label list and its node list differ in length.
    """
    if len(labels_true) != len(nodes_true_list):
        raise ValueError(
            "labels_true and nodes_true_list must have the same length "
            f"(got {len(labels_true)} and {len(nodes_true_list)})"
        )
    if len(labels_pred) != len(nodes_pred_list):
        raise ValueError(
            "labels_pred and nodes_pred_list must have the same length "
            f"(got {len(labels_pred)} and {len(nodes_pred_list)})"
        )

    n = len(labels_true)
    total_pairs = n * (n - 1) // 2  # All possible unique node pairs
    if total_pairs == 0:
        return 1.0  # Perfect match in trivial cases

    # Node -> predicted label. Indexing labels_pred with a position taken
    # from nodes_true_list is only valid when both node lists share the same
    # order and the missing nodes are all at the end; the mapping removes
    # that hidden assumption.
    pred_label_of = dict(zip(nodes_pred_list, labels_pred))

    n_00 = n_11 = n_xx = 0
    for i, j in itertools.combinations(range(n), 2):
        node_i, node_j = nodes_true_list[i], nodes_true_list[j]

        # Both nodes come from the true partition, so "common to both
        # partitions" reduces to "present in the predicted partition".
        if node_i not in pred_label_of or node_j not in pred_label_of:
            n_xx += 1
            continue

        same_true = labels_true[i] == labels_true[j]
        same_pred = pred_label_of[node_i] == pred_label_of[node_j]
        if same_true and same_pred:
            n_11 += 1  # Agreeing pair (same in both)
        elif not same_true and not same_pred:
            n_00 += 1  # Agreeing pair (different in both)

    return (n_00 + n_11 + n_xx) / total_pairs

# Testing the MRI formula

## Sample test cases

In [3]:
# Hand-built scenarios for comparing MRI against RI and ARI. Each entry
# pairs a "true" partition with a "predicted" one; labels_* and nodes_* are
# parallel lists (labels[k] is the cluster of nodes[k]).
test_cases = [
    # Identical partitions with identical label names.
    dict(
        name="Perfect Match",
        labels_true=["0", "0", "1", "1"],
        labels_pred=["0", "0", "1", "1"],
        nodes_true=["A", "B", "C", "D"],
        nodes_pred=["A", "B", "C", "D"],
    ),
    # Only the label names are swapped; the grouping {A, B} / {C, D} is the
    # same in both partitions.
    dict(
        name="Completely Different Clustering",
        labels_true=["0", "0", "1", "1"],
        labels_pred=["1", "1", "0", "0"],
        nodes_true=["A", "B", "C", "D"],
        nodes_pred=["A", "B", "C", "D"],
    ),
    # Node 'E' is absent from the predicted partition.
    dict(
        name="Missing Nodes in Predicted Partition",
        labels_true=["0", "0", "1", "1", "2"],
        labels_pred=["0", "0", "1", "1"],
        nodes_true=["A", "B", "C", "D", "E"],
        nodes_pred=["A", "B", "C", "D"],
    ),
    # Nodes 'E' and 'F' are absent from the predicted partition, and the
    # shared nodes are grouped differently.
    dict(
        name="Partial Overlap with Missing Nodes",
        labels_true=["0", "0", "1", "1", "2", "2"],
        labels_pred=["0", "1", "0", "1"],
        nodes_true=["A", "B", "C", "D", "E", "F"],
        nodes_pred=["A", "B", "C", "D"],
    ),
]

## Running the metrics (RI, ARI, MRI) on the sample test cases

In [4]:
# Score every sample case with MRI, RI and ARI and print the results.
for case in test_cases:
    labels_true = case["labels_true"]
    labels_pred = case["labels_pred"]
    nodes_true = case["nodes_true"]
    nodes_pred = case["nodes_pred"]

    mri_score = modified_rand_score(labels_true, labels_pred, nodes_true, nodes_pred)

    # rand_score / adjusted_rand_score need two equal-length label vectors
    # describing the same samples. Restrict both partitions to the nodes
    # they share and align them by node identity; truncating labels_true to
    # len(labels_pred) is only correct when the missing nodes happen to sit
    # at the end of nodes_true and both node lists share the same order.
    true_label_of = dict(zip(nodes_true, labels_true))
    pred_label_of = dict(zip(nodes_pred, labels_pred))
    shared_nodes = [node for node in nodes_true if node in pred_label_of]
    shared_labels_true = [true_label_of[node] for node in shared_nodes]
    shared_labels_pred = [pred_label_of[node] for node in shared_nodes]

    ri_score = rand_score(shared_labels_true, shared_labels_pred)
    ari_score = adjusted_rand_score(shared_labels_true, shared_labels_pred)

    print(f"Test Case: {case['name']}")
    print(f"  MRI: {mri_score:.4f}")
    print(f"  RI: {ri_score:.4f}")
    print(f"  ARI: {ari_score:.4f}")
    print("-" * 40)

Test Case: Perfect Match
  MRI: 1.0000
  RI: 1.0000
  ARI: 1.0000
----------------------------------------
Test Case: Completely Different Clustering
  MRI: 1.0000
  RI: 1.0000
  ARI: 1.0000
----------------------------------------
Test Case: Missing Nodes in Predicted Partition
  MRI: 1.0000
  RI: 1.0000
  ARI: 1.0000
----------------------------------------
Test Case: Partial Overlap with Missing Nodes
  MRI: 0.7333
  RI: 0.3333
  ARI: -0.5000
----------------------------------------
