In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Define a function to evaluate predictions
def evaluate_predictions(labels, submission):
    """Score TNM predictions against ground-truth labels.

    Both frames must contain the columns ``id``, ``t``, ``n`` and ``m``.
    Rows are aligned on ``id`` with a LEFT join from ``labels``, so every
    labelled case contributes to the denominator: a case with no matching
    row in ``submission`` gets NaN predictions, which compare unequal to any
    label and are therefore scored as incorrect. Rows in ``submission`` whose
    ``id`` does not appear in ``labels`` are ignored.

    Two granularities are reported:

    * fine   -- exact string equality of the label and the prediction.
    * coarse -- T and M values are first collapsed by the nested helpers
      (``Tis``/``T1mi``/``T1a``/``T1b``/``T1c`` -> ``T1``; ``T2a``/``T2b`` ->
      ``T2``; ``M1a``/``M1b``/``M1c`` -> ``M1``); every other value is
      compared unchanged. N has no coarse mapping, so its coarse accuracy is
      the fine accuracy.

    Parameters
    ----------
    labels : pandas.DataFrame
        Ground truth with columns ``id``, ``t``, ``n``, ``m``.
    submission : pandas.DataFrame
        Predictions with columns ``id``, ``t``, ``n``, ``m``.

    Returns
    -------
    (dict, pandas.DataFrame)
        ``metrics`` maps metric names to floats (the mean of the matching
        boolean column; NaN when ``labels`` is empty). ``merged`` is the
        joined frame with the ``*_label`` / ``*_pred`` columns, the coarse
        columns and the per-row correctness flags.

    Notes
    -----
    If ``id`` is duplicated in either frame the join yields one row per
    matching pair, which will skew the averages.
    """
    # Helper function to determine coarse equivalence
    def map_to_coarse_T(value):
        if value in ('Tis', 'T1mi', 'T1a', 'T1b', 'T1c'):
            return 'T1'
        if value in ('T2a', 'T2b'):
            return 'T2'
        return value

    def map_to_coarse_M(value):
        if value in ('M1a', 'M1b', 'M1c'):
            return 'M1'
        return value

    # Left join: keep every labelled id so missing predictions are penalised
    # instead of silently shrinking the evaluation set.
    merged = pd.merge(
        labels,
        submission,
        on='id',
        how='left',
        suffixes=('_label', '_pred'),
    )

    # Fine-grained accuracies (NaN predictions compare unequal -> False)
    merged['T_correct_fine'] = merged['t_label'] == merged['t_pred']
    merged['N_correct_fine'] = merged['n_label'] == merged['n_pred']
    merged['M_correct_fine'] = merged['m_label'] == merged['m_pred']
    merged['Joint_correct_fine'] = (
        merged['T_correct_fine']
        & merged['N_correct_fine']
        & merged['M_correct_fine']
    )

    # Coarse-grained mappings
    merged['t_label_coarse'] = merged['t_label'].map(map_to_coarse_T)
    merged['t_pred_coarse'] = merged['t_pred'].map(map_to_coarse_T)
    merged['m_label_coarse'] = merged['m_label'].map(map_to_coarse_M)
    merged['m_pred_coarse'] = merged['m_pred'].map(map_to_coarse_M)

    # Coarse-grained accuracies; N is reused from the fine comparison
    merged['T_correct_coarse'] = merged['t_label_coarse'] == merged['t_pred_coarse']
    merged['M_correct_coarse'] = merged['m_label_coarse'] == merged['m_pred_coarse']
    merged['Joint_correct_coarse'] = (
        merged['T_correct_coarse']
        & merged['N_correct_fine']
        & merged['M_correct_coarse']
    )

    # Calculate metrics: the mean of a boolean column is the fraction correct
    metrics = {
        'Joint accuracy (fine)': float(merged['Joint_correct_fine'].mean()),
        'T accuracy (fine)': float(merged['T_correct_fine'].mean()),
        'N accuracy (fine)': float(merged['N_correct_fine'].mean()),
        'M accuracy (fine)': float(merged['M_correct_fine'].mean()),
        'Joint accuracy (coarse)': float(merged['Joint_correct_coarse'].mean()),
        'T accuracy (coarse)': float(merged['T_correct_coarse'].mean()),
        'N accuracy (coarse)': float(merged['N_correct_fine'].mean()),  # Same as fine
        'M accuracy (coarse)': float(merged['M_correct_coarse'].mean())
    }

    return metrics, merged

def show_metrics(pred_path, gt_path):
    """Print the prediction file's base name and return its metrics.

    Parameters
    ----------
    pred_path : path of the submission CSV to score.
    gt_path : path of the ground-truth label CSV.

    Returns
    -------
    The metrics dict produced by ``evaluate_predictions``; the per-row
    merged frame it also returns is discarded here.
    """
    # Identify the submission being scored by file name only (no directory).
    print(os.path.basename(pred_path))

    # Ground truth is read first, then the predictions.
    ground_truth = pd.read_csv(gt_path)
    predictions = pd.read_csv(pred_path)

    # Score the predictions and keep only the summary numbers.
    scores, _ = evaluate_predictions(ground_truth, predictions)
    return scores


In [3]:
# Ground-truth labels (relative path). Judging by the directory names this is
# the 'val' split of the 'ja' main task in a RadNLP 2024 train/val release
# -- TODO confirm against the dataset documentation.
label_file_path = '../radnlp_2024_train_val_20240731/ja/main_task/val/label.csv'
# Model predictions to be scored against the labels above.
submission_file_path = './model_outputs/submission_uth.csv'

# show_metrics takes the prediction path first and the ground-truth path
# second. As the cell's last expression, the returned metrics dict is what the
# notebook displays as this cell's output.
show_metrics(submission_file_path, label_file_path)

submission_uth.csv


{'Joint accuracy (fine)': 0.2037037037037037,
 'T accuracy (fine)': 0.42592592592592593,
 'N accuracy (fine)': 0.8148148148148148,
 'M accuracy (fine)': 0.7592592592592593,
 'Joint accuracy (coarse)': 0.24074074074074073,
 'T accuracy (coarse)': 0.42592592592592593,
 'N accuracy (coarse)': 0.8148148148148148,
 'M accuracy (coarse)': 0.8148148148148148}