In [6]:
import pandas as pd
import numpy as np

In [2]:
# Define a function to evaluate predictions
def evaluate_predictions(labels, submission):
    """Score predicted T/N/M values against reference labels.

    Both frames are joined on ``id``. Every row of ``labels`` is kept: an id
    with no matching row in ``submission`` gets missing predictions, which
    compare unequal to any label and are therefore counted as incorrect.
    Ids that appear only in ``submission`` are ignored.

    Parameters
    ----------
    labels : pandas.DataFrame
        Reference data with columns ``id``, ``t``, ``n`` and ``m``.
    submission : pandas.DataFrame
        Predictions with the same columns ``id``, ``t``, ``n`` and ``m``.
        NOTE(review): ids are presumed unique in both frames; duplicates
        would produce extra rows in the join -- confirm with the data.

    Returns
    -------
    tuple[dict, pandas.DataFrame]
        ``metrics`` maps a metric name to the mean of the matching
        correctness column; ``merged`` is the joined frame with the
        ``*_correct_*`` and ``*_coarse`` helper columns added.

    Raises
    ------
    KeyError
        If ``id`` or one of the ``t``/``n``/``m`` columns is missing.
    """
    # Fine-grained values that collapse into a single coarse value; anything
    # not listed here is already coarse and is passed through unchanged.
    coarse_t = {
        'Tis': 'T1', 'T1mi': 'T1', 'T1a': 'T1', 'T1b': 'T1', 'T1c': 'T1',
        'T2a': 'T2', 'T2b': 'T2',
    }
    coarse_m = {'M1a': 'M1', 'M1b': 'M1', 'M1c': 'M1'}

    def map_to_coarse_T(value):
        return coarse_t.get(value, value)

    def map_to_coarse_M(value):
        return coarse_m.get(value, value)

    # Left join on the labels: an inner join would silently drop labelled
    # cases that the submission skipped and so overstate every accuracy.
    merged = pd.merge(
        labels, submission, on='id', how='left', suffixes=('_label', '_pred')
    )

    # Fine-grained accuracies (a missing prediction never equals a label)
    merged['T_correct_fine'] = merged['t_label'] == merged['t_pred']
    merged['N_correct_fine'] = merged['n_label'] == merged['n_pred']
    merged['M_correct_fine'] = merged['m_label'] == merged['m_pred']
    merged['Joint_correct_fine'] = (
        merged['T_correct_fine']
        & merged['N_correct_fine']
        & merged['M_correct_fine']
    )

    # Coarse-grained mappings
    merged['t_label_coarse'] = merged['t_label'].map(map_to_coarse_T)
    merged['t_pred_coarse'] = merged['t_pred'].map(map_to_coarse_T)
    merged['m_label_coarse'] = merged['m_label'].map(map_to_coarse_M)
    merged['m_pred_coarse'] = merged['m_pred'].map(map_to_coarse_M)

    # Coarse-grained accuracies; N has no coarse mapping, so the fine N
    # column is reused for the coarse joint score.
    merged['T_correct_coarse'] = merged['t_label_coarse'] == merged['t_pred_coarse']
    merged['M_correct_coarse'] = merged['m_label_coarse'] == merged['m_pred_coarse']
    merged['Joint_correct_coarse'] = (
        merged['T_correct_coarse']
        & merged['N_correct_fine']
        & merged['M_correct_coarse']
    )

    # Each metric is the fraction of rows whose correctness flag is True.
    metrics = {
        'Joint accuracy (fine)': merged['Joint_correct_fine'].mean(),
        'T accuracy (fine)': merged['T_correct_fine'].mean(),
        'N accuracy (fine)': merged['N_correct_fine'].mean(),
        'M accuracy (fine)': merged['M_correct_fine'].mean(),
        'Joint accuracy (coarse)': merged['Joint_correct_coarse'].mean(),
        'T accuracy (coarse)': merged['T_correct_coarse'].mean(),
        'N accuracy (coarse)': merged['N_correct_fine'].mean(),  # Same as fine
        'M accuracy (coarse)': merged['M_correct_coarse'].mean()
    }

    return metrics, merged

# Load the inputs in this cell so it runs on a fresh kernel; previously
# `labels` and `submission` had to be left over from an earlier execution,
# and Restart & Run All failed here with a NameError.
# NOTE(review): the gpt-4o submission is assumed because the output recorded
# below is identical to the output of the gpt-4o cell further down -- confirm.
labels = pd.read_csv('../radnlp_2024_train_val_20240731/ja/main_task/val/label.csv')
submission = pd.read_csv('../submission_gpt-4o-2024-05-13.csv')

# Execute the function
metrics_result, merged_result = evaluate_predictions(labels, submission)

# Display the metrics
metrics_result



{'Joint accuracy (fine)': 0.7037037037037037,
 'T accuracy (fine)': 0.8518518518518519,
 'N accuracy (fine)': 0.9259259259259259,
 'M accuracy (fine)': 0.8333333333333334,
 'Joint accuracy (coarse)': 0.7962962962962963,
 'T accuracy (coarse)': 0.9444444444444444,
 'N accuracy (coarse)': 0.9259259259259259,
 'M accuracy (coarse)': 0.8888888888888888}

In [7]:
# Score the gpt-4o-2024-05-13 submission against the validation labels.
label_file_path, submission_file_path = (
    '../radnlp_2024_train_val_20240731/ja/main_task/val/label.csv',
    '../submission_gpt-4o-2024-05-13.csv',
)

# Read both CSV files (labels first, then the submission).
labels, submission = map(pd.read_csv, (label_file_path, submission_file_path))

# Compute the metrics dict and the merged comparison frame.
metrics_result, merged_result = evaluate_predictions(labels, submission)

# Last expression of the cell: show the metrics.
metrics_result

{'Joint accuracy (fine)': 0.7037037037037037,
 'T accuracy (fine)': 0.8518518518518519,
 'N accuracy (fine)': 0.9259259259259259,
 'M accuracy (fine)': 0.8333333333333334,
 'Joint accuracy (coarse)': 0.7962962962962963,
 'T accuracy (coarse)': 0.9444444444444444,
 'N accuracy (coarse)': 0.9259259259259259,
 'M accuracy (coarse)': 0.8888888888888888}

In [4]:
# Score the gpt-4o-mini-2024-07-18 submission against the validation labels.
label_file_path, submission_file_path = (
    '../radnlp_2024_train_val_20240731/ja/main_task/val/label.csv',
    '../submission_gpt-4o-mini-2024-07-18.csv',
)

# Read both CSV files (labels first, then the submission).
labels, submission = map(pd.read_csv, (label_file_path, submission_file_path))

# Compute the metrics dict and the merged comparison frame.
metrics_result, merged_result = evaluate_predictions(labels, submission)

# Last expression of the cell: show the metrics.
metrics_result


{'Joint accuracy (fine)': 0.35185185185185186,
 'T accuracy (fine)': 0.5925925925925926,
 'N accuracy (fine)': 0.7407407407407407,
 'M accuracy (fine)': 0.8703703703703703,
 'Joint accuracy (coarse)': 0.5185185185185185,
 'T accuracy (coarse)': 0.7592592592592593,
 'N accuracy (coarse)': 0.7407407407407407,
 'M accuracy (coarse)': 0.9444444444444444}

In [5]:
# Score the o1-preview-2024-09-12 submission against the validation labels.
label_file_path, submission_file_path = (
    '../radnlp_2024_train_val_20240731/ja/main_task/val/label.csv',
    '../submission_o1-preview-2024-09-12.csv',
)

# Read both CSV files (labels first, then the submission).
labels, submission = map(pd.read_csv, (label_file_path, submission_file_path))

# Compute the metrics dict and the merged comparison frame.
metrics_result, merged_result = evaluate_predictions(labels, submission)

# Last expression of the cell: show the metrics.
metrics_result

{'Joint accuracy (fine)': 0.9074074074074074,
 'T accuracy (fine)': 0.9444444444444444,
 'N accuracy (fine)': 0.9629629629629629,
 'M accuracy (fine)': 1.0,
 'Joint accuracy (coarse)': 0.9259259259259259,
 'T accuracy (coarse)': 0.9629629629629629,
 'N accuracy (coarse)': 0.9629629629629629,
 'M accuracy (coarse)': 1.0}