In [5]:
from sklearn.metrics import fbeta_score
import pandas as pd

def calculate_micro_f2_per_label(data, label):
    """
    Calculate micro F2 score for a specific label.

    Args:
        data (pd.DataFrame): DataFrame containing true and predicted labels.
        label (str): The column name of the label to calculate F2 score.

    Returns:
        float: Micro F2 score for the given label.
    """
    return fbeta_score(data[f"{label}_true"], data[f"{label}_pred"], beta=2.0, pos_label=1, average='binary', zero_division=0)


def evaluate_micro_f2(labels, predictions):
    """
    Evaluate micro F2 scores for all target labels.

    Args:
        labels (pd.DataFrame): True labels for the dataset.
        predictions (pd.DataFrame): Predicted labels for the dataset.

    Returns:
        dict: Dictionary containing micro F2 scores for each label and overall score.
    """
    # Merge the true labels and predictions on id and sentence_index
    merged = pd.merge(labels, predictions, on=['id', 'sentence_index'], suffixes=('_true', '_pred'))
    print(merged.head())
    # Define the target labels
    target_labels = [
        "omittable", "measure", "extension", "atelectasis", "satellite",
        "lymphadenopathy", "pleural", "distant"
    ]

    # Initialize a dictionary to store results
    results = {}

    # Calculate F2 scores for each label
    for label in target_labels:
        results[f"{label.capitalize()} micro F2.0"] = calculate_micro_f2_per_label(merged, label)

    # Calculate the overall F2 score (excluding omittable because of special handling)
    overall_labels = merged[[f"{label}_true" for label in target_labels]]
    overall_preds = merged[[f"{label}_pred" for label in target_labels]]
    results["Overall micro F2.0"] = fbeta_score(
        overall_labels.values.ravel(), overall_preds.values.ravel(), beta=2.0,  pos_label=1, average='binary', zero_division=0
    )

    return results


In [8]:
label = pd.read_csv('../radnlp_2024_train_val_20240731/ja/sub_task/val/label.csv')
submission = pd.read_csv('../radnlp_2024_train_val_20240731/ja/sub_task/val/label.csv')

evaluate_micro_f2(label, submission)

       id  sentence_index  omittable_true  measure_true  extension_true  \
0  147290               0               0             1               0   
1  147290               1               1             0               0   
2  147290               2               1             0               0   
3  147290               3               1             0               0   
4  241752               0               0             1               0   

   atelectasis_true  satellite_true  lymphadenopathy_true  pleural_true  \
0                 0               0                     0             0   
1                 0               0                     0             0   
2                 0               0                     0             0   
3                 0               0                     0             0   
4                 0               0                     0             0   

   distant_true  omittable_pred  measure_pred  extension_pred  \
0             0               0  

{'Omittable micro F2.0': 1.0,
 'Measure micro F2.0': 1.0,
 'Extension micro F2.0': 1.0,
 'Atelectasis micro F2.0': 1.0,
 'Satellite micro F2.0': 1.0,
 'Lymphadenopathy micro F2.0': 1.0,
 'Pleural micro F2.0': 1.0,
 'Distant micro F2.0': 1.0,
 'Overall micro F2.0': 1.0}