In [1]:
import pandas as pd
from sklearn.metrics import f1_score
import numpy as np

# Directories holding the ground-truth (GT_*) and generated (GEN_*) label CSVs.
# Replace the placeholders with real locations and keep the trailing slash:
# the loading cells build file paths by plain string concatenation.

# OpenI dir
GT_DIR_OPENI = '/path/to/your/input/openi/directory/'
GEN_DIR_OPENI = '/path/to/your/output/openi/directory/'

# MIMIC dir
GT_DIR_MIMIC = '/path/to/your/input/mimic/directory/'
GEN_DIR_MIMIC = '/path/to/your/output/mimic/directory/'


# The 13 label columns that are scored, i.e. every label without "No Finding".
# Order matters: the F1 helpers translate a label name into a column position
# with CXR_LABELS_1.index(name).
CXR_LABELS_1 = [
    "Enlarged Cardiomediastinum",
    "Cardiomegaly",
    "Lung Lesion",
    "Lung Opacity",
    "Edema",
    "Consolidation",
    "Pneumonia",
    "Atelectasis",
    "Pneumothorax",
    "Pleural Effusion",
    "Pleural Other",
    "Fracture",
    "Support Devices",
]

In [2]:
import warnings

# Blanket-silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

## 2. Calculation Function

In [3]:
def negative_f1(gt, pred):
    """Compute negative F1, negative F1-5 and the per-label negative F1 scores.

    The function scores each column as an ordinary binary F1; it is the caller
    (`compute_f1`) that passes matrices in which 1 marks an explicit negative
    mention and 0 marks everything else.

    Args:
        gt: 2-D array indexed as ``gt[:, i]`` with one binary column per entry
            of ``CXR_LABELS_1``, in that order.
        pred: Predictions in the same layout as ``gt``.

    Returns:
        Tuple ``(neg_f1, neg_f1_five, f1_scores)``:
        the mean F1 over all labels, the mean F1 over Edema, Consolidation,
        Pneumonia, Pneumothorax and Pleural Effusion, and a numpy array with
        the F1 of every label in ``CXR_LABELS_1`` order.
    """
    # Derive the column count from the label list instead of hard-coding 13.
    n_labels = len(CXR_LABELS_1)
    five_idx = [
        CXR_LABELS_1.index(name)
        for name in ("Edema", "Consolidation", "Pneumonia",
                     "Pneumothorax", "Pleural Effusion")
    ]

    # zero_division=0 gives a score of 0 instead of a warning for a label
    # where the F1 denominator is zero.
    f1_scores = np.array([
        f1_score(gt[:, i], pred[:, i], zero_division=0)
        for i in range(n_labels)
    ])

    neg_f1 = f1_scores.mean()
    neg_f1_five = f1_scores[five_idx].mean()
    return neg_f1, neg_f1_five, f1_scores

In [4]:
def positive_f1(gt, pred):
    """Compute positive F1, positive F1-5 and the per-label positive F1 scores.

    Both averages are always returned; there is no switch to select one of
    them. "No Finding" is not part of the input.

    Args:
        gt: 2-D array indexed as ``gt[:, i]`` with one binary column per entry
            of ``CXR_LABELS_1``, in that order (1 = positive).
        pred: Predictions in the same layout as ``gt``.

    Returns:
        Tuple ``(pos_f1, pos_f1_five, f1_scores)``:
        the mean F1 over all labels, the mean F1 over Cardiomegaly, Edema,
        Consolidation, Atelectasis and Pleural Effusion, and a numpy array
        with the F1 of every label in ``CXR_LABELS_1`` order.
    """
    # Derive the column count from the label list instead of hard-coding 13.
    n_labels = len(CXR_LABELS_1)
    five_idx = [
        CXR_LABELS_1.index(name)
        for name in ("Cardiomegaly", "Edema", "Consolidation",
                     "Atelectasis", "Pleural Effusion")
    ]

    # zero_division=0 gives a score of 0 instead of a warning for a label
    # where the F1 denominator is zero.
    f1_scores = np.array([
        f1_score(gt[:, i], pred[:, i], zero_division=0)
        for i in range(n_labels)
    ])

    pos_f1 = f1_scores.mean()
    pos_f1_five = f1_scores[five_idx].mean()
    return pos_f1, pos_f1_five, f1_scores

In [5]:
def compute_f1(df_gt, df_pred):
    """Compute the positive and negative F1 scores (excluding "No Finding").

    Label codes in both frames:
        2: unmentioned ; 1: positive ; 0: negative ; -1: uncertain

    For the positive scores only code 1 counts as the target class; for the
    negative scores only code 0 does. Every other code is treated as 0.

    Args:
        df_gt: Ground-truth frame with a "study_id" column, a "No Finding"
            column and the label columns in the order expected by
            `positive_f1` / `negative_f1`.
        df_pred: Predicted labels in the same layout, one row per study and
            in the same row order as ``df_gt``.

    Returns:
        Tuple ``(pos_f1, pos_f1_five, neg_f1, neg_f1_five, label_pos_f1,
        label_neg_f1)`` as produced by `positive_f1` and `negative_f1`.

    Raises:
        ValueError: If the two frames do not list the same study_id values in
            the same order.
    """
    # Rows are paired purely by position below, so misaligned frames would
    # silently produce meaningless scores. Fail loudly instead.
    gt_ids = df_gt["study_id"].tolist()
    pred_ids = df_pred["study_id"].tolist()
    if gt_ids != pred_ids:
        raise ValueError(
            "df_gt and df_pred must contain the same study_id values in the "
            "same order (got {} and {} rows)".format(len(gt_ids), len(pred_ids))
        )

    non_label_cols = ["study_id", "No Finding"]
    gt_codes = np.array(df_gt.drop(columns=non_label_cols))
    pred_codes = np.array(df_pred.drop(columns=non_label_cols))

    # Positive task: 1 stays 1; negative (0), uncertain (-1), unmentioned (2) -> 0.
    y_gt = (gt_codes == 1).astype(int)
    y_pred = (pred_codes == 1).astype(int)

    # Negative task: 0 becomes the target class 1; all other codes -> 0.
    y_gt_neg = (gt_codes == 0).astype(int)
    y_pred_neg = (pred_codes == 0).astype(int)

    pos_f1, pos_f1_five, label_pos_f1 = positive_f1(y_gt, y_pred)
    neg_f1, neg_f1_five, label_neg_f1 = negative_f1(y_gt_neg, y_pred_neg)
    return pos_f1, pos_f1_five, neg_f1, neg_f1_five, label_pos_f1, label_neg_f1

## MIMIC 2-class

In [6]:
# Generated labels for the MIMIC sample: order by study_id, renumber the rows,
# then discard every row that has a missing value.
df_gen = (
    pd.read_csv(GEN_DIR_MIMIC+'gen_labels_sample_2_1.csv')
    .sort_values(by='study_id')
    .reset_index(drop=True)
    .dropna()
)
df_gen.shape

(300, 15)

In [7]:
# Ground-truth labels for the MIMIC sample: order by study_id, renumber the
# rows, then discard every row that has a missing value.
df_gt = (
    pd.read_csv(GT_DIR_MIMIC+"gt_labels_sample.csv")
    .sort_values(by='study_id')
    .reset_index(drop=True)
    .dropna()
)
df_gt.shape

(300, 15)

In [8]:
# Keep only the studies present in BOTH frames. Filtering just df_gt would
# leave df_gen with extra rows whenever it holds a study that df_gt lacks,
# and the F1 computation pairs the two frames row by row.
df_gt = df_gt[df_gt['study_id'].isin(df_gen['study_id'])]
df_gen = df_gen[df_gen['study_id'].isin(df_gt['study_id'])]
df_gt.shape

(300, 15)

In [9]:
# Sanity check: once the id and "No Finding" columns are removed, the remaining
# ground-truth columns should match CXR_LABELS_1 position by position.
df_temp = df_gt.drop(["study_id", "No Finding"], axis=1)
df_temp.columns == CXR_LABELS_1

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [10]:
# F1 calculation
pos_f1, pos_f1_five, neg_f1, neg_f1_five, label_pos_f1, label_neg_f1 = compute_f1(df_gt, df_gen)

# Collect the scores under the name `metrics`; calling the variable `dict`
# would shadow the builtin for every later cell.
metrics = {
    'pos f1': pos_f1,
    'pos f1_5': pos_f1_five,
    'neg f1': neg_f1,
    'neg f1_5': neg_f1_five,
}
# Per-label rows report the positive F1; label_pos_f1 follows CXR_LABELS_1 order.
metrics.update(zip(CXR_LABELS_1, label_pos_f1))

df_metrics1 = pd.DataFrame(metrics, index=['2_1']).T
df_metrics1

Unnamed: 0,2_1
pos f1,0.145816
pos f1_5,0.208012
neg f1,0.091242
neg f1_5,0.209201
Enlarged Cardiomediastinum,0.055556
Cardiomegaly,0.26087
Lung Lesion,0.0
Lung Opacity,0.333333
Edema,0.17284
Consolidation,0.0


## Open-i 2-class

In [11]:
# Generated labels for the Open-i sample: order by study_id, renumber the rows,
# then discard every row that has a missing value.
df_gen = (
    pd.read_csv(GEN_DIR_OPENI+'gen_labels_sample_2_1.csv')
    .sort_values(by='study_id')
    .reset_index(drop=True)
    .dropna()
)
df_gen.shape

(237, 15)

In [12]:
# Ground-truth labels for the Open-i sample: order by study_id, renumber the
# rows, then discard every row that has a missing value.
df_gt = (
    pd.read_csv(GT_DIR_OPENI+"gt_labels_sample.csv")
    .sort_values(by='study_id')
    .reset_index(drop=True)
    .dropna()
)
df_gt.shape

(300, 15)

In [13]:
# Keep only the studies present in BOTH frames. Filtering just df_gt would
# leave df_gen with extra rows whenever it holds a study that df_gt lacks,
# and the F1 computation pairs the two frames row by row.
df_gt = df_gt[df_gt['study_id'].isin(df_gen['study_id'])]
df_gen = df_gen[df_gen['study_id'].isin(df_gt['study_id'])]
df_gt.shape

(237, 15)

In [14]:
# Sanity check: once the id and "No Finding" columns are removed, the remaining
# ground-truth columns should match CXR_LABELS_1 position by position.
df_temp = df_gt.drop(["study_id", "No Finding"], axis=1)
df_temp.columns == CXR_LABELS_1

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [15]:
# F1 calculation
pos_f1, pos_f1_five, neg_f1, neg_f1_five, label_pos_f1, label_neg_f1 = compute_f1(df_gt, df_gen)

# Collect the scores under the name `metrics`; calling the variable `dict`
# would shadow the builtin for every later cell.
metrics = {
    'pos f1': pos_f1,
    'pos f1_5': pos_f1_five,
    'neg f1': neg_f1,
    'neg f1_5': neg_f1_five,
}
# Per-label rows report the positive F1; label_pos_f1 follows CXR_LABELS_1 order.
metrics.update(zip(CXR_LABELS_1, label_pos_f1))

df_metrics1 = pd.DataFrame(metrics, index=['2_1']).T
df_metrics1

Unnamed: 0,2_1
pos f1,0.048876
pos f1_5,0.055817
neg f1,0.044735
neg f1_5,0.061341
Enlarged Cardiomediastinum,0.0
Cardiomegaly,0.130178
Lung Lesion,0.0
Lung Opacity,0.300752
Edema,0.0
Consolidation,0.0
