In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from artifact_detector_model import MARKER_NAMES

In [None]:
# Keep only the image path plus one column per marker listed in MARKER_NAMES.
marker_columns = ["image_path"] + MARKER_NAMES
df_markers = pd.read_csv("predicted_all_embed.csv")[marker_columns]

In [None]:
def _file_name(img_path):
    """Return whatever follows the last "/" in ``img_path``."""
    return img_path.split("/")[-1]


df_main = pd.read_csv(
    "/vol/biomedic3/data/EMBED/tables/mammo-net-csv/embed-non-negative.csv"
)
output_dir = "output/density-balanced/version_1"
print(len(df_main))

# Both tables get an `image_id` column (the last path component) to join on.
df_main["image_id"] = df_main["image_path"].apply(_file_name)
df_markers["image_id"] = df_markers["image_path"].apply(_file_name)

# `image_path` is removed from the main table before merging. No `on=` is
# given, so pandas joins on every column name the two frames still share.
# NOTE(review): presumably that is only `image_id` -- confirm that df_main has
# no column named like one of the marker columns.
df_main = df_main.drop(columns="image_path")

# Inner join, then keep only the rows whose `compression` flag is 0.
df_full = df_main.merge(df_markers, how="inner").loc[
    lambda merged: merged["compression"] == 0
]
print(len(df_full))

In [None]:
from ast import literal_eval


def _parse_probability(text):
    """Turn a stringified, whitespace-separated array into a numpy array.

    The text is expected to look like ``"[0.1 0.2 0.3 0.4]"``. Whitespace runs
    (including line breaks) are collapsed, padding next to the brackets is
    removed, and the remaining single spaces become commas so the result can
    be read by ``literal_eval``.

    Padding such as ``"[ 0.5 -0.1]"`` and text that is already comma separated
    (``"[0.1, 0.2]"``) are handled too; without that normalisation they would
    turn into ``"[,0.5,-0.1]"`` / ``"[0.1,,0.2]"`` and fail to parse.
    """
    compact = " ".join(text.split())
    compact = compact.replace("[ ", "[").replace(" ]", "]").replace(", ", ",")
    return np.asarray(literal_eval(compact.replace(" ", ",")))


predictions = pd.read_csv(output_dir + "/predictions.csv")
predictions["probability"] = predictions["probability"].apply(_parse_probability)
# Reduce each id to its last path component so it can be compared with the
# `image_id` column of the metadata table.
predictions["image_id"] = predictions["image_id"].apply(
    lambda img_path: img_path.split("/")[-1]
)
img_ids = predictions["image_id"].values
# One row per prediction; np.stack requires every parsed array to have the same shape.
preds = np.stack(predictions["probability"].values)
targets = predictions["label"].values

In [None]:
def _subset_positions(condition):
    """Select a subgroup of the predictions from a boolean mask over ``df_full``.

    Parameters
    ----------
    condition : boolean Series aligned with ``df_full``
        Rows of ``df_full`` that belong to the subgroup.

    Returns
    -------
    image_ids : Series
        ``image_id`` of the ``df_full`` rows that satisfy ``condition`` and
        also appear in ``img_ids``.
    positions : ndarray of int
        Positions in ``img_ids`` (and therefore in ``preds`` / ``targets``)
        whose id is one of ``image_ids``.
    """
    image_ids = df_full.loc[
        df_full["image_id"].isin(img_ids) & condition, "image_id"
    ]
    # Hash-based membership test instead of scanning the whole id array once
    # per prediction.
    is_member = pd.Series(img_ids).isin(image_ids.to_numpy()).to_numpy()
    return image_ids, np.flatnonzero(is_member)


circle_image_id, circle_idx = _subset_positions(df_full["circle marker"] == 1)
triangle_image_id, triangle_idx = _subset_positions(df_full["triangle marker"] == 1)
pacemaker_image_id, pacemaker_idx = _subset_positions(df_full["devices"] == 1)
breast_implant_image_id, breast_implant_idx = _subset_positions(
    df_full["breast implant"] == 1
)
# NOTE(review): the loading cell keeps only rows with compression == 0 in
# df_full, so this subgroup is expected to be empty.
compression_image_id, compression_idx = _subset_positions(df_full["compression"] == 1)
# "Normal" means none of the five flags is set.
normal_image_id, normal_idx = _subset_positions(
    (df_full["compression"] == 0)
    & (df_full["devices"] == 0)
    & (df_full["circle marker"] == 0)
    & (df_full["triangle marker"] == 0)
    & (df_full["breast implant"] == 0)
)
(
    circle_idx.shape,
    triangle_idx.shape,
    pacemaker_idx.shape,
    breast_implant_idx.shape,
    compression_idx.shape,
    normal_idx.shape,
)

In [None]:
# Class decision per prediction: index of the largest probability in each row.
predicted_labels = np.argmax(preds, 1)

# (printed heading, selector into targets / predicted_labels) for each line of
# the report; slice(None) selects every prediction.
report_rows = [
    ("All - balanced accuracy", slice(None)),
    ("Normal images", normal_idx),
    ("Images with circle", circle_idx),
    ("Images with triangle", triangle_idx),
    ("Images with implant", breast_implant_idx),
    ("Pacemaker", pacemaker_idx),
]
for heading, rows in report_rows:
    score = balanced_accuracy_score(targets[rows], predicted_labels[rows])
    print(f"{heading}: {score:.3f}")

In [None]:

plt.rcParams["font.family"] = "Serif"
f, ax = plt.subplots(2, 3, figsize=(12, 8), facecolor="none")
f.subplots_adjust(hspace=0.4, wspace=0.3)
ax = ax.ravel()
# One panel per subgroup: [positions into targets / preds, panel title].
results = {
    'all': [np.arange(targets.shape[0]), "All images"],
    'no markers': [normal_idx, "No markers"],
    'circle': [circle_idx, "Circle markers"],
    'triangles': [triangle_idx, "Triangle markers"],
    'implants': [breast_implant_idx, "Breast implants"],
    'devices': [pacemaker_idx, "Devices"]
}
# Use the same class set for every panel so that a subgroup lacking a class
# still yields a full-size matrix instead of a smaller, misaligned one.
# Assumes `targets` holds the class indices 0..preds.shape[1]-1 (the same
# encoding np.argmax produces) -- TODO confirm.
class_labels = np.arange(preds.shape[1])
for i, (select_idx, title) in enumerate(results.values()):
    cf_matrix = confusion_matrix(
        targets[select_idx], np.argmax(preds[select_idx], 1), labels=class_labels
    )
    # Row-normalise: each cell becomes the fraction of its true class. A class
    # with no samples gives 0/0 = NaN; the warning is silenced and the NaN kept.
    with np.errstate(divide="ignore", invalid="ignore"):
        row_fractions = cf_matrix / np.sum(cf_matrix, 1, keepdims=True)
    labels = np.asarray(
        [f"{value:.0%}" for value in row_fractions.flatten()]
    ).reshape(cf_matrix.shape)
    sns.heatmap(
        row_fractions,
        annot=labels,
        fmt="",
        cmap="Blues",
        ax=ax[i],
        vmin=0,
        vmax=1,
        cbar=False,
        annot_kws={'fontsize': 17}
    )
    # Wrap every word of the title in mathtext bold.
    ax[i].set_title(' '.join([r'$\bf{' + t +'}$' for t in title.split(' ')]), fontsize=20)

# Axis labels only on the outer edge of the 2x3 grid: y on the left column,
# x on the bottom row.
for i in range(3):
    ax[i].set_xlabel('')
for i in [1, 2, 4, 5]:
    ax[i].set_ylabel('')
for i in [0, 3]:
    ax[i].set_ylabel('True label', fontsize=20)
for i in [3, 4, 5]:
    ax[i].set_xlabel('Predicted label', fontsize=20)

plt.savefig("output/confusion_density.pdf", bbox_inches="tight");