In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from copy import deepcopy
from matplotlib import rcParams
from skimage.measure import regionprops_table

# Enable matplotlib's automatic layout adjustment for every figure in this notebook.
rcParams["figure.autolayout"] = True


## 1 Load and transform data

In [None]:
# Read the consolidated table; later cells use its gt_noisy, gt_proofread, nimbus,
# dataset and channel columns, among others.
merged_df = pd.read_csv("data/gt_pred_ie_consolidated.csv")

In [None]:
# Work on a renamed copy so that `merged_df` keeps the raw column names.
merged_df_2 = merged_df.rename(
    columns={"gt_noisy": "silver_standard", "gt_proofread": "gold_standard"}
)
merged_df_2["gold_standard"] = merged_df_2["gold_standard"].round(0)

# keep only rows whose rounded gold-standard label is below 2
is_labelled = merged_df_2["gold_standard"] < 2
# get rid of Ki67, IDO, PDL1 in decidua because they contain labelling errors
has_label_errors = (merged_df_2["dataset"] == "decidua") & merged_df_2["channel"].isin(
    ["Ki67", "IDO", "PDL1"]
)

# .copy() makes the filtered frame independent, so adding a column below does not
# trigger pandas' SettingWithCopyWarning.
merged_df_2 = merged_df_2[is_labelled & ~has_label_errors].copy()
merged_df_2["prediction_binary"] = (merged_df_2["nimbus"] >= 0.5).astype(int)

In [None]:
from sklearn.metrics import confusion_matrix

def calc_scores(gt, pred, threshold):
    """Compute confusion-matrix counts and derived metrics at one threshold
    Args:
        gt (np.array):
            ground truth labels; entries >= 2 are treated as masked out and ignored
        pred (np.array):
            predictions, binarized as `pred >= threshold`
        threshold (float):
            threshold for predictions
    Returns:
        scores (dict):
            dictionary with the keys tp, tn, fp, fn, accuracy, precision, recall,
            specificity and f1_score
    """
    # exclude masked out regions from metric calculation
    keep = gt < 2
    labels_true = gt[keep]
    labels_pred = (pred[keep] >= threshold).astype(int)

    counts = confusion_matrix(y_true=labels_true, y_pred=labels_pred, labels=[0, 1])
    tn, fp, fn, tp = counts.ravel()

    # small constant keeps every ratio finite when a denominator would be zero
    eps = 1e-8
    return {
        "tp": tp,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "accuracy": (tp + tn) / (tp + tn + fp + fn + eps),
        "precision": tp / (tp + fp + eps),
        "recall": tp / (tp + fn + eps),
        "specificity": tn / (tn + fp + eps),
        "f1_score": 2 * tp / (2 * tp + fp + fn + eps),
    }

def score_per_dataset(df, pred_col, threshold=0.5):
    """Score one prediction column against the gold standard, separately per dataset
    Args:
        df (pd.DataFrame):
            table with "dataset", "gold_standard", "silver_standard" and `pred_col` columns
        pred_col (str):
            name of the column holding the predictions to evaluate
        threshold (float):
            threshold handed to calc_scores to binarize the predictions
    Returns:
        scores_df (pd.DataFrame):
            one row per dataset and one column per entry of the calc_scores dict;
            datasets without gold-standard positives (tp + fn == 0) are left out
    """
    per_dataset = {}
    for name in df["dataset"].unique():
        # rows with silver_standard == 2 are excluded for both comparisons
        subset = df[(df["dataset"] == name) & (df["silver_standard"] != 2)]
        scores = calc_scores(
            subset["gold_standard"].values.astype(np.uint8),
            subset[pred_col].astype(np.float32),
            threshold=threshold,
        )
        if scores["tp"] + scores["fn"] > 0:
            per_dataset[name] = scores
    return pd.DataFrame(per_dataset).T


pred_vs_gold_df = score_per_dataset(merged_df_2, "nimbus")
silver_vs_gold_df = score_per_dataset(merged_df_2, "silver_standard")

# Mean of each metric over the datasets (rows: metrics, columns: Nimbus / Silver standard).
tmp_df = pd.DataFrame(
    {
        m: {
            "Nimbus": pred_vs_gold_df[m].mean(),
            "Silver standard": silver_vs_gold_df[m].mean(),
        }
        for m in ["f1_score", "precision", "recall", "specificity"]
    }
).T
tmp_df


In [None]:
from sklearn.metrics import confusion_matrix

def calc_scores(gt, pred, threshold):
    """Compute confusion-matrix counts and derived metrics at one threshold
    Args:
        gt (np.array):
            ground truth labels; entries >= 2 are treated as masked out and ignored
        pred (np.array):
            predictions, binarized as `pred >= threshold`
        threshold (float):
            threshold for predictions
    Returns:
        scores (dict):
            dictionary with the keys tp, tn, fp, fn, accuracy, precision, recall,
            specificity and f1_score
    """
    # exclude masked out regions from metric calculation
    keep = gt < 2
    labels_true = gt[keep]
    labels_pred = (pred[keep] >= threshold).astype(int)

    counts = confusion_matrix(y_true=labels_true, y_pred=labels_pred, labels=[0, 1])
    tn, fp, fn, tp = counts.ravel()

    # small constant keeps every ratio finite when a denominator would be zero
    eps = 1e-8
    return {
        "tp": tp,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "accuracy": (tp + tn) / (tp + tn + fp + fn + eps),
        "precision": tp / (tp + fp + eps),
        "recall": tp / (tp + fn + eps),
        "specificity": tn / (tn + fp + eps),
        "f1_score": 2 * tp / (2 * tp + fp + fn + eps),
    }

def score_per_channel(df, pred_col, threshold=0.5):
    """Score one prediction column against the gold standard per (dataset, channel)
    Args:
        df (pd.DataFrame):
            table with "dataset", "channel", "gold_standard", "silver_standard" and
            `pred_col` columns
        pred_col (str):
            name of the column holding the predictions to evaluate
        threshold (float):
            threshold handed to calc_scores to binarize the predictions
    Returns:
        scores_df (pd.DataFrame):
            one row per (dataset, channel) pair (two-level index) and one column per
            entry of the calc_scores dict; pairs without gold-standard positives
            (tp + fn == 0) are left out
    """
    per_channel = {}
    for dataset in df["dataset"].unique():
        # rows with silver_standard == 2 are excluded for both comparisons
        dataset_df = df[(df["dataset"] == dataset) & (df["silver_standard"] != 2)]
        for channel in dataset_df["channel"].unique():
            # filter once per channel instead of once per calc_scores argument
            channel_df = dataset_df[dataset_df["channel"] == channel]
            scores = calc_scores(
                channel_df["gold_standard"].values.astype(np.uint8),
                channel_df[pred_col].astype(np.float32),
                threshold=threshold,
            )
            if scores["tp"] + scores["fn"] > 0:
                per_channel[(dataset, channel)] = scores
    return pd.DataFrame(per_channel).T


pred_vs_gold_df = score_per_channel(merged_df_2, "nimbus")
silver_vs_gold_df = score_per_channel(merged_df_2, "silver_standard")


#### Supplement figures 2 a-e

In [None]:
# Display names used as figure titles.
# NOTE(review): these keys differ from the dataset names used elsewhere in this notebook
# ('decidua', 'hickey', 'mskcc_colon', ...) - confirm which names the CSV contains. Names
# missing here fall back to the raw dataset name instead of raising a KeyError.
rename_dataset = {'all': "Pan-M",
 'codex_colon': "Codex Colon",
 'vectra_colon': "Vectra Colon",
 'vectra_pancreas': "Vectra Pancreas",
 'mibi_breast': "MIBI-TOF Breast",
 'mibi_decidua': "MIBI-TOF Decidua"}

# plot grouped bar chart for f1 score
tmp_df = pd.concat([pred_vs_gold_df["f1_score"], silver_vs_gold_df["f1_score"]], axis=1)
tmp_df.columns = ["Nimbus", "Silver standard"]
# the two index levels of the score frames are (dataset, channel)
tmp_df = tmp_df.reset_index().rename(columns={"level_0": "dataset", "level_1": "channel"})
tmp_df = pd.melt(
    tmp_df, id_vars=["dataset", "channel"], value_vars=["Nimbus", "Silver standard"], var_name="Model"
).rename(columns={"value": "F1 score"})
# Assign the result back: an in-place replace on `tmp_df.channel` acts on an intermediate
# Series and is not guaranteed to change `tmp_df` itself.
tmp_df["channel"] = tmp_df["channel"].replace({"panCK+CK7+CAM5.2": "panCK"})
# number of distinct channels per dataset (not used further in this cell)
ratios = tmp_df.groupby("dataset")["channel"].nunique().values
os.makedirs("figures/supplement", exist_ok=True)
for dataset in tmp_df.dataset.unique():
    # sort_values returns a new frame, so nothing is modified on a slice of tmp_df
    tmp_df_2 = tmp_df[tmp_df["dataset"] == dataset].sort_values("F1 score", ascending=False)
    fig, ax = plt.subplots()
    p = sns.barplot(data=tmp_df_2, x="channel", y="F1 score", hue="Model", hue_order=["Nimbus", "Silver standard"], width=tmp_df_2.channel.nunique()*0.02, ax=ax)
    plt.xticks(rotation=90)
    plt.title(rename_dataset.get(dataset, dataset), fontsize=18)
    plt.xlabel("Channel", fontsize=16)
    plt.ylabel("F1 score", fontsize=16)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.legend(loc='lower left', fontsize=14)
    plt.savefig(f"figures/supplement/f1_score_split_by_channel_{dataset}.svg")
    plt.show()
    # release the figure so figures do not accumulate across loop iterations
    plt.close(fig)


#### Figure 3 d

##### Cell size

In [None]:
from sklearn.feature_selection import mutual_info_classif
# Mutual-information results keyed by confounder name: `results_gs` for errors measured
# against the gold standard, `results_ss` for errors measured against the silver standard.
results_gs, results_ss = {}, {}

In [None]:
# Per-cell error flags: 1 where the binarized prediction disagrees with the gold /
# silver standard label, 0 where it agrees. Rows labelled 2 in either standard are dropped.
# .copy() makes the filtered frame independent, so adding columns does not trigger
# pandas' SettingWithCopyWarning.
merged_df_3 = merged_df_2[
    (merged_df_2["gold_standard"] != 2) & (merged_df_2["silver_standard"] != 2)
].copy()
merged_df_3["error_gold"] = (merged_df_3["prediction_binary"] - merged_df_3["gold_standard"]).abs().astype(int)
merged_df_3["error_silver"] = (merged_df_3["prediction_binary"] - merged_df_3["silver_standard"]).abs().astype(int)

# mutual_info_classif adds random noise to continuous features; a fixed random_state
# makes the estimate reproducible across runs.
results_gs["cell_size"] = mutual_info_classif(
    merged_df_3["cell_size"].values.reshape(-1, 1), merged_df_3["error_gold"], random_state=0
)
results_ss["cell_size"] = mutual_info_classif(
    merged_df_3["cell_size"].values.reshape(-1, 1), merged_df_3["error_silver"], random_state=0
)

##### Marker heterogeneity

In [None]:
# Marker heterogeneity measured by the coefficient of variation (std / mean) of the `ie`
# values of gold-standard-positive cells, computed per (dataset, channel, fov).
group_keys = ["dataset", "channel", "fov"]
positive_cells = merged_df_3[merged_df_3["gold_standard"] == 1.0]
cv_per_group = positive_cells.groupby(group_keys)["ie"].agg(
    lambda ie: ie.std() / (ie.mean() + 1e-8)
)
heterogeneity = cv_per_group.to_dict()

# Attach |CV| of its group to every cell with one left join instead of a Python-level
# lookup per row; groups without positive cells end up as NaN. The left join keeps the
# row order and count of merged_df_3, so the values can be assigned positionally.
merged_df_3["heterogeneity"] = (
    merged_df_3[group_keys]
    .join(cv_per_group.abs().rename("heterogeneity"), on=group_keys)["heterogeneity"]
    .to_numpy()
)

# cells without a defined heterogeneity are left out of the mutual-information estimate
isna = merged_df_3["heterogeneity"].isna()
with_heterogeneity = merged_df_3[~isna]
# fixed random_state: mutual_info_classif adds random noise to continuous features
results_gs["heterogeneity"] = mutual_info_classif(
    with_heterogeneity["heterogeneity"].values.reshape(-1, 1), with_heterogeneity["error_gold"], random_state=0
)
results_ss["heterogeneity"] = mutual_info_classif(
    with_heterogeneity["heterogeneity"].values.reshape(-1, 1), with_heterogeneity["error_silver"], random_state=0
)

##### Rare markers: Low share of cells that are GT positive for the marker

In [None]:
# Rarity: mean gold-standard label (i.e. share of positive cells) per (dataset, channel, fov).
# Select the column before aggregating: the first positional argument of
# DataFrameGroupBy.mean is `numeric_only`, not a column name.
rarity = merged_df_3.groupby(["dataset", "channel", "fov"])["gold_standard"].mean()
rarity_dict = rarity.to_dict()
# transform("mean") broadcasts each group's mean back onto its rows, replacing the
# Python-level dict lookup per row.
merged_df_3["rarity"] = merged_df_3.groupby(["dataset", "channel", "fov"])["gold_standard"].transform("mean")

# fixed random_state: mutual_info_classif adds random noise to continuous features
results_gs["rarity"] = mutual_info_classif(
    merged_df_3["rarity"].values.reshape(-1, 1), merged_df_3["error_gold"], random_state=0
)
results_ss["rarity"] = mutual_info_classif(
    merged_df_3["rarity"].values.reshape(-1, 1), merged_df_3["error_silver"], random_state=0
)

##### Marker sparsity
Number of marker-positive cells in the neighborhood of each cell

In [None]:
from imageio import imread
from joblib import Parallel, delayed
from tqdm.notebook import tqdm

def mskcc_pancreas_naming_convention(fname):
    """Build the segmentation file path for an MSKCC pancreas fov
    Args:
        fname (str): fov name; "feature_0.ome.tif" is appended without a separator
    Returns:
        seg_path (str): path inside data/MSKCC_pancreas/segmentation
    """
    seg_dir = "data/MSKCC_pancreas/segmentation"
    seg_file = fname + "feature_0.ome.tif"
    return os.path.join(seg_dir, seg_file)

def mskcc_colon_naming_convention(fname):
    """Build the segmentation file path for an MSKCC colon fov
    Args:
        fname (str): fov name; "feature_0.ome.tif" is appended without a separator
    Returns:
        seg_path (str): path inside data/MSKCC_colon/segmentation
    """
    seg_dir = "data/MSKCC_colon/segmentation"
    seg_file = fname + "feature_0.ome.tif"
    return os.path.join(seg_dir, seg_file)

def hickey_naming_convention(fov_path):
    """Find the labelled mask file for a Hickey fov
    Args:
        fov_path (str): fov name or path; the basename must start with "<fov>_<reg>_"
    Returns:
        seg_path (str): path to the first (in sorted order) file in
            data/hickey/masks/<fov> whose name contains both "_labeled" and <reg>
    Raises:
        FileNotFoundError: if no file in that directory matches
    """
    fname = os.path.basename(fov_path)
    fov, reg = fname.split("_")[:2]
    mask_dir = os.path.join("data/hickey/masks", fov)
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # deterministic when several files match.
    candidates = sorted(
        img for img in os.listdir(mask_dir) if "_labeled" in img and reg in img
    )
    if not candidates:
        raise FileNotFoundError(
            f"No '_labeled' mask containing '{reg}' found in {mask_dir}"
        )
    return os.path.join(os.path.normpath(mask_dir), candidates[0])

def decidua_naming_convention(fov_path):
    """Build the path to the segmentation labels of a decidua fov
    Args:
        fov_path (str): path to fov; only its basename is used
    Returns:
        seg_path (str): path to "<fov>_segmentation_labels.tiff" inside
            data/decidua/segmentation_data
    """
    label_file = os.path.basename(fov_path) + "_segmentation_labels.tiff"
    segmentation_dir = os.path.join("data/decidua", "segmentation_data")
    return os.path.join(segmentation_dir, label_file)

def tonic_naming_convention(fov_name):
    """Build the segmentation file path for a TONIC fov
    Args:
        fov_name (str): fov name; "_feature_0.tif" is appended
    Returns:
        seg_path (str): path inside data/TONIC/segmentation_data/deepcell_output
    """
    deepcell_dir = os.path.normpath("data/TONIC/segmentation_data/deepcell_output")
    seg_file = fov_name + "_feature_0.tif"
    return os.path.join(deepcell_dir, seg_file)

# Maps each dataset key to the function that turns a fov name into the path of its
# segmentation file.
naming_convention = {
    'hickey': hickey_naming_convention,
    'mskcc_colon': mskcc_colon_naming_convention,
    'mskcc_pancreas': mskcc_pancreas_naming_convention,
    'tonic': tonic_naming_convention,
    'decidua': decidua_naming_convention
}
# fov names per dataset (same keys as `naming_convention`); each name is handed to the
# dataset's naming function to locate its segmentation file.
fovs = {
    'hickey': [
        'B010A_reg003_X01_Y01_Z01',
        'B011B_reg001_X01_Y01_Z01',
        'B011B_reg003_X01_Y01_Z01'
    ],
    'mskcc_colon': [
        '3e507f0a3dd2_Colon P20 CD3, Foxp1, PDL1, ICOS, CD8, panCK+CK7+CAM5.2__[54006,21157]_image',
        '49e532ac63a8_3-13 Colon P20 CD3, Foxp1, PDL1, ICOS, CD8, panCK+CK7+CAM5.2__[53850,11905]_image',
        '9c68495d8667_Colon P20 CD3, Foxp1, PDL1, ICOS, CD8, panCK+CK7+CAM5.2__[55647,17034]_image'
    ],
    'mskcc_pancreas': [
        '0852a4103bed Pancreas_PANEL7-10_CD40L,_CD40,_PD1,_PDL1,CD8,CK_[61352,11423]_component_data.tif_image',
        '8f39cecaa5aa Pancreas_PANEL7-10_CD40L,_CD40,_PD1,_PDL1,CD8,CK_[43899,11766]_component_data.tif_image',
        'ce418553b719 Pancreas_PANEL7-10_CD40L,_CD40,_PD1,_PDL1,CD8,CK_[55606,14580]_component_data.tif_image',
    ],
    'tonic': [
        'TONIC_TMA10_R1C1',
        'TONIC_TMA10_R3C6',
        'TONIC_TMA10_R5C4'
    ],
    'decidua': [
        '12_31750_16_12',
        '12_31750_1_10',
        '14_31758_20_4'
    ]

}

# Load every listed segmentation mask and collect label + centroid of each segmented
# object, tagged with the fov and dataset it came from.
fov_list = []

for dataset, dataset_fovs in fovs.items():
    for fov in tqdm(dataset_fovs, desc=dataset):
        inst_seg_path = naming_convention[dataset](fov)
        inst_seg = imread(inst_seg_path)
        # int32 instead of uint16: label ids above 65535 would silently wrap around in a
        # uint16 cast and collide with other labels.
        inst_seg = np.squeeze(inst_seg.astype(np.int32))
        df = pd.DataFrame(regionprops_table(inst_seg, properties=["label", "centroid"]))
        df["fov"] = fov
        df["dataset"] = dataset
        fov_list.append(df)

In [None]:
# For every cell, count the gold-standard-positive cells of the same dataset, fov and
# marker whose centroid lies in a square window of +/- `radius` pixels (bounds inclusive)
# around the cell's own centroid.
from copy import deepcopy

regionprops_df = pd.concat(fov_list).rename(columns={"label": "labels"})
merged_df_4 = merged_df_3.merge(regionprops_df, on=["dataset", "fov", "labels"], how="left")

for radius in [120]:
    # initialise the column that is actually filled below
    merged_df_4["num_pos_cells_region"] = np.nan
    # groupby visits only the (dataset, fov, channel) combinations that exist
    cell_groups = merged_df_4.groupby(["dataset", "fov", "channel"], sort=False)
    for _, df_tmp_ in tqdm(cell_groups):
        centroid_rows = df_tmp_["centroid-0"].to_numpy()
        centroid_cols = df_tmp_["centroid-1"].to_numpy()
        is_positive = (df_tmp_["gold_standard"] == 1.0).to_numpy()
        num_cells_region = np.empty(len(df_tmp_), dtype=np.int32)
        for i in range(len(df_tmp_)):
            # Cells without a centroid (no match in regionprops_df) are NaN here; every
            # comparison with NaN is False, so they get a count of 0.
            in_window = (
                (centroid_rows >= centroid_rows[i] - radius)
                & (centroid_rows <= centroid_rows[i] + radius)
                & (centroid_cols >= centroid_cols[i] - radius)
                & (centroid_cols <= centroid_cols[i] + radius)
            )
            num_cells_region[i] = np.count_nonzero(in_window & is_positive)
        merged_df_4.loc[df_tmp_.index, "num_pos_cells_region"] = num_cells_region

In [None]:
# Mutual information between the number of positive neighbours and the error flags.
# fixed random_state: mutual_info_classif adds random noise to continuous features
results_gs["sparsity"] = mutual_info_classif(
    merged_df_4["num_pos_cells_region"].values.reshape(-1, 1), merged_df_4["error_gold"], random_state=0
)
results_ss["sparsity"] = mutual_info_classif(
    merged_df_4["num_pos_cells_region"].values.reshape(-1, 1), merged_df_4["error_silver"], random_state=0
)

##### Cell density

In [None]:
# For every cell, count all cells of the same dataset, fov and marker whose centroid lies
# in a square window of +/- `radius` pixels (bounds inclusive) around the cell's own
# centroid. The cell itself is included when it has a centroid.
from copy import deepcopy

density_results = {}
for radius in [120]:
    merged_df_4["num_cells_region"] = np.nan
    # groupby visits only the (dataset, fov, channel) combinations that exist
    cell_groups = merged_df_4.groupby(["dataset", "fov", "channel"], sort=False)
    for _, df_tmp_ in tqdm(cell_groups):
        centroid_rows = df_tmp_["centroid-0"].to_numpy()
        centroid_cols = df_tmp_["centroid-1"].to_numpy()
        num_cells_region = np.empty(len(df_tmp_), dtype=np.int32)
        for i in range(len(df_tmp_)):
            # Cells without a centroid are NaN here; every comparison with NaN is False,
            # so they get a count of 0.
            in_window = (
                (centroid_rows >= centroid_rows[i] - radius)
                & (centroid_rows <= centroid_rows[i] + radius)
                & (centroid_cols >= centroid_cols[i] - radius)
                & (centroid_cols <= centroid_cols[i] + radius)
            )
            # every cell in the window counts (gold_standard is always < 2 in this table)
            num_cells_region[i] = np.count_nonzero(in_window)
        merged_df_4.loc[df_tmp_.index, "num_cells_region"] = num_cells_region
    # keep an independent snapshot of the table for this radius
    density_results[radius] = deepcopy(merged_df_4)

In [None]:
# Mutual information between the number of neighbouring cells and the error flags.
# fixed random_state: mutual_info_classif adds random noise to continuous features
results_gs["density"] = mutual_info_classif(
    merged_df_4["num_cells_region"].values.reshape(-1, 1), merged_df_4["error_gold"], random_state=0
)
results_ss["density"] = mutual_info_classif(
    merged_df_4["num_cells_region"].values.reshape(-1, 1), merged_df_4["error_silver"], random_state=0
)

##### Plot final results for Figure 3 d

In [None]:
# One row per confounder; the single value column (named 0 after the transpose) is
# renamed to "Mutual information".
result_df_gs = pd.DataFrame(results_gs).T.rename(columns={0: "Mutual information"})
result_df_ss = pd.DataFrame(results_ss).T.rename(columns={0: "Mutual information"})

In [None]:
df_gs, df_ss = result_df_gs, result_df_ss

# Put both mutual-information columns side by side (values are combined by position,
# using the index of df_ss), then order the rows by confounder.
df = pd.DataFrame(
    {
        "Nimbus": df_gs["Mutual information"].values,
        "Silver standard": df_ss["Mutual information"].values,
    },
    index=df_ss.index,
).loc[["cell_size", "heterogeneity", "rarity", "sparsity", "density"], :]


In [None]:
df_gs = result_df_gs
df_ss = result_df_ss

# Both mutual-information columns side by side (values are combined by position, using
# the index of df_ss), with the rows ordered by confounder.
df = pd.DataFrame({"Nimbus":df_gs["Mutual information"].values, "Silver standard": df_ss["Mutual information"].values}, index=df_ss.index)
df = df.loc[["cell_size", "heterogeneity", "rarity", "sparsity", "density"], :]

font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 12}

plt.rc('font', **font)

# Heat map of the mutual-information values: rows are confounders, columns are the two
# error definitions.
fig, ax = plt.subplots(figsize = (5.5, 5.5))
im = ax.imshow(df.values, cmap="Reds", vmin=0., vmax=1.)
# NOTE(review): the second tick label reads "Integrated Expression" while the underlying
# column is named "Silver standard" - confirm the intended label.
ax.set_xticks(np.arange(len(df.columns)), labels=["Nimbus", "Integrated\n Expression"], rotation=45)
ax.xaxis.tick_bottom()
ax.set_yticks(np.arange(len(df.index)), labels=["Smaller cells", "Marker \n heterogeneity", "Marker rarity", "Marker Sparsity", "Cell Density"])

# write each value, rounded to 4 decimals, into its cell of the heat map
for ii, i in enumerate(df.index):
    for jj, j in enumerate(df.columns):
        # single .loc lookup instead of chained df.loc[i][j]
        ax.text(jj, ii, df.loc[i, j].round(4),
                ha="center", va="center", color="black")
cb1 = plt.colorbar(im, orientation="vertical", ticks=[1.0, 0.0])
cb1.ax.invert_xaxis()
ax.yaxis.set_label_position("right")
ax.set_ylabel("Mutual information", rotation=270, labelpad=16)
ax.set_title("Impact of confounders on\n classification errors")
# savefig does not create missing directories
os.makedirs("figures/figure_3", exist_ok=True)
plt.savefig("figures/figure_3/error_x_confounders_mutual_information.svg", dpi=300, bbox_inches="tight")