# Workflow to find F1 score between manual annotation evaluation of IHC image and segmentation result of inferred H&E image from CODA segmentation model:

### Part 1: Code to turn manual IHC annotations saved in .xml file to a binary mask and a tissue map:

In [51]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import matplotlib.pyplot as plt

In [58]:
src = r"\\10.99.68.178\ashleyex\Type_1_diabetes\IHC to HE model\annotation_jpg"
file_src = [os.path.join(src,x) for x in os.listdir(src)]
file_src = [x for x in file_src if x.split("_")[-1][:3] == "IHC"] # all six folders

In [15]:
# First, input xml_filepath and output a dataframe of X,Y coordinates in general. (can be used for ROI as well)
def xml_to_df(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    append_df = []
    for index, Annotation in enumerate(root.iter("Annotation")):
        for Region in Annotation.iter('Region'):
            x = np.array([Vertex.get('X') for Vertex in Region.iter('Vertex')])
            y = np.array([Vertex.get('Y') for Vertex in Region.iter('Vertex')])
            id = np.array([int(Region.get('Id'))])
            classnames = index + 1
            coord_dict = {"ClassNames": [classnames], "X": [x], "Y": [y], "ID": [id]}
            df = pd.DataFrame(data=coord_dict)
            df.ID = df.ID.astype(int)
            append_df.append(df)
    coord_df = pd.concat(append_df).reset_index(drop=True)
    return (coord_df)

In [16]:
# Then, input xml_path to use xml_to_df function to output X,Y coordinates for each annotation per class:
def coord_to_multiclass_df(xml_path):
    coord_df = xml_to_df(xml_path)
    coord_df = coord_df.drop(columns=["ID"])
    dict = {"islet": 1, "duct": 2, "vessels": 3, "fat": 4, "acini": 5, "ecm": 6, "whitespace": 7,
            "nerves": 8}
    coord_df = coord_df.replace({"ClassNames": dict})
    return coord_df

In [31]:
# Then input original image and the coord_df to output the mask with unique annotations (1..N, N = 8 in this case):
def create_mask_multi_annot(xml_path, image_size = (256,256)):
    mask = np.zeros(image_size, dtype=np.uint8)
    iter_order = [6, 5, 4, 1, 2, 3, 8, 7] #[ecm acini fat islet duct vessels nerves whitespace/noise]
    coord_df = coord_to_multiclass_df(xml_path)  #use function above
    for i in iter_order:
        coord_df_tmp = coord_df[coord_df.ClassNames == i]
        for idx, row in coord_df_tmp.iterrows():
            xx = row.X.astype(float).astype('int32')
            yy = row.Y.astype(float).astype('int32')
            contours = np.array(list(zip(xx, yy)))
            class_number = row.ClassNames
            mask = cv2.fillPoly(mask, pts=[contours.astype(int)], color=(int(class_number)))
            binary_mask = mask > 0
            binary_mask = binary_mask.astype(np.uint8)
    return mask, binary_mask

In [59]:
# for loop of using create_binary_mask_label to save binary and image_mask in the annotation folder:
for file in tqdm(file_src):
    xml_path = [os.path.join(file,x) for x in os.listdir(file) if x.endswith(".xml")]
    img_path = [os.path.join(file,x) for x in os.listdir(file) if x.endswith(".jpg")]
    if len(xml_path) != len(img_path):
        assert("xml and .jpg files mismatch")
    mask_save_path = os.path.join(file,"mask")
    bin_mask_save_path = os.path.join(file,"bin_mask")
    if not os.path.exists(mask_save_path):
        os.makedirs(mask_save_path)
    if not os.path.exists(bin_mask_save_path):
        os.makedirs(bin_mask_save_path)
    for xml_file in tqdm(xml_path):
        image_name = os.path.basename(xml_file).replace("xml","png")
        mask_test, bin_mask_test = create_mask_multi_annot(xml_file)
        Image.fromarray(mask_test).save(os.path.join(mask_save_path,image_name))
        Image.fromarray(bin_mask_test).save(os.path.join(bin_mask_save_path,image_name))

  0%|          | 0/6 [00:00<?, ?it/s]
  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:05,  3.60it/s][A
 20%|██        | 4/20 [00:00<00:01, 11.64it/s][A
 30%|███       | 6/20 [00:00<00:01, 13.62it/s][A
 45%|████▌     | 9/20 [00:00<00:00, 16.53it/s][A
 60%|██████    | 12/20 [00:00<00:00, 18.69it/s][A
 75%|███████▌  | 15/20 [00:00<00:00, 20.09it/s][A
100%|██████████| 20/20 [00:01<00:00, 16.43it/s][A
 17%|█▋        | 1/6 [00:01<00:06,  1.23s/it]
  0%|          | 0/20 [00:00<?, ?it/s][A
 15%|█▌        | 3/20 [00:00<00:00, 24.92it/s][A
 30%|███       | 6/20 [00:00<00:00, 24.19it/s][A
 45%|████▌     | 9/20 [00:00<00:00, 24.37it/s][A
 60%|██████    | 12/20 [00:00<00:00, 24.25it/s][A
 75%|███████▌  | 15/20 [00:00<00:00, 22.52it/s][A
100%|██████████| 20/20 [00:00<00:00, 23.15it/s][A
 33%|███▎      | 2/6 [00:02<00:04,  1.01s/it]
  0%|          | 0/20 [00:00<?, ?it/s][A
 15%|█▌        | 3/20 [00:00<00:00, 25.01it/s][A
 30%|███       | 6/20 [00:00<00:00, 27

### Code to use the above saved binary mask and apply it to the pix2pix, pyramid-pix2pix, and I2SB segmentation map:
### For pix2pix:


In [75]:
# for the file name, for IHCA, replace islet with HE, for B, replace vessel with HE, for C, replace immune with HE for file base name:
p2p_mask_dir = r"\\10.99.68.51\Kyu\IHC2HE\Balanced_Aligned\dataset_v1_256x256\infer\test\pix2pix\classification_10162023"
for file in tqdm(file_src):
    bin_mask_file = os.path.join(file,"bin_mask")
    bin_mask_path_src = [os.path.join(bin_mask_file,x) for x in os.listdir(bin_mask_file) if x.endswith(".png")]
    infer_mask_save_path = os.path.join(file,"pix2pix_mask")
    if not os.path.exists(infer_mask_save_path):
        os.makedirs(infer_mask_save_path)
    for bin_mask_path in tqdm(bin_mask_path_src):
        mask_name = os.path.basename(bin_mask_path)
        if "IHCA" in file:
            file_name = mask_name.replace("ISLET","HE")
            file_name = file_name.replace(".png","_output.png")
        elif "IHCB" in file:
            file_name = mask_name.replace("VESSEL","HE")
            file_name = file_name.replace(".png","_output.png")
        elif "IHCC" in file:
            file_name = mask_name.replace("IMMUNE","HE")
            file_name = file_name.replace(".png","_output.png")
        infer_mask_path = os.path.join(p2p_mask_dir,file_name)
        infer_mask = np.array(cv2.imread(infer_mask_path,0))
        bin_mask = np.array(cv2.imread(bin_mask_path,0))
        infer_mask_edit = infer_mask * bin_mask
        Image.fromarray(infer_mask_edit).save(os.path.join(infer_mask_save_path,file_name))

  0%|          | 0/6 [00:00<?, ?it/s]
  0%|          | 0/20 [00:00<?, ?it/s][A
  0%|          | 0/6 [00:00<?, ?it/s]


TypeError: unsupported operand type(s) for *: 'NoneType' and 'int'

In [76]:
infer_mask_path

'\\\\10.99.68.51\\Kyu\\IHC2HE\\Balanced_Aligned\\dataset_v1_256x256\\infer\\pix2pix\\classification_10162023\\sample 1_IHCA 08_23_2023_HE_image_tile_00074_12_output.png'

### For pyramid-pix2pix:

In [None]:
# p_p2p_mask_dir =


### For I2SB-uncond:


In [None]:
# i2sb_mask_dir =

### For I2SB-cond:


In [None]:
# i2sb_mask_dir =