In [6]:
import os
import math
from glob import glob
import numpy as np
import pandas as pd
import cv2
from tqdm.notebook import tqdm
import openslide
import libtiff
from shapely.geometry import Polygon
import lxml.etree as ET
from kneed import KneeLocator
from scipy.interpolate import splprep, splev
from typing import List, Dict

libtiff.libtiff_ctypes.suppress_warnings()

In [7]:
tissue_kernel_1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
tissue_kernel_2 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

def get_otsu_mask(_img):
    _, _, v = cv2.split(cv2.cvtColor(_img, cv2.COLOR_RGB2YUV))
    v = cv2.medianBlur(v, 5)
    _, _mask = cv2.threshold(v, -1, 255, cv2.THRESH_OTSU)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_OPEN, tissue_kernel_1, iterations=2)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_CLOSE, tissue_kernel_1, iterations=2)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_OPEN, tissue_kernel_1, iterations=5)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_CLOSE, tissue_kernel_1, iterations=5)
    return _mask

def get_gray_mask(_img):
    v = cv2.cvtColor(_img, cv2.COLOR_RGB2GRAY)
    v = cv2.medianBlur(v, 5)
    _, _mask = cv2.threshold(v, -1, 255, cv2.THRESH_OTSU)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_OPEN, tissue_kernel_1, iterations=2)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_CLOSE, tissue_kernel_1, iterations=2)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_OPEN, tissue_kernel_1, iterations=5)
    _mask = cv2.morphologyEx(_mask, cv2.MORPH_CLOSE, tissue_kernel_1, iterations=5)
    _mask = ~_mask
    return _mask

def get_level(n, cnt):
    if (n % 2) == 0:
        return get_level((n // 2), cnt+1)
    else:
        return cnt

def get_mask(slide_name, magnification):
    slide_path = glob(f'/workspace/data3/A100_data/*/*/{slide_name}.svs')
    if len(slide_path) == 0:
        slide_path = glob(f'/workspace/data4/A100_data/*/*/{slide_name}.svs')
        if len(slide_path) == 0:
            slide_path = glob(f'/workspace/data3/A100_data/*/{slide_name}.svs')
    slide_path = slide_path[0]
    
    wsi_slide = openslide.OpenSlide(slide_path)
    mpp = wsi_slide.properties['aperio.MPP']
    if float(mpp) < 0.40:
        ORG_MAG = 40
    else:
        ORG_MAG = 20
    
    LEVEL = get_level(int(ORG_MAG // 1.25), 0)
    w_, h_ = wsi_slide.level_dimensions[LEVEL//2]
    wsi_array = np.array(wsi_slide.read_region([0, 0], LEVEL//2, [w_, h_]))
    if LEVEL % 2:
        wsi_array = cv2.resize(wsi_array, (w_//2, h_//2))
    otsu_mask = get_otsu_mask(wsi_array)
    gray_mask = get_gray_mask(wsi_array)
    tissue_mask = cv2.bitwise_or(otsu_mask, gray_mask)
    
    LEVEL = get_level(int(ORG_MAG // magnification), 0)
    w_, h_ = wsi_slide.level_dimensions[LEVEL//2]
    if LEVEL % 2:
        w_, h_ = w_//2, h_//2
    # tissue_mask = cv2.resize(tissue_mask, (w_, h_))
    
    return tissue_mask, mpp, LEVEL

def calculate_contour_distance(contour1, contour2): 
    (x1, y1), r1 = cv2.minEnclosingCircle(contour1)
    (x2, y2), r2 = cv2.minEnclosingCircle(contour2)
    a = x2 - x1
    b = y2 - y1
    return math.sqrt((a * a) + (b * b))

def merge_contours(contour1, contour2):
    return np.concatenate((contour1, contour2), axis=0)

def agglomerative_cluster(contours, threshold_distance=500.0):
    current_contours = contours
    while len(current_contours) > 1:
        min_distance = None
        min_coordinate = None

        for x in range(len(current_contours)-1):
            for y in range(x+1, len(current_contours)):
                distance = calculate_contour_distance(current_contours[x], current_contours[y])
                if min_distance is None:
                    min_distance = distance
                    min_coordinate = (x, y)
                elif distance < min_distance:
                    min_distance = distance
                    min_coordinate = (x, y)

        if min_distance < threshold_distance:
            index1, index2 = min_coordinate
            current_contours[index1] = merge_contours(current_contours[index1], current_contours[index2])
            del current_contours[index2]
        else: 
            break

    return current_contours

def resize_image(image, magnification):
    height, width, _ = image.shape
    new_size = (width // int(magnification * 0.8), height // int(magnification * 0.8))
    return cv2.resize(image, new_size, cv2.INTER_MAX)

def get_points_list(contour_points, level, mag):
    downsample = 2 ** (level + 2)
    offset = {'X': 0, 'Y': 0}
    points_list = []
    for contour in contour_points:
        points = []
        for point in contour:
            x = point[0][0] * downsample + offset['X']
            y = point[0][1] * downsample + offset['Y']
            points.append({'X': x, 'Y': y})
        x = contour[0][0][0] * downsample + offset['X']
        y = contour[0][0][1] * downsample + offset['Y']
        points.append({'X': x, 'Y': y})
        points_list.append(points)
    return points_list

def get_hull_points_list(hull_lp, level, mag):
    downsample = 2**(level + 2)
    offset={'X': 0,'Y': 0}
    pointsList = []
    for j in range(np.shape(hull_lp)[0]):
        pointList = []
        for i in range(np.shape(hull_lp[j])[0]):
            point = {'X': (hull_lp[j][i][0][0] * downsample) + offset['X'], 'Y': (hull_lp[j][i][0][1] * downsample) + offset['Y']}
            pointList.append(point)
        point = {'X': (hull_lp[j][0][0][0] * downsample) + offset['X'], 'Y': (hull_lp[j][0][0][1] * downsample) + offset['Y']}
        pointList.append(point)
        pointsList.append(pointList)
    return pointsList

def make_hull(new_maskPoints_l):
    hull_l = []
    for i in range(len(new_maskPoints_l)):
        hull = cv2.convexHull(new_maskPoints_l[i])
        hull_l.append(hull)

    hull_lp = []
    for i in range(len(hull_l)):
        x,y = hull_l[i].T
        x = x.tolist()[0]
        y = y.tolist()[0]
        tck, u = splprep([x,y], u=None, s=1.0, per=1)
        u_new = np.linspace(u.min(), u.max(), 150)
        x_new, y_new = splev(u_new, tck, der=0)
        res_array = [[[int(i[0]), int(i[1])]] for i in zip(x_new,y_new)]
        smooth = np.asarray(res_array, dtype=np.int32)
        hull_lp.append(smooth)
    return hull_lp

In [8]:
def to_xml(slide_name = 'A100_00206_03_stm_c1', mag = 5, thres = 0.3, thresdist = 4):
    tissue_mask, mpp, level = get_mask(slide_name, mag)

    read_path = '/workspace/data4/changwoo/SDP/mask/Carcinoma_v4/'+str(mag)+'X/'+f'{slide_name}.png'
    img = cv2.imread(read_path)
    
    img = resize_image(img, mag)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_gray = cv2.bitwise_and(img_gray, tissue_mask)
    img_blur = cv2.GaussianBlur(img_gray, (0, 0), 5)

    thres1 = int(thres * 255)
    _, mask = cv2.threshold(img_blur, thres1, 255, cv2.THRESH_BINARY_INV)
    maskPoints, contours = cv2.findContours(np.uint8(mask), mode = cv2.RETR_TREE, method = cv2.CHAIN_APPROX_NONE)
    maskPoints_l = sorted(maskPoints, key = cv2.contourArea, reverse = True)
    maskPoints_l = maskPoints_l[1:]
    
    pointsList = get_points_list(maskPoints_l, level, mag)

    Annotations = ET.Element('Annotations')
    # create new xml Tree - Annotations
    name1 = 'threshold '+str(thres)
    Annotations = ET.Element('Annotations', attrib={'MicronsPerPixel': mpp})
    Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Id': '1', 'Name': name1, 'ReadOnly': '0', 'NameReadOnly': '0', 'LineColorReadOnly': '0', 'Incremental': '0', 'Type': '4', 'LineColor': '6640479', 'Visible': '1', 'Selected': '1', 'MarkupImagePath': '', 'MacroName': ''})
    Regions = ET.SubElement(Annotation, 'Regions')
    for i in range(1, len(pointsList)+1):
        regionID = i
        Region = ET.SubElement(Regions, 'Region', attrib={'Id': str(regionID), 'Type': '0', 'NegativeROA': '0', 'InputRegionId': '0', 'Analyze': '1', 'DisplayId': str(regionID)})
        Vertices = ET.SubElement(Region, 'Vertices')
        for point in pointsList[i-1]: # add new Vertex
            ET.SubElement(Vertices, 'Vertex', attrib={'X': str(point['X']), 'Y': str(point['Y']), 'Z': '0'})

    distance = thresdist * 125
    new_maskPoints_l = agglomerative_cluster(maskPoints_l, threshold_distance = distance)
    
    hull_lp = make_hull(new_maskPoints_l)
    pointsList = get_hull_points_list(hull_lp, level, mag)

    # create new xml Tree - Annotations
    name2 = 'threshold distance ' + str(thresdist) + 'mm'
    Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Id': '2', 'Name': name2, 'ReadOnly': '0', 'NameReadOnly': '0', 'LineColorReadOnly': '0', 'Incremental': '0', 'Type': '4', 'LineColor': '65535', 'Visible': '1', 'Selected': '1', 'MarkupImagePath': '', 'MacroName': ''})
    Regions = ET.SubElement(Annotation, 'Regions')
    for i in range(1, len(pointsList)+1):
        regionID = i
        Region = ET.SubElement(Regions, 'Region', attrib={'Id': str(regionID), 'Type': '0', 'NegativeROA': '0', 'InputRegionId': '0', 'Analyze': '1', 'DisplayId': str(regionID)})
        Vertices = ET.SubElement(Region, 'Vertices')
        for point in pointsList[i-1]: # add new Vertex
            ET.SubElement(Vertices, 'Vertex', attrib={'X': str(point['X']), 'Y': str(point['Y']), 'Z': '0'})

    xml_data = ET.tostring(Annotations, pretty_print=True)

    save_path = '/workspace/src/chaehyeon/png_contour/xml/5X/'+f'{slide_name}.xml'
    with open(save_path, 'w') as f:
        f.write(xml_data.decode())

In [None]:
df = pd.read_csv('/workspace/src/chaehyeon/png_contour/Carcinoma_v4_5X_TP.csv')
df = df.reset_index(drop=True)
name_list = df['name'].tolist()
name_list = name_list[8:]
cnt = 0
for i in range(len(name_list)):
    cnt += 1
    to_xml(name_list[i])
print(cnt)

  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
  (i, m, i)))
