# Parse XML annotation file with X,Y coordinates and instance ID into a DataFrame

In [1]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def xml_to_df(xml_filepath):

    tree = ET.parse(xml_filepath)
    root = tree.getroot()
    classlist = []
    for Annotation in root.iter("Annotation"):
        for Attribute in Annotation.iter("Attribute"):
            classlist.append(Attribute.attrib.get('Name'))
    classlists = sorted(classlist)
    append_df = []
    for index, Annotation in enumerate(root.iter("Annotation")):
        for Region in Annotation.iter('Region'): #iterate over the Region so we can iterate over id 1 and 2 (two circles):
            x = np.array([Vertex.get('X') for Vertex in Region.iter('Vertex')])
            y = np.array([Vertex.get('Y') for Vertex in Region.iter('Vertex')])
            id = np.array([int(Region.get('Id'))])
            classnames = classlists[index]
            coord_dict = {"ClassNames": [classnames], "X": [x], "Y": [y], "ID": [id]}
            df = pd.DataFrame(data = coord_dict)
            df.ID = df.ID.astype(int)
            append_df.append(df)
    coord_df = pd.concat(append_df).reset_index(drop=True)
    return(coord_df)

In [3]:
coord_df = xml_to_df(r'\\fatherserverdw\kyuex\clue images\annotations\roi\2022-06-07 13.18.40.xml')
coord_df

Unnamed: 0,ClassNames,X,Y,ID
0,tissue,"[5615, 5725, 5810, 5865, 5900, 5915, 5930, 594...","[10850, 10890, 10930, 10950, 10960, 10970, 109...",1
1,tissue,"[24299, 24364, 24483, 24537, 24613, 24624, 246...","[20025, 20068, 20133, 20176, 20209, 20231, 202...",2


# Convert X,Y Coordinates to Binary Mask and Read and Resize Image:

In [21]:
import openslide
import os
os.environ["OPENCV_IO_MAX_IMAGE_PIXELS"] = pow(2,40).__str__()
import cv2

def df_to_image_mask(xml_filepath, ndpi_filepath,downsample_factor):
    coord_df = xml_to_df(xml_filepath)
    slide = openslide.open_slide(ndpi_filepath)
    rgb_dim = slide.dimensions
    print("Dimension of level 0 of the image is: ",rgb_dim)
    slide_level_dim = slide.level_dimensions
    num_levels = len(slide_level_dim)
    print("Number of levels in this image are:",num_levels)
    factors = slide.level_downsamples
    print("Each level is downsampled by:",factors)
    blank = np.zeros(rgb_dim, dtype = np.uint8) #white
    for idx, row in coord_df.iterrows():
        xx = row.X.astype('int')
        yy = row.Y.astype('int')
        xy = list(zip(xx,yy))
        contours = np.array(xy)
        mask = cv2.fillPoly(blank, pts=[contours.astype(int)], color=idx+125)
    target_level = slide.get_best_level_for_downsample(downsample_factor)
    target_dim = slide.level_dimensions[target_level]
    rsf = [x/y for x,y in zip(rgb_dim,target_dim)]
    image_resized = slide.read_region(location=(0,0),level=target_level,size=target_dim)
    mask_resized = cv2.resize(mask,[_//round(rsf[0]) for _ in mask.shape],interpolation=cv2.INTER_NEAREST)
    #cv2.imwrite(r"\\fatherserverdw\Kevin\\binarymask.jpg",mask_resized) #save binary mask
    return image_resized,mask_resized

# Cropping RGB Image with Mask:

In [22]:
image,mask = df_to_image_mask(r'\\fatherserverdw\kyuex\clue images\annotations\roi\2022-06-07 13.18.40.xml',r'\\fatherserverdw\kyuex\clue images\2022-06-07 13.18.40.ndpi',10)

Dimension of level 0 of the image is:  (48000, 47872)
Number of levels in this image are: 8
Each level is downsampled by: (1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0)


In [6]:
def crop_final_mask(image,mask):
    imagearr = np.array(image)
    imagearr = imagearr[:,:,:3]
    maskarr = np.array(mask)
    maskrgb = np.repeat(maskarr[:,:,np.newaxis],3,axis=2)
    for tissueID in range(1,np.max(maskrgb[:])): #from first tissue id to end
        #masking
        masktmp = mask==tissueID #boolean
        masktmp = np.repeat(masktmp[:,:,np.newaxis],3,axis=2) #change shape to match shape of imagearr
        final_image = np.multiply(imagearr,masktmp)
        #crop
        [x_crop,y_crop] = np.where(final_image[:,:,0]>0)
        cropped_final_image = final_image[np.min(x_crop):np.max(x_crop),np.min(y_crop):np.max(y_crop)]
        cropped_final_image = cropped_final_image[:,:,::-1]
        return cropped_final_image
        cv2.imwrite(r"\\fatherserverdw\Kevin\\" + "final_image" + str(tissueID) + ".jpg",cropped_final_image)

In [7]:
crop_final_mask(image,mask)

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

# Now create mask for multiple classes:

In [8]:
# coord_df_1 = xml_to_df(r'\\fatherserverdw\kyuex\clue images\annotations\12class\unhealthy_wrongbody\2022-06-07 13.18.40.xml')
# coord_df_1 = coord_df_1.drop(columns = ["ID"])
# dict = {"corneum" : 1,"spinosum": 2,"hairshaft":3,"hairfollicle":4,"smoothmuscle":5,"oil":6,"sweat":7,"nerve":8,"bloodvessel":9,"ecm":10,"fat":11,"white":12}
# coord_df_1 = coord_df_1.replace({"ClassNames": dict})
# coord_df_1.to_pxxckle(r'\\fatherserverdw\Kevin\coord_df.pkl')

coord_df_1 = pd.read_pickle(r'\\fatherserverdw\Kevin\coord_df.pkl')
coord_df_1

Unnamed: 0,ClassNames,X,Y
0,9,"[7833, 7833, 7833, 7834, 7834, 7835, 7835, 783...","[6594, 6593, 6592, 6592, 6591, 6591, 6589, 658..."
1,9,"[8596, 8597, 8597, 8598, 8598, 8600, 8600, 860...","[6019, 6019, 6018, 6015, 6014, 6014, 6013, 601..."
2,9,"[10214, 10215, 10215, 10215, 10216, 10216, 102...","[4918, 4918, 4915, 4914, 4914, 4913, 4913, 491..."
3,9,"[8993, 8992, 8991, 8990, 8990, 8989, 8988, 898...","[5313, 5313, 5313, 5313, 5315, 5315, 5315, 531..."
4,9,"[7521, 7521, 7521, 7521, 7521, 7522, 7522, 752...","[7007, 7006, 7004, 7003, 7002, 7002, 7001, 700..."
...,...,...,...
144,12,"[33402, 33404, 33405, 33406, 33406, 33407, 334...","[20590, 20590, 20590, 20590, 20591, 20591, 205..."
145,12,"[39948, 39955, 39959, 39970, 39981, 39989, 400...","[39749, 39749, 39749, 39749, 39749, 39752, 397..."
146,12,"[39193, 39189, 39185, 39181, 39178, 39178, 391...","[37963, 37963, 37974, 37978, 37982, 37986, 379..."
147,12,"[39767, 39769, 39770, 39772, 39773, 39774, 397...","[38084, 38084, 38084, 38085, 38085, 38086, 380..."


In [9]:
# for loop:

# slide = openslide.open_slide(r'\\fatherserverdw\kyuex\clue images\2022-06-07 13.18.40.ndpi')
# target_level = slide.get_best_level_for_downsample(10)
# target_dim = slide.level_dimensions[target_level]
# rsf = [x/y for x,y in zip(slide.dimensions,target_dim)]
# image = slide.read_region(location=(0,0),level=target_level,size=target_dim)
# imagearr = np.array(image)
# imagearr = imagearr[:,:,:3]
# blank = np.zeros(target_dim, dtype = np.uint8)
#
# iter_order = [2,10,5,4,6,11,7,9,8,12,3,1]
# for i in iter_order:
#     coord_df_1tmp = coord_df_1[coord_df_1.ClassNames == i]
#     for idx, row in coord_df_1tmp.iterrows():
#         xx = row.X.astype('int32')
#         yy = row.Y.astype('int32')
#         # rgb_dim = (int(max(yy)),int(max(xx)))
#         # xy = list(zip(xx-min(xx),yy-min(yy)))
#         contours = np.array(list(zip(xx,yy)))
#         contours = contours/rsf[0]
#         class_number = coord_df_1.ClassNames.values[idx]
#         mask = cv2.fillPoly(mask, pts=[contours.astype(int)], color=(int(class_number)))



        # maskarr = np.array(mask)
        # masktmp = maskarr == class_number #boolean
        # masktmp = np.repeat(masktmp[:,:,np.newaxis],3,axis=2)
        # masktmp = masktmp.astype(np.uint8)
        # imagearr = cv2.resize(imagearr,np.array([int(masktmp.shape[1]),int(masktmp.shape[0])]),interpolation=cv2.INTER_NEAREST)
        # final_image = np.multiply(imagearr,masktmp) # both 8265x6605x3
        # [x_crop,y_crop] = np.where(final_image[:,:,0]>0)
        # cropped_final_image = final_image[np.min(y_crop):np.max(y_crop),np.min(x_crop):800] #np.max(x_crop) is not working for some reason
        # aaa
        #cv2.imwrite(r"\\fatherserverdw\Kevin\multiple_class\\" + "final_image" + str(idx) + ".jpg",cropped_final_image)

In [10]:
#label the binary version of constructed mask with unique pixel value for each annotation circle
#iterate thru the labeled binary mask to crop connected objects
#use the coordinate for the crop above to crop the constructed mask
#also crop the rgb image
#save both constructed mask and rgb image


In [11]:
# mask_of_unique_annotations #differnet annotation has differnet pixel values 1....N annotations
# binary_mask = mask_of_unique_annotations>0
# binary_mask_label = skimage.measure.label(binary_mask)
#
# for idx,label in enumerate(range(1,np.max(binary_mask_label))):
#     labeltmp = binary_mask_label[binary_mask_label==label]
#     [x,y] = np.where(labeltmp)
#     targetmask = mask_of_unique_annotations[min(x):max(x),min(y):max(y)]
#     dstpth = idx+'.png'
#     Image.fromarray(targetmask).save(dstpth)
#     targetim = rgbim[min(x):max(x),min(y):max(y),:]
#
#
#


In [12]:
import pyelastix as pe
import imageio.v2 as imageio
import os
from PIL import Image
import time

path = r'\\fatherserverdw\Q\research\images\CLUE\3D study\he\4xr_jpg'
image_files_path = [os.path.join(path, file) for file in os.listdir(path) if file.endswith('.jpg')]

image1 = imageio.imread(image_files_path[0])
image2 = imageio.imread(image_files_path[1])
image1 = image1[:,:,1].astype('float32')
image2 = image2[:,:,1].astype('float32')
print(image1.shape == image2.shape)

False


In [13]:
def pad_images_to_same_size(images):
    width_max = 0
    height_max = 0
    for img in images: #pad image to the largest image
        h, w = img.shape[:2]
        width_max = max(width_max, w)
        height_max = max(height_max, h)

    images_padded = []
    for img in images:
        h, w = img.shape[:2]
        diff_vert = height_max - h
        pad_top = diff_vert//2
        pad_bottom = diff_vert - pad_top
        diff_hori = width_max - w
        pad_left = diff_hori//2
        pad_right = diff_hori - pad_left
        img_padded = cv2.copyMakeBorder(img, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=255)
        assert img_padded.shape[:2] == (height_max, width_max)
        images_padded.append(img_padded)

    return images_padded

image1, image2 = pad_images_to_same_size([image1,image2])
print(image1.shape == image2.shape)

padded_image1 = Image.fromarray(image1)
padded_image1 = padded_image1.convert("L")
padded_image1.save(r'\\fatherserverdw\Kevin\paddedimage1.jpg')

padded_image2 = Image.fromarray(image2)
padded_image2 = padded_image2.convert("L")
padded_image2.save(r'\\fatherserverdw\Kevin\paddedimage2.jpg')

True


In [14]:
params = pe.get_default_params()
params.MaximumNumberOfIterations = 200
params.NumberOfResolutions = 3

start = time.time()
image3, field = pe.register(image1,image2,params)
end = time.time()
print("time it took to register: "+  str(end-start) + " seconds")

img3 = Image.fromarray(image3)
img3.show()
img3 = img3.convert("L")
img3.save(r'\\fatherserverdw\Kevin\image3.jpg')

Found elastix version: 5.0.1 in 'c:\\program files\\elastix-5.0.1-win64\\elastix.exe'
Calling Elastix to register images ...

resolution 0, iter 1
resolution 0, iter 2
resolution 0, iter 3
resolution 0, iter 4
resolution 0, iter 5
resolution 0, iter 6
resolution 0, iter 7
resolution 0, iter 8
resolution 0, iter 9
resolution 0, iter 10
resolution 0, iter 11
resolution 0, iter 12
resolution 0, iter 13
resolution 0, iter 14
resolution 0, iter 15
resolution 0, iter 16
resolution 0, iter 17
resolution 0, iter 18
resolution 0, iter 19
resolution 0, iter 20
resolution 0, iter 21
resolution 0, iter 22
resolution 0, iter 23
resolution 0, iter 24
resolution 0, iter 25
resolution 0, iter 26
resolution 0, iter 27
resolution 0, iter 28
resolution 0, iter 29
resolution 0, iter 30
resolution 0, iter 31
resolution 0, iter 32
resolution 0, iter 33
resolution 0, iter 34
resolution 0, iter 35
resolution 0, iter 36
resolution 0, iter 37
resolution 0, iter 38
resolution 0, iter 39
resolution 0, iter 40
res

In [15]:
#overlay image 3 on top of image 1 and image 2 to see how well the image 2 is rotated in reference to image 1:
alpha, beta = 0.5, 0.5
overlay_img1_img3 = cv2.addWeighted(image1,alpha,image3,beta,0.0)
img_overlay = Image.fromarray(overlay_img1_img3)
img_overlay.show()
img_overlay = img_overlay.convert("L")
img_overlay.save(r'\\fatherserverdw\Kevin\overlay.jpg')

# another method:
difference = image1 - image3
difference = difference.astype(np.int)
difference_df = pd.DataFrame(difference)
difference_df = difference_df[difference_df>0]
difference_df = difference_df.fillna(0)
diff_counts = np.unique(difference_df,return_counts=True)[1]
sum_diff = np.sum(diff_counts) - diff_counts[0]
print("{} pixel values out of {} total pixel values are different".format(sum_diff,image1.shape[0]*image1.shape[1]))
print("{}% pixel values are different".format(sum_diff*100/(image1.shape[0]*image1.shape[1])))

7380029 pixel values out of 33524928 total pixel values are different
22.013556598838928% pixel values are different
