# 定义裁剪图片的函数

In [None]:
import numpy as np
import openslide
import xml.etree.ElementTree as ET
import cv2


def prefect_uniform(img_path, xml_path, save_path, target='train', spilt_size = 512):
    # read_img
    img_open = openslide.OpenSlide(img_path)
    file_name = img_path.split('/')[-1].split('.')[0]
    size = img_open.dimensions
    mask = np.zeros([size[1], size[0]], dtype=np.uint8)
    count = 0

    # read_xml
    tree = ET.parse(xml_path)
    root = tree.getroot()
    annotation = root.findall('Annotations/Annotation')
    for region in annotation:
        point_set1 = []
        for point in region.findall('Coordinates/Coordinate'):
            x = float(point.attrib['X'])
            y = float(point.attrib['Y'])
            point_set1.append([x, y])

        pts1 = np.asarray([point_set1], dtype=np.int32)
        cv2.fillPoly(img=mask, pts=pts1, color=(255, 255, 255))

    # compare
    for region1 in annotation:
        point_mask = np.zeros([size[1], size[0]], dtype=np.uint8)
        point_set2 = []
        for point in region1.findall('Coordinates/Coordinate'):
            x = float(point.attrib['X'])
            y = float(point.attrib['Y'])
            point_set2.append([x, y])

        left_point = min(point_set2, key=lambda a: a[0])[0]
        right_point = max(point_set2, key=lambda a: a[0])[0]
        bottom_point = min(point_set2, key=lambda a: a[1])[1]
        top_point = max(point_set2, key=lambda a: a[1])[1]
        mask_size = max(int(right_point - left_point), (int(top_point - bottom_point)))

        temp_array = np.array([left_point, bottom_point], dtype=np.int32)
        left_point = temp_array[0]
        bottom_point = temp_array[1]

        pts = np.asarray([point_set2], dtype=np.int32)
        cv2.fillPoly(img=point_mask, pts=pts, color=(255, 255, 255))

        # 标注尺寸小于512且附近无其他标注则保存
        if (mask_size <= 512):

            save_img = img_open.read_region(location=(left_point, bottom_point), size=(spilt_size, spilt_size), level=0)
            save_img.save(save_path + target + '/' + 'img/' + file_name + '_' + str(count) + '.png')
            cv2.imwrite(save_path + target + '/' + 'mask/'+ file_name + '_' + str(count) + '.png',
                             point_mask[bottom_point:bottom_point + spilt_size, left_point:left_point + spilt_size])

            count += 1


# 读取图片并裁剪保存

In [None]:
import glob
from tqdm import tqdm
import os


num_xml = len(os.listdir('xml path'))
save_to = 'patch_path'

with tqdm(total=num_xml, desc='Processing', colour='blue') as pbar:
    for xml in glob.glob('xml path/*.xml'):
        wsi_img = xml.replace('annotations', 'Images').replace('xml', 'tif')
        if  os.path.exists(wsi_img):
            prefect_uniform(wsi_img, xml, save_to, target='')

        len_img = len(os.listdir(save_to + 'img/'))
        len_mask = len(os.listdir(save_to + 'mask/'))
        pbar.set_postfix(**{"img": f"{len_img}",
                            "mask": f"{len_mask}"})
        pbar.update(1)

Processing: 100%|[34m██████████[0m| 45/45 [01:34<00:00,  2.09s/it, img=378, mask=378]


# 随机分割训练集和测试集

In [21]:
import random
import shutil


imgs = glob.glob(save_to + 'img/*.png')
l = len(imgs)
split = 0.7

random.shuffle(imgs)

for i, img in enumerate(imgs):
    mask = img.replace('img', 'mask')
    if i < split*l:
        shutil.copy(img, save_to + 'train/img/')
        shutil.copy(mask, save_to + 'train/mask/')
    else:
        shutil.copy(img, save_to + 'val/img/')
        shutil.copy(mask, save_to + 'val/mask/')

print(len(os.listdir(save_to + 'train/img/')))
print(len(os.listdir(save_to + 'val/img/')))

273
118
