## 处理病理学图片
* 1.根据原图纹理信息对图像进行剪切；
* 2.提取标记信息（轮廓）；
* 3.根据掩码图像（轮廓图像手动填充）及原图（去除白色无效区域）生成详细掩码图；
* 4.根据掩码图像生成标签文件（mat）；

In [16]:
from __future__ import absolute_import, division, print_function

import os
import glob
import random
import shutil
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
from six.moves import xrange
from skimage.color import label2rgb
from scipy.io import loadmat, savemat

%matplotlib inline

### 基本设置#1
* 设置数据格式、目录等；
* 获取文件列表、文件名（不包括后缀）列表。

In [8]:
# Input image extension and dataset root. Every stage of the pipeline reads
# from or writes to a sub-directory of dir_rt.
data_fmt = '.jpg'
dir_rt = '../data/2.5x/'
dir_ori = os.path.join(dir_rt, 'original')
dir_ann = os.path.join(dir_rt, 'annotated')
# Both input directories must already exist; fail early with a clear message.
for required_dir in (dir_ori, dir_ann):
    if not os.path.exists(required_dir):
        raise ValueError(required_dir + ' does not exist.')

# glob.glob returns matches in arbitrary (filesystem) order. Sort them so that
# progress output and index-based slices of name_list are reproducible.
file_list = sorted(glob.glob(os.path.join(dir_ori, '*'+data_fmt)))
# Base names without extension; the other stages rebuild paths from these.
name_list = [os.path.splitext(os.path.basename(f))[0] for f in file_list]

total_file_num = len(name_list)
print('Number of files:', total_file_num)

# 1.clip
dir_ori_clipped = os.path.join(dir_rt, 'original_clipped')
# NOTE(review): 'anntated_clipped' looks like a typo for 'annotated_clipped'.
# It is kept as-is because renaming would orphan data already written to disk.
dir_ann_clipped = os.path.join(dir_rt, 'anntated_clipped')
# 2.contour
dir_contour = os.path.join(dir_rt, 'contour')

# Create the output directories on first use.
for out_dir in (dir_ori_clipped, dir_ann_clipped, dir_contour):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

Number of files: 278


### 1.根据原图纹理信息对图像进行剪切

In [None]:
# A column (row) is considered valid when its count of Canny edge pixels
# exceeds image_height // TH_VALID (image_width // TH_VALID).
TH_VALID = 100

# Crop every original/annotated pair to the bounding box of the textured
# region, so that empty borders are removed identically from both images.
for idx, n in enumerate(name_list, 1):
    print('{}/{}   {}'.format(idx, total_file_num, n))

    f = n + data_fmt

    img_ori = cv.imread(os.path.join(dir_ori, f))
    img_ann = cv.imread(os.path.join(dir_ann, f))
    # cv.imread signals failure by returning None instead of raising.
    if img_ori is None or img_ann is None:
        raise IOError('Failed to read original/annotated image: ' + f)

    # Binary edge map: 1 where Canny found an edge, 0 elsewhere.
    img_edges = cv.Canny(img_ori, 50, 200)
    img_edges[img_edges > 0] = 1

    sum_each_col = np.sum(img_edges, 0)
    sum_each_row = np.sum(img_edges, 1)

    h, w = img_edges.shape
    th_col = h // TH_VALID
    th_row = w // TH_VALID
    valid_cols = np.where(sum_each_col > th_col)[0]
    valid_rows = np.where(sum_each_row > th_row)[0]
    if valid_cols.size == 0 or valid_rows.size == 0:
        raise ValueError('No textured region found in ' + f)

    w_start, w_end = valid_cols[0], valid_cols[-1]
    h_start, h_end = valid_rows[0], valid_rows[-1]

    # NOTE(review): the slice end is exclusive, so the last valid row/column
    # is dropped. Left unchanged so clipped sizes stay consistent with any
    # masks that were already drawn on previously clipped images.
    cv.imwrite(os.path.join(dir_ori_clipped, f),
               img_ori[h_start:h_end, w_start:w_end, :])
    cv.imwrite(os.path.join(dir_ann_clipped, f),
               img_ann[h_start:h_end, w_start:w_end, :])

### 2.提取标记信息（轮廓）
* 通过使用numpy查找像素值不同提取标记信息，效果比OpenCV灰度化后做帧差要好；
* 对轮廓做膨胀操作，忽略掉边界不确定标注的影响。

In [None]:
# 5x5 structuring element used to thicken the extracted contour.
kernel = np.ones((5, 5), dtype=np.uint8)

# The annotation contour is wherever the annotated image differs from the
# original image; it is dilated and written to dir_contour.
for idx, n in enumerate(name_list, 1):
    print('{}/{}   {}'.format(idx, total_file_num, n))

    f = n + data_fmt
    path_ori = os.path.join(dir_ori_clipped, f)
    path_ann = os.path.join(dir_ann_clipped, f)

    img_ori = cv.imread(path_ori)
    img_ann = cv.imread(path_ann)

    contour = np.zeros_like(img_ori)
    # Element-wise difference, tested per channel. For the uint8 arrays that
    # cv.imread normally returns the subtraction wraps around, but it is still
    # zero exactly where the two values are equal.
    changed = np.subtract(img_ori, img_ann) != 0
    contour[changed] = 255

    # Five dilation passes widen the contour, so imprecise annotation borders
    # are covered by it.
    dilation = cv.dilate(contour, kernel, iterations=5)
    cv.imwrite(os.path.join(dir_contour, f), dilation)

### 基本设置#2
* 设置目录等。

In [24]:
# 3.mask / fine mask
# Pixels whose B, G and R values all exceed TH_INVALID are treated as invalid
# (near-white) by the later stages. The threshold is embedded in the names of
# the "fine" output directories so that runs with different thresholds do not
# overwrite each other.
TH_INVALID = 200
dir_mask = os.path.join(dir_rt, 'mask')
dir_mask_fine = os.path.join(dir_rt, 'mask_fine_th'+str(TH_INVALID))
# The mask directory is an input to stages 3 and 4 and is never created here,
# so it must already exist.
if not os.path.exists(dir_mask):
    raise ValueError(dir_mask + ' does not exist.')

# 4.label
# Label files (.mat) per class scheme.
dir_label = os.path.join(dir_rt, 'label')
dir_label_cls2 = os.path.join(dir_label, 'cls2')
dir_label_cls4 = os.path.join(dir_label, 'cls4')
dir_label_cls6 = os.path.join(dir_label, 'cls6')
# Colour previews of the labels.
dir_label_mask = os.path.join(dir_rt, 'label_mask')
dir_label_mask_cls2 = os.path.join(dir_label_mask, 'cls2')
dir_label_mask_cls4 = os.path.join(dir_label_mask, 'cls4')
dir_label_mask_cls6 = os.path.join(dir_label_mask, 'cls6')

# Same layout for the "fine" labels (invalid pixels reset to background).
dir_label_fine = os.path.join(dir_rt, 'label_fine_th'+str(TH_INVALID))
dir_label_fine_cls2 = os.path.join(dir_label_fine, 'cls2')
dir_label_fine_cls4 = os.path.join(dir_label_fine, 'cls4')
dir_label_fine_cls6 = os.path.join(dir_label_fine, 'cls6')
dir_label_mask_fine = os.path.join(dir_rt, 'label_mask_fine_th'+str(TH_INVALID))
dir_label_mask_fine_cls2 = os.path.join(dir_label_mask_fine, 'cls2')
dir_label_mask_fine_cls4 = os.path.join(dir_label_mask_fine, 'cls4')
dir_label_mask_fine_cls6 = os.path.join(dir_label_mask_fine, 'cls6')

# Create every output directory that is still missing.
for out_dir in (
        dir_mask_fine,
        dir_label,
        dir_label_cls2, dir_label_cls4, dir_label_cls6,
        dir_label_mask,
        dir_label_mask_cls2, dir_label_mask_cls4, dir_label_mask_cls6,
        dir_label_fine,
        dir_label_fine_cls2, dir_label_fine_cls4, dir_label_fine_cls6,
        dir_label_mask_fine,
        dir_label_mask_fine_cls2, dir_label_mask_fine_cls4,
        dir_label_mask_fine_cls6,
):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

### 3.根据掩码图像及原图生成详细掩码图

In [None]:
# Build the "fine" mask: copy the mask image and black out every pixel that
# is near-white in the clipped original image.
for idx, n in enumerate(name_list, 1):
    print('{}/{}   {}'.format(idx, total_file_num, n))

    f = n + data_fmt

    img_ori = cv.imread(os.path.join(dir_ori_clipped, f))
    img_mask = cv.imread(os.path.join(dir_mask, f))

    # A pixel is invalid when all of its channels exceed TH_INVALID.
    # (cv.imread with default flags yields 3-channel BGR images.)
    invalid_mask = np.all(img_ori > TH_INVALID, axis=2)

    # Zero all channels of the mask at the invalid positions.
    img_mask[invalid_mask] = 0
    cv.imwrite(os.path.join(dir_mask_fine, f), img_mask)

### 4.根据掩码图像生成标签文件（mat）
* 同时生成所有类别：
    * CLS2：正常/病灶[12345]
    * CLS4：正常/[12]/[34]/[5]
    * CLS6：正常/[1]/[2]/[3]/[4]/[5]
* 使用详细掩码图像时像素值变化较大，直接使用初始掩码图和原图；
* 使用宽松阈值方式而不是严格相等；

In [None]:
# Annotation colours, (R, G, B).
# Nominal values used when the masks were drawn:
#   Group#1 cyan    (  0, 255, 255)
#   Group#2 green   (  0, 255,   0)
#   Group#3 yellow  (255, 255,   0)
#   Group#4 red     (255,   0,   0)
#   Group#5 magenta (255,   0, 255)
# The constants below are lenient per-channel thresholds, not exact colours:
# a low (~20) entry means "channel below this value" and a high (~230) entry
# means "channel above this value", so slightly off pixel values still match.
RGB_1 = ( 20, 230, 230)
RGB_2 = ( 20, 230,  20)
RGB_3 = (230, 230,  20)
RGB_4 = (230,  20,  20)
RGB_5 = (230,  20, 230)

# color map --- BGR (row index == class index); uint8 so that a lookup
# color_map[label] can be written by cv.imwrite without conversion.
color_map_cls2 = np.array([[  0,   0,   0], # 0 - normal
                           [  0,   0, 255]  # 1 - group#12345
                          ], dtype=np.uint8)
color_map_cls4 = np.array([[  0,   0,   0], # 0 - normal
                           [255, 255,   0], # 1 - group#12
                           [  0, 255, 255], # 2 - group#34
                           [255,   0, 255]  # 3 - group#5
                          ], dtype=np.uint8)
color_map_cls6 = np.array([[  0,   0,   0], # 0 - normal
                           [255, 255,   0], # 1 - group#1
                           [  0, 255,   0], # 2 - group#2
                           [  0, 255, 255], # 3 - group#3
                           [  0,   0, 255], # 4 - group#4
                           [255,   0, 255]  # 5 - group#5
                          ], dtype=np.uint8)

save_data_fmt  = '.mat'
save_image_fmt = '.jpg'

# One entry per class scheme:
#   (class index for groups 1..5, colour map,
#    label dir, label preview dir, fine label dir, fine label preview dir)
LABEL_SCHEMES = (
    ((1, 1, 1, 1, 1), color_map_cls2,
     dir_label_cls2, dir_label_mask_cls2,
     dir_label_fine_cls2, dir_label_mask_fine_cls2),
    ((1, 1, 2, 2, 3), color_map_cls4,
     dir_label_cls4, dir_label_mask_cls4,
     dir_label_fine_cls4, dir_label_mask_fine_cls4),
    ((1, 2, 3, 4, 5), color_map_cls6,
     dir_label_cls6, dir_label_mask_cls6,
     dir_label_fine_cls6, dir_label_mask_fine_cls6),
)

# Index into name_list of the first file to process. Entries before it are
# skipped; set to 0 to (re)generate the labels of every file.
START_INDEX = 10


def _save_label(label, color_map, dir_mat, dir_img, name):
    """Write one label map as a compressed .mat file plus a colour preview.

    Args:
        label: 2-D uint8 array of class indices.
        color_map: (num_classes, 3) uint8 BGR table indexed by class.
        dir_mat: directory receiving ``name + save_data_fmt``.
        dir_img: directory receiving ``name + save_image_fmt``.
        name: file base name without extension.
    """
    savemat(os.path.join(dir_mat, name+save_data_fmt), {'label': label},
            do_compression=True)
    # Indexing the table with the label map yields an (H, W, 3) BGR image;
    # class 0 maps to row 0, which is black in all colour maps above.
    cv.imwrite(os.path.join(dir_img, name+save_image_fmt), color_map[label])


for idx, n in enumerate(name_list[START_INDEX:], START_INDEX + 1):
    print('{}/{}   {}'.format(idx, total_file_num, n))

    f = n + data_fmt

    img_ori = cv.imread(os.path.join(dir_ori_clipped, f))
    img_mask = cv.imread(os.path.join(dir_mask, f))
    # cv.imread signals failure by returning None instead of raising.
    if img_ori is None or img_mask is None:
        raise IOError('Failed to read clipped original/mask image: ' + f)

    # OpenCV stores channels in B, G, R order.
    red   = img_mask[:,:,2]
    green = img_mask[:,:,1]
    blue  = img_mask[:,:,0]
    # Boolean masks of annotation groups 1..5 (lenient colour matching).
    group_masks = (
        (red < RGB_1[0]) & (green > RGB_1[1]) & (blue > RGB_1[2]),
        (red < RGB_2[0]) & (green > RGB_2[1]) & (blue < RGB_2[2]),
        (red > RGB_3[0]) & (green > RGB_3[1]) & (blue < RGB_3[2]),
        (red > RGB_4[0]) & (green < RGB_4[1]) & (blue < RGB_4[2]),
        (red > RGB_5[0]) & (green < RGB_5[1]) & (blue > RGB_5[2]),
    )

    # Near-white pixels of the original image are invalid for the fine labels.
    mask_invalid = ((img_ori[:,:,2] > TH_INVALID) &
                    (img_ori[:,:,1] > TH_INVALID) &
                    (img_ori[:,:,0] > TH_INVALID))

    for (group_classes, color_map,
         d_label, d_label_mask, d_label_fine, d_label_mask_fine) in LABEL_SCHEMES:
        label = np.zeros(img_ori.shape[:2], dtype=np.uint8)
        for group_mask, cls in zip(group_masks, group_classes):
            label[group_mask] = cls
        _save_label(label, color_map, d_label, d_label_mask, n)

        # fine label: same map with invalid pixels reset to background
        label[mask_invalid] = 0
        _save_label(label, color_map, d_label_fine, d_label_mask_fine, n)

11/278   201766041-4
12/278   201766041-5
13/278   201766041-6
14/278   201766041-7
15/278   201766041-8
16/278   201766041-9
17/278   201768857-6
18/278   201768980-4
19/278   201768980-5
20/278   201770586-6
21/278   201772047-7
22/278   201772370-2
23/278   201772930-1
24/278   201772930-10
25/278   201772930-11
26/278   201772930-13
27/278   201772930-4
28/278   201772930-5
29/278   201772930-7
30/278   201772930-8
31/278   201772930-9
32/278   201773015-4
33/278   201773015-7
34/278   201773016-4
35/278   201773016-5 (2)
36/278   201773016-6
37/278   201773056-1
38/278   201773056-2
39/278   201773056-5
40/278   201773648-1
41/278   201773648-3
42/278   201773648-4
43/278   201774005-5
44/278   201774082-3
45/278   201774082-7
46/278   201774082-8
47/278   201774362-3
48/278   201775803-2
49/278   201775803-3
50/278   201775803-4
51/278   201775876-1
52/278   201775876-10
53/278   201775876-11
54/278   201775876-2
55/278   201775876-3
56/278   201775876-4
57/278   201775876-5
58/2