In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import glob
import cv2
import pandas as pd
from itertools import product

In [2]:
def load_filename(phase, cls, les = None, data = 'sm', data_dir = '/mnt/disk2/data/private_data/SMhospital/capsule/1 preprocessed'):
    """
    phase = 'train', 'test'
    cls: [les]  
      'n': ['neg']
      'h': ['redspot', 'angio', 'active'], 
      'd': ['ero', 'ulc', 'str'],
      'p': ['amp', 'lym', 'tum']}
    """
    lesions = dict(neg = 'negative', 
                   redspot = 'red_spot', angio = 'angioectasia', active = 'active_bleeding', 
                   ero = 'erosion', ulcer = 'ulcer', str = 'stricture', 
                   amp = 'ampulla_of_vater', lym = 'lymphoid_follicles', tum = 'small_bowel_tumor')
    classes = dict(n = 'negative', h = 'hemorrhagic', d = 'depressed', p = 'protruded')

    path = os.path.join(data_dir, data, phase, classes[cls], lesions[les])
    pathlist = glob.glob(path + '/*.jpg')
    name = []
    for path in pathlist:
        name.append(os.path.basename(path))
    
    return np.asarray(name)

In [25]:
root = '/mnt/disk2/data/private_data/SMhospital/capsule/0 data/labeled/200121 validation dataset'

hospital_list = os.listdir(root)[1:]

data_list = {}

for hosp in hospital_list:
    lesion_list = os.listdir(os.path.join(root, hosp))
    data_list[hosp] = {}
    for les in lesion_list:
        print(hosp, les)
        data_list[hosp][les] = os.listdir(os.path.join(root, hosp, les))

[2019.01~05]  서울성모 1. red spot
[2019.01~05]  서울성모 10. phlebectasia
[2019.01~05]  서울성모 11. lymphangiectasia
[2019.01~05]  서울성모 2. angioectasia
[2019.01~05]  서울성모 3. active bleeding
[2019.01~05]  서울성모 4. erosion
[2019.01~05]  서울성모 5. ulcer
[2019.01~05]  서울성모 6. stricture
[2019.01~05]  서울성모 7. ampulla of vater
[2019.01~05]  서울성모 8. lymphoid follicles
[2019.01~05]  서울성모 9. small bowel tumor
[2019.01~05]  서울성모 96. Esophagus
[2019.01~05]  서울성모 97. Stomach
[2019.01~05]  서울성모 98. Colon
[2019.01~05]  서울성모 99. Small bowel_정상사진
[성빈센트] 사진분류_Pillcam 1. red spot
[성빈센트] 사진분류_Pillcam 10. phlebectasia
[성빈센트] 사진분류_Pillcam 11. lymphangiectasia
[성빈센트] 사진분류_Pillcam 2. angioectasia
[성빈센트] 사진분류_Pillcam 3. active bleeding
[성빈센트] 사진분류_Pillcam 4. erosion
[성빈센트] 사진분류_Pillcam 5. ulcer
[성빈센트] 사진분류_Pillcam 6. stricture
[성빈센트] 사진분류_Pillcam 7. ampulla of vater
[성빈센트] 사진분류_Pillcam 8. lymphoid follicles
[성빈센트] 사진분류_Pillcam 9. small bowel tumor
[성빈센트] 사진분류_Pillcam 96. Esophagus
[성빈센트] 사진분류_Pillcam 97. Stomach
[성빈센트] 사진분

### Preprocssing and Save

In [87]:
def printProgress(iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 50):
    formatStr = "{0:." + str(decimals) + "f}"
    percent = formatStr.format(100 * (iteration / float(total)))
    filledLength = int(round(barLength * iteration / float(total)))
    bar = '#' * filledLength + '-' * (barLength - filledLength)
    sys.stdout.write('\r{} |{} | {}{} {}'.format(prefix, bar, percent, '%', suffix)),
    if iteration == total:
        sys.stdout.write('\n')
    sys.stdout.flush()

def target_preprocessings(phase_a_switch = [1, 1, 1], phase_b_switch = True, mode = 'load'):
    """
    phase_a_switch = [1, 1, 1], [0, 0 ,1], [1, 1, 0].... 
    that means [flip, rotate, blur_sharp]
    """
    phase0 = ['_c']
    phase1 = {1: ['-', 'f'], 0: ['-']}
    phase2 = {1: ['-', 'r1', 'r2', 'r3'], 0: ['-']}
    phase3 = {1: ['-', 'ab', 'mb', 'eh'], 0: ['-']}
    phase4 = ['s_-30_v_30', 's_-30_v_-30', 's_30_v_-30', 's_30_v_30']

    if mode == 'load':
        phase_a_items = [phase1[phase_a_switch[0]], phase2[phase_a_switch[1]], phase3[phase_a_switch[2]]]
    elif mode == 'preprocessing':
        phase_a_items = [phase0, phase1[phase_a_switch[0]], phase2[phase_a_switch[1]], phase3[phase_a_switch[2]]]

    phase_a = []
    for i in list(product(*phase_a_items)):
        phase_a.append('_'.join(i))

    if not phase_b_switch != True:
        phase_b = []
        for i in list(product(*[phase_a, phase4])):
            phase_b.append('_'.join(i))
        return list(np.hstack([phase_a, phase_b]))
    else:
        return phase_a 

In [88]:
class ce_preprocessing:
    def __init__(self, data_dir, save_dir):
        self.data_dir = data_dir
        self.save_dir = save_dir

    def cropping(self, img):
        img = np.array(img, dtype = 'f4')
        img_pre = img[32:544, 32:544, :]
        for i in range(100):
            for j in range(100):
                if i + j > 99:
                    pass
                else :
                    img_pre[i, j, :] = 0
                    img_pre[i, -j, :] = 0
        return img_pre.astype('uint8')

    def rotate(self, img, degree):
        rows, cols = img.shape[:2]
        M = cv2.getRotationMatrix2D(center = (cols/2, rows/2), angle = degree, scale = 1)
        img_rotated = cv2.warpAffine(img, M, dsize = (rows, cols))
        return img_rotated
    
    def blur_and_sharp(self, img):
        img_avg_blur = cv2.blur(img, (5,5)).astype('uint8')
        
        kernel_size = 15
        
        kernel_motion_blur = np.zeros((kernel_size, kernel_size))
        kernel_motion_blur[int((kernel_size-1)/2), :] = np.ones(kernel_size)
        kernel_motion_blur = kernel_motion_blur / kernel_size
        img_mb = cv2.filter2D(img, -1, kernel_motion_blur).astype('uint8')
        
        kernel_edge_enhancement = np.array([[-1,-1,-1,-1,-1],[-1,2,2,2,-1],[-1,2,8,2,-1],[-1,2,2,2,-1],[-1,-1,-1,-1,-1]])/8.0
        img_eh = cv2.filter2D(img, -1, kernel_edge_enhancement).astype('uint8')    
        return img_avg_blur, img_mb, img_eh
    
    def bgr2_h_s_v(self, img):
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        return h, s, v

    def hsv_control(self, ch_data, ctr_value, ch_name):
        """
        ch_data: data of channel (h, s, or v) which you want to revise by ctr_value / shape: image.shape[0:2]
        ctr_value: the value that will be added to corresponding channel.
        ch_name: 'h', 's', or 'v'
        """
        ch_data_rev = ch_data.copy()
        if ctr_value > 0:
            ch_data_rev[np.where(ch_data <= 255 - ctr_value)] += ctr_value
        else:
            ch_data_rev[np.where(ch_data + ctr_value >= 0)] -= abs(ctr_value)
        return ch_data_rev
    
    def pre_aug(self, img, phase = 'x160'):  
        """
        The image will be preprocessed and augmented at one go 
        by an entire process consisting of  the repetitive statement (for loop) per the processing phase 
        """
        preprocessed_imgs = []
        preprocessed_nots = []
        
        crop = self.cropping(img)
        if phase == 'crop':
            return [crop], ['_c_-_-_-']
        else:
            sv_ctr_values = [-30, 30]
            c_r1, c_r2, c_r3 = self.rotate(crop, 90), self.rotate(crop, 180), self.rotate(crop, 270)
            for r, r_n in zip([crop, c_r1, c_r2, c_r3], ['-', 'r1', 'r2', 'r3']):
                r_f = np.flipud(r)
                for f,  f_n in zip([r, r_f], ['-', 'f']): 
                    f_ab, f_mb, f_edge = self.blur_and_sharp(f)
                    for b, b_n in zip([f, f_ab, f_mb, f_edge], ['-', 'ab', 'mb', 'eh']):                    
                        preprocessed_imgs.append(b)
                        not_ = '_c_{}_{}_{}'.format(f_n, r_n, b_n)
                        preprocessed_nots.append(not_)
                        h, s, v = self.bgr2_h_s_v(b)
                        for s_value in sv_ctr_values:
                            s_rev = self.hsv_control(s, s_value, ch_name = 's')
                            for v_value in sv_ctr_values:
                                v_rev = self.hsv_control(v, v_value, ch_name = 'v')
                                v_rev[np.where(v <= 7)] = 0
                                b_sv = cv2.merge((h, s_rev, v_rev))
                                b_sv = cv2.cvtColor(b_sv, cv2.COLOR_HSV2BGR)
                                preprocessed_imgs.append(b_sv)
                                not_ = '_c_{}_{}_{}_s_{}_v_{}'.format(f_n, r_n, b_n, s_value, v_value)
                                preprocessed_nots.append(not_) 
                if not phase != 'before_rotation':
                    break
            return preprocessed_imgs, preprocessed_nots
    
    def avg_blur(self, img):
        return cv2.blur(img, (5,5)).astype('uint8')
    
    def motion_blur(self, img):
        kernel_size = 15
        kernel_motion_blur = np.zeros((kernel_size, kernel_size))
        kernel_motion_blur[int((kernel_size-1)/2), :] = np.ones(kernel_size)
        kernel_motion_blur = kernel_motion_blur / kernel_size
        return cv2.filter2D(img, -1, kernel_motion_blur).astype('uint8')
    
    def edge_enhancement(self, img):
        kernel_edge = np.array([[-1,-1,-1,-1,-1],[-1,2,2,2,-1],[-1,2,8,2,-1],[-1,2,2,2,-1],[-1,-1,-1,-1,-1]])/8.0
        return cv2.filter2D(img, -1, kernel_edge).astype('uint8') 
    
    def s_rev(self, img, s_value):
        h, s, v = self.bgr2_h_s_v(img)
        s_rev = self.hsv_control(s, s_value, ch_name = 's')
        
        return [h, s_rev, v]
    def v_rev_after_s_rev(self, s_rev_outputs, v_value):
        h, s_rev, v = s_rev_outputs
        v_rev = self.hsv_control(v, v_value, ch_name = 'v')
        v_rev[np.where(v <= 7)] = 0
        img_sv = cv2.merge((h, s_rev, v_rev))
        return cv2.cvtColor(img_sv, cv2.COLOR_HSV2BGR)
            
    def pre_aug_target_phase(self, img, phase = 'c'): 
        
        """
        phase, ex) 'c_f_-_mb_s_-30_v_30' -> 'c_f_-_mb_s-30_v30' -> ['c', 'f', '-', 's-30','v30']
        It allows to preprocess the image in specific phase, but slower it is fit to check preprocessing with small data
        """
        function = {'': (lambda x: x), '-': (lambda x: x),
                    'c': (lambda x: self.cropping(x)),
                    'f': (lambda x: np.flipud(x)), 
                    'r1': (lambda x: self.rotate(x, 90)), 
                    'r2': (lambda x: self.rotate(x, 180)), 
                    'r3': (lambda x: self.rotate(x, 270)),
                    'ab': (lambda x: self.avg_blur(x)),
                    'mb': (lambda x: self.motion_blur(x)),
                    'eh': (lambda x: self.edge_enhancement(x)),
                    's-30': (lambda x: self.s_rev(x, -30)),
                    's30': (lambda x: self.s_rev(x, 30)),
                    'v-30': (lambda x: self.v_rev_after_s_rev(x, -30)),
                    'v30': (lambda x: self.v_rev_after_s_rev(x, 30))}
        values = ['-30', '30']
        for i in values:
            if i in phase:
                phase = phase.replace('_{}'.format(i), str(i))
        phase_seg = phase.split('_')  
        for i, p in zip(range(len(phase_seg)), phase_seg):
            if i == 0:
                p_img = function[p](img)
            else:
                p_img = function[p](p_img)
        return p_img
    
    def pre_aug_and_save(self, data_list, preprocessing_phase = 'x160', pre_aug_type = 'for_loop',
                         phase_a = [1, 1, 1], phase_b = True):
        
        """
        preprocessing_phase = 'x160', 'crop', 'before_rotation' for pre_aug
        phase_a = [1, 1, 1], [1, 0, 1], [1, 1, 0] .... [flip, rotate, blur_sharp]
        phase_b = True -> phase_a (max. x32) + phase_a * sv_control (max. x32x4) => max, 32 x 5
        """
        lesion_type_dict = {'h': ['1. red spot', '2. angioectasia', '3. active bleeding'],
                            'd': ['4. erosion', '5. ulcer', '6. stricture'],
                            'p': ['7. ampulla of vater', '8. lymphoid follicles', '9. small bowel tumor']}

        classes = dict(n = 'negative', h = 'hemorrhagic', d = 'depressed', p = 'protruded')

        hospital_list = data_list.keys()
        for hosp in hospital_list:
            lesion_list = data_list[hosp]
            for les in lesion_list:
                for les_type, les_ in lesion_type_dict.items():
                    if les in les_:
                        save_path = os.path.join(self.save_dir, classes[les_type], '_'.join(les.split(' ')[1:]))
                        break
                    else:
                        save_path = os.path.join(self.save_dir, 'etc', '_'.join(les.split(' ')[1:]))
                if not(os.path.isdir(save_path)):
                    os.makedirs(save_path)
                filenames = data_list[hosp][les]
                for i, filename in enumerate(filenames):
                    import_path = os.path.join(self.data_dir, hosp, les, filename)
                    if not(os.path.isdir(import_path)):
                        img = cv2.imread(import_path)
                        if pre_aug_type == 'for_loop':
                            p_imgs, p_nots = self.pre_aug(img, phase = preprocessing_phase)  
                            for img_, not_ in zip(p_imgs, p_nots):
                                save_filename = os.path.join(save_path, '{}_{}{}'.format(filename[:-4], not_, filename[-4:]))
                                if not(os.path.isfile(save_filename)):
                                    cv2.imwrite(save_filename, img_)
                        elif pre_aug_type == 'target_phase':
                            for not_ in target_preprocessings(phase_a, phase_b, mode = 'preprocessing'):
                                save_filename = os.path.join(save_path, '{}_{}{}'.format(filename[:-4], not_, filename[-4:]))
                                if not(os.path.isfile(save_filename)):
                                    p_img = self.pre_aug_target_phase(img, phase = not_)
                                    cv2.imwrite(save_filename, p_img)
#                     print(len(filenames), hosp, les)
                    printProgress(i, len(filenames), prefix = '{}/{}'.format(hosp, les))

In [89]:
data_dir = '/mnt/disk2/data/private_data/SMhospital/capsule/0 data/labeled/200121 validation dataset'
save_dir = '/mnt/disk2/data/private_data/SMhospital/capsule/0 data/labeled/200121 validation dataset/preprocessing'

ce = ce_preprocessing(data_dir, save_dir)

In [90]:
ce.pre_aug_and_save(data_list, pre_aug_type = 'target_phase', phase_a = [0, 0, 0], phase_b = False)

[여의도성모] 사진분류/97. Stomach |################################################## | 99.2%  | 99.8%  0.0%   - | 0.0% 

In [91]:
def num_of_jpg_file(dir_):
    print(len(glob.glob(dir_ + '/*.jpg')))

In [93]:
num_of_jpg_file(save_dir + '/hemorrhagic/red_spot' )
num_of_jpg_file(save_dir + '/hemorrhagic/angioectasia' )
num_of_jpg_file(save_dir + '/hemorrhagic/active_bleeding' )
num_of_jpg_file(save_dir + '/depressed/erosion' )
num_of_jpg_file(save_dir + '/depressed/ulcer' )
num_of_jpg_file(save_dir + '/depressed/stricture' )

370
2
293
297
410
48
