In [1]:
import time
import os
import glob
import numpy as np
import skimage.io as io
from PIL import Image

import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# -----------------------------------------------------------------------------------------------------------------------
# 初步篩選Unsup_train.py產出的結果
# 設定條件排除失敗的 mask
# condition_1: 選取非遮罩(白色)佔畫面範圍在一定範圍的影像
# condition_2: 選取中間(20*20)取樣點為白色的影像
# condition_3: 排除中間橫帶為白色的影像
# condition_4: 排除中間下方，左右邊緣(30*30)取樣點為白色的影像
# 篩選出來後在肉眼人工挑選較好的mask
# -----------------------------------------------------------------------------------------------------------------------

In [65]:
# Root folder holding the results written by Unsup_train.py
base_dir = 'data/Unsup_rmbg_0903/'

# Walk the whole tree and keep only image files
# (the extension is compared case-insensitively).
pathes = []
for candidate in glob.glob(base_dir + '**', recursive=True):
    extension = os.path.splitext(candidate)[-1].lower()
    if extension in ['.jpg', '.png', '.jpeg']:
        pathes.append(candidate)
print('total : ', len(pathes))

# Sort the images into groups by a substring of their path.
path_checkings = list(filter(lambda p: 'checking' in p, pathes))
path_masks = list(filter(lambda p: 'moth_rmbg_mask' in p, pathes))
# The trailing separator stops 'moth_rmbg_mask' directories from matching here.
path_rmbgs = list(filter(lambda p: 'moth_rmbg' + os.sep in p, pathes))

print('checking : ', len(path_checkings))
print('mask : ', len(path_masks))
print('rmbg : ', len(path_rmbgs))

# Sanity check: the two groups must have the same number of files.
assert len(path_masks) == len(path_rmbgs)


total :  36304
checking :  10701
mask :  10701
rmbg :  10701


In [88]:
as_gray = True  # read the masks as grayscale (single channel)
# Author's note: read as RGB, white is 255 (range [0, 255], 'uint8');
# read with as_gray=True, white is 1 (range [0, 1], 'float64').
# NOTE(review): skimage documents that images already stored as grayscale are
# not converted by as_gray=True, so single-channel uint8 masks would stay in
# [0, 255] -- confirm the masks on disk are stored as colour images.
color_range = 255 if as_gray == False else 1
# Mean intensity above which a sampled region counts as "white".
# Scaled by color_range so that every condition stays valid when as_gray is False.
white_thresh = color_range*0.8
size = 256  # NOTE(review): assumes every mask is 256x256 -- confirm
pixel_size = 10
crop_l, crop_u = int(size*0.5 - pixel_size), int(size*0.5 + pixel_size)
print(
    f'center_crop pixel size : {crop_u - crop_l}x{crop_u - crop_l}. [{crop_l}: {crop_u}]')


path_masks_ = []
for idx, path in enumerate(path_masks):
    im_name = os.path.split(path)[-1]
    im = io.imread(path, as_gray=as_gray)

    # condition_1: keep images whose overall mean lies strictly between
    # 20% and 80% of the white level (white = non-masked area).
    im_mean = im.mean()
    con_1 = color_range*0.2 < im_mean < color_range*0.8

    # condition_2: keep images whose central 20x20 patch is white
    im_center = im[crop_l: crop_u, crop_l: crop_u]
    con_2 = im_center.mean() > white_thresh

    # condition_3: reject images whose middle horizontal band
    # (rows 30%-70%, full width) is white
    im_centerstrip = im[int(size*0.3):int(size*0.7), :]
    con_3 = im_centerstrip.mean() > white_thresh

    # condition_4: reject images where the 30x30 patch on the left OR right
    # edge, centred at 70% of the height, is white.
    # (The previous loop overwrote the flag on each pass, so only the
    # right-hand patch was ever taken into account.)
    im_llside = im[int(size*0.7-15): int(size*0.7+15), 0:30]
    im_lrside = im[int(size*0.7-15): int(size*0.7+15), -30:]
    con_4 = any(pixels.mean() > white_thresh
                for pixels in (im_llside, im_lrside))

    if con_1 and con_2 and not con_3 and not con_4:
        path_masks_.append(path)
        print(f'{idx:5d}, size : {len(path_masks_)} , {100*(idx+1)/len(path_masks)}%, {im_name:30s} appended\t' )

print('msaks filtered : ', len(path_masks_))

# Derive the matching background-removed image path for every kept mask:
# swap the directory name, then the file suffix.
path_rmbgs_ = []
for path_mask in path_masks_:
    path_rmbg = path_mask.replace('moth_rmbg_mask', 'moth_rmbg')
    if '_mask.png' in path_rmbg:
        path_rmbg = path_rmbg.replace('_mask.png', '_rmbg.jpg')
    path_rmbgs_.append(path_rmbg)
print('rmbgs filtered : ', len(path_rmbgs_))

assert len(path_masks_) == len(path_rmbgs_)


center_crop pixel size : 20x20. [118: 138]
    0, 1 ,ARC01_SJTT0564_1_male_cropped.png appended	
    1, 2 ,ARC01_SJTT0925_1_male_cropped.png appended	
    9, 3 ,ARC06_SJTT1396_1_male_cropped.png appended	
   11, 4 ,ARC10_SJTT0564_1_cropped.png   appended	
   12, 5 ,ARC10_SJTT0792_1_male_cropped.png appended	
   16, 6 ,BOM01_SJTT1606_1_male_cropped.png appended	
   17, 7 ,BOM02_SJTT0871_1_male_cropped.png appended	
   24, 8 ,CARS0514_cropped.png           appended	
   33, 9 ,CARS0872_cropped.png           appended	
   35, 10 ,CARS0877_cropped.png           appended	
   36, 11 ,CARS0990_cropped.png           appended	
   38, 12 ,CARS1132_cropped.png           appended	
   44, 13 ,CARS1209_cropped.png           appended	
   45, 14 ,CARS1215_cropped.png           appended	
   46, 15 ,CARS1216_cropped.png           appended	
   48, 16 ,CARS1306_cropped.png           appended	
   49, 17 ,CARS1318_cropped.png           appended	
   53, 18 ,CARS1452_cropped.png           appended	
   57, 19 ,C

In [89]:
# Timestamped output folder so that repeated runs do not overwrite each other.
log_time = time.strftime("%Y%m%d_%H%M")

save_dir_mask = os.path.join('tmp', 'mask_select' , log_time)
os.makedirs(save_dir_mask, exist_ok=True)

# Materialise the pairs into a list: a bare zip object has no len(), which made
# the progress print below raise TypeError. A separate name is used so the
# image list `pathes` built earlier is not clobbered.
path_pairs = list(zip(path_masks_, path_rmbgs_))
for idx, (path_mask, path_rmbg) in enumerate(path_pairs):
    # Base name: the file name up to its first '.', then up to any '。' separator.
    im_name = os.path.split(path_mask)[-1].split('.')[0].split('。')[0]
    # Third-from-last directory component of the mask path; in the recorded
    # output this is the parameter-set folder name (e.g. 'FEL_mlabel10_LR0.05_...').
    para = os.path.split(path_mask)[0].split(os.sep)[-3]

    for path, suffixes in [(path_mask,'_mask.png') , (path_rmbg, '_rmbg.jpg')]:
        im = io.imread(path)
        fname = im_name + '。' + para + suffixes
        file = os.path.join(save_dir_mask, fname)
        # The image is re-encoded through PIL; the format follows the file suffix.
        Image.fromarray(im).save(file)
        print(f'{idx:4d}, {100*(idx+1)/len(path_pairs)}%, {fname:30s} saved\t\t\t')

   0, ARC01_SJTT0564_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_mask.png saved			
   0, ARC01_SJTT0564_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_rmbg.jpg saved			
   1, ARC01_SJTT0925_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_mask.png saved			
   1, ARC01_SJTT0925_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_rmbg.jpg saved			
   2, ARC06_SJTT1396_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_mask.png saved			
   2, ARC06_SJTT1396_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_rmbg.jpg saved			
   3, ARC10_SJTT0564_1_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_mask.png saved			
   3, ARC10_SJTT0564_1_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_rmbg.jpg saved			
   4, ARC10_SJTT0792_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_mask.png saved			
   4, ARC10_SJTT0792_1_male_cropped。FEL_mlabel10_LR0.05_scale1000.0_sigma0.1_rmbg.jpg saved			
   5, BOM01_SJTT1606_1_male_cropped。FEL_mlabel10_LR0.05_scal

In [None]:
# -----------------------------------------------------------------------------------------------------------------------
# 檢視Unsup初步人工篩選去背較成功的樣本數
# -----------------------------------------------------------------------------------------------------------------------

In [96]:
# Folder containing the masks kept after the manual visual selection
base_dir = 'data/label_waiting_postprocess/mask_selected/'
files_ = glob.glob(base_dir + '*.png')
print('Total files : ', len(files_))

# The part of each file name before the '。' separator is the image base name;
# a set collapses the several masks that share one base name.
unique_names = {os.path.basename(png_path).split('。')[0] for png_path in files_}
files = list(unique_names)
print('Unique files : ', len(files))

Total files :  527
Unique files :  262


In [94]:
# Show the base name parsed from the first selected mask file
os.path.basename(files_[0]).split('。')[0]

'ARC01_SJTT0564_1_male_cropped'

## note
### 非監督式去背模型第一階段小結:

- 分別採用SLIC與FEL(Felzenszwalb)演算法進行圖像分割(Segmentation)
    - 參數設定(待補)
        - SLIC
            - compactness(0.1, 1, 10) x sigma(0.0, 1.0)
            - lr = 0.1
            - minlabel = 8
        - FEL(Felzenszwalb)
            - scale(100, 500, 1000, 3000) x sigma(0.5, 1.0)  (scale = 100成功機會偏低)
            - lr = 0.05
            - minlabel = 10
- 500個標本(輪廓明顯、背景乾淨)
    - 從500個標本，得到共計10701張mask(label)影像，先以程式自動化排除切割失敗的mask
    - 程式自動篩選得到3773張影像後，再使用人工揀選
- 人工揀選
    - 得到527張mask、包含262個標本(同一標本會揀選多張可行的mask)
- 討論：
    - SLIC演算法不容易突破padding邊界，成功率很低
    - 這組參數對於標本輪廓明顯、但翅膀上有對比明顯之線條及斑點的影像會失敗
        - 針對這類花色的標本，可能要另外找出適合的參數，取得rgb色塊圖後再手動去背

### 後續工作:
- 非監督式去背所得的mask待使用postprocess.ipynb再處理
- 其他取得mask失敗的標本，改以取得rgb色塊留待後續用填補工具取得mask
    - 包括三大類資料，需要分別設定適合參數
        - (斑紋對比明顯、輪廓明顯、輪廓不明顯)