In [54]:
import glob
import cv2
import os
import shutil
import skimage
import numpy as np
import pandas as pd

import skimage.transform 
import skimage.morphology
from skimage.io import imread, imsave
from skimage.measure import regionprops

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Survey the stage-1 test set: read every image and tabulate how often each
# array shape occurs.
image_paths = glob.glob('../input/stage1_test/*/images/*.png')
test_shapes = [cv2.imread(image_path).shape for image_path in image_paths]
pd.Series(test_shapes).value_counts()

(256, 256, 3)    24
(512, 640, 3)     8
(512, 680, 3)     8
(260, 347, 3)     4
(520, 696, 3)     4
(520, 348, 3)     4
(519, 253, 3)     4
(524, 348, 3)     4
(519, 161, 3)     2
(519, 162, 3)     2
(390, 239, 3)     1
dtype: int64

In [3]:
# Integer microscopy-type codes and their human-readable names:
#   0 - staining
#   1 - fluorescent microscopy
#   2 - brightfield microscopy
type_names = {
    0: "staining",
    1: "fluorescent",
    2: "brightfield",
}

def get_violet_num(img):
    """Count the pixels whose outer channels both exceed the middle channel.

    A pixel is counted when channel 0 and channel 2 are each strictly greater
    than channel 1. For a BGR or RGB image this means blue and red both
    dominate green, i.e. the pixel looks violet/purple.

    Parameters
    ----------
    img : numpy.ndarray
        Array indexed as ``img[y, x, channel]`` with at least three channels.

    Returns
    -------
    int
        Number of pixels satisfying the condition (0 for an empty image).
    """
    # Vectorised comparison over the whole image instead of a per-pixel
    # Python loop; the result is identical but runs in compiled NumPy code.
    middle = img[:, :, 1]
    is_violet = (img[:, :, 0] > middle) & (img[:, :, 2] > middle)
    return int(np.count_nonzero(is_violet))

def get_microscopy_type(img):
    """Return the microscopy type code (0, 1 or 2) for an image.

    0 is returned as soon as the image contains at least one violet pixel
    (see get_violet_num). Otherwise the overall mean intensity decides:
    above 100 gives 2, anything else gives 1.
    """
    if get_violet_num(img) > 0:
        return 0
    # The threshold of 100 was picked ad hoc for distinguishing fluorescent
    # (dark) from brightfield (bright) images.
    return 2 if img.mean() > 100 else 1

### Compute resize targets for the gray53 test images

In [4]:
# Collect the gray53 mask files in a deterministic (sorted) order.
mask_paths = sorted(glob.glob('../input/psds_gray53/*/*.mask.png'))
print('Total number of gray testing samples: ' + str(len(mask_paths)))

Total number of gray testing samples: 53


In [5]:
# Per-image accumulators; each list gets one entry per mask file.
image_ids = []
image_shapes = []
image_types = []
num_of_cells = []
cell_areas = []
mean_area = []
nuclei_size = []
shape_hs = []
shape_ws = []

In [6]:
# Gather per-image statistics from every gray53 mask file.
# NOTE: this cell appends to the accumulator lists created in an earlier cell;
# re-run that cell first, otherwise re-running this one duplicates every entry.
for counter, path in enumerate(mask_paths, start=1):
    if counter % 100 == 0:
        print(counter)
    image_id = os.path.basename(path).split('.mask')[0]
    image_ids.append(image_id)

    # image shape and type
    mask = cv2.imread(path)
    if mask is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('Could not read mask file: ' + path)
    # Collapse the three channels into one integer label image. Widen the
    # dtype first: adding uint8 channels wraps modulo 256, which can turn a
    # labelled pixel into 0 (background) or merge distinct labels.
    mask = mask.sum(axis=2, dtype=np.int32)
    image_shapes.append(list(mask.shape))
    shape_hs.append(mask.shape[0])
    shape_ws.append(mask.shape[1])
    # Every image in this set is recorded as type 1 (fluorescent).
    image_types.append(1)

    # mask info: one region per distinct non-zero label value
    regions = regionprops(mask)
    num_of_cells.append(len(regions))
    areas = [reg.area for reg in regions]

    cell_areas.append(areas)
    # An empty mask has no meaningful mean area; store NaN without triggering
    # NumPy's "mean of empty slice" warning.
    m_area = np.mean(areas) if areas else np.nan
    mean_area.append(m_area)
    # Nuclei "size" is the side length of a square with the mean area.
    nuclei_size.append(np.sqrt(m_area))

In [7]:
# Assemble the per-image statistics into one table, one row per mask file.
column_names = ['ImageId', 'Shape', 'ShapeH', 'ShapeW', 'Type',
                'NumCells', 'CellAreas', 'MeanArea', 'NucleiSize']
records = list(zip(image_ids, image_shapes, shape_hs, shape_ws, image_types,
                   num_of_cells, cell_areas, mean_area, nuclei_size))
df = pd.DataFrame(records, columns=column_names)

# Nuclei size relative to the image height and width.
df['RatioH'] = df['NucleiSize'] / df['ShapeH']
df['RatioW'] = df['NucleiSize'] / df['ShapeW']

df.sample(2)

Unnamed: 0,ImageId,Shape,ShapeH,ShapeW,Type,NumCells,CellAreas,MeanArea,NucleiSize,RatioH,RatioW
41,bdc789019cee8ddfae20d5f769299993b4b330b2d38d12...,"[520, 696]",520,696,1,28,"[495, 1840, 533, 1674, 115, 675, 245, 1886, 12...",1323.178571,36.375522,0.069953,0.052264
1,0999dab07b11bc85fb8464fc36c947fbd8b5d6ec498173...,"[519, 253]",519,253,1,57,"[563, 578, 517, 523, 526, 454, 242, 507, 166, ...",441.701754,21.016702,0.040495,0.08307


In [8]:
# Persist the per-image statistics table as CSV, omitting the row index.
df.to_csv('../output/testing_gray53_df_info.csv', index=False)

In [9]:
def resize_shape(row, target_nuclei_size=20, min_size=128):
    """Compute the image shape that rescales nuclei to a target size.

    The image is scaled by ``target_nuclei_size / row['NucleiSize']``; each
    side is rounded to an even number of pixels and clamped to at least
    ``min_size``.

    Parameters
    ----------
    row : mapping
        Must provide 'NucleiSize', 'ShapeH' and 'ShapeW' (e.g. a DataFrame row).
    target_nuclei_size : number
        Desired nuclei size after resizing.
    min_size : int
        Lower bound applied to both the resized height and width.

    Returns
    -------
    list
        ``[height, width, 3]``.

    Raises
    ------
    ValueError
        If the nuclei size is NaN or not positive (e.g. an image in which no
        nuclei were found), since no scale factor can be derived from it.
    """
    nuclei = row['NucleiSize']
    # `not (nuclei > 0)` is also true for NaN, which a plain `<= 0` would miss.
    if not (nuclei > 0):
        raise ValueError('NucleiSize must be a positive number, got %r' % (nuclei,))
    ratio = target_nuclei_size / nuclei
    h, w, c = row['ShapeH'], row['ShapeW'], 3
    # Halve, round, double: guarantees even side lengths. int() keeps the
    # result a plain Python int even when the inputs are NumPy scalars.
    rh, rw = 2 * int(round(h * ratio / 2)), 2 * int(round(w * ratio / 2))
    return [max(rh, min_size), max(rw, min_size), c]

In [10]:
# Compute the target shape once per row, then split out height and width.
resize_shapes = df.apply(resize_shape, axis=1)
df['ResizeShape'] = resize_shapes
df['ResizeH'] = resize_shapes.map(lambda target: target[0])
df['ResizeW'] = resize_shapes.map(lambda target: target[1])

In [11]:
# Spot-check two random rows, now including the resize columns.
df.sample(2)

Unnamed: 0,ImageId,Shape,ShapeH,ShapeW,Type,NumCells,CellAreas,MeanArea,NucleiSize,RatioH,RatioW,ResizeShape,ResizeH,ResizeW
51,fac507fa4d1649e8b24c195d990f1fc3ca3633d917839e...,"[256, 256]",256,256,1,31,"[70, 112, 122, 105, 43, 82, 70, 90, 24, 159, 7...",85.516129,9.247493,0.036123,0.036123,"[554, 554, 3]",554,554
24,53df5150ee56253fe5bc91a9230d377bb21f1300f443ba...,"[256, 256]",256,256,1,13,"[134, 95, 163, 167, 111, 263, 89, 140, 208, 22...",156.769231,12.520752,0.048909,0.048909,"[408, 408, 3]",408,408


In [12]:
# Smallest resized height and width across all images.
df['ResizeH'].min(), df['ResizeW'].min()

(144, 138)

In [13]:
# Persist the table again, now with the resize columns, omitting the row index.
df.to_csv('../output/testing_gray53_df_resize_info.csv', index=False)

### Resize the gray53 test images

In [14]:
def mkdir_p(path):
    """Create directory ``path`` like ``mkdir -p``.

    Missing parent directories are created as well, and nothing happens if
    the directory already exists. A FileExistsError is still raised when
    ``path`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(path, exist_ok=True)

def read_image_or_mask(filepath, color_mode=cv2.IMREAD_COLOR, target_size=None):
    """Load an image (or mask) with OpenCV and optionally resize it.

    Parameters
    ----------
    filepath : str
        File to read.
    color_mode : int
        OpenCV imread flag; pass ``cv2.IMREAD_GRAYSCALE`` for masks.
    target_size : tuple or None
        Forwarded unchanged to ``cv2.resize`` as ``dsize``, which OpenCV
        interprets as ``(width, height)``. Falsy values skip the resize.

    Returns
    -------
    The array returned by ``cv2.imread``, resized with INTER_AREA
    interpolation when ``target_size`` is given.
    """
    loaded = cv2.imread(filepath, color_mode)
    if not target_size:
        return loaded
    return cv2.resize(loaded, target_size, interpolation=cv2.INTER_AREA)

In [15]:
# Source and destination folders for the resized gray53 test images.
original_path = '../input/stage1_test'
resized_path = '../output/stage1_test_gray53_resized'

# Only image folders whose id appears in the statistics table get resized.
gray53_ids = set(df["ImageId"])
test_paths = sorted(glob.glob(original_path + '/*'))
mkdir_p(resized_path)

In [16]:
# Resize every gray53 test image to its precomputed target shape and write it
# under resized_path/<image_id>/images/<image_id>.png.
for counter, path in enumerate(test_paths, start=1):
    if counter % 100 == 0:
        print(counter)
    image_id = os.path.basename(path)
    if image_id not in gray53_ids:
        continue
    mkdir_p(resized_path + os.sep + image_id)
    mkdir_p(resized_path + os.sep + image_id + '/images')

    # Look up the target size as plain ints, not one-element Series.
    row = df.loc[df['ImageId'] == image_id].iloc[0]
    resize_h = int(row['ResizeH'])
    resize_w = int(row['ResizeW'])

    # resize image
    image_path = path + '/images/' + image_id + '.png'
    # cv2.resize takes its size as (width, height); passing (height, width)
    # would swap the dimensions of every non-square image.
    image = read_image_or_mask(image_path, target_size=(resize_w, resize_h))

    resize_image_path = resized_path + os.sep + image_id + '/images/' + image_id + '.png'
    cv2.imwrite(resize_image_path, image)

### Write the split file listing the gray53 test IDs

In [17]:
# Write one "stage1_test_gray53_resized/<image_id>" line per row of df.
split_path = '../output/split'
# Make sure the destination exists so open() does not fail on a fresh checkout.
os.makedirs(split_path, exist_ok=True)

test_counter = 0
# The context manager closes the file even if a write raises.
with open(split_path + os.sep + 'test3_ids_gray_53', 'w') as test_gray:
    for image_id in df['ImageId']:
        test_gray.write('stage1_test_gray53_resized/' + image_id + '\n')
        test_counter += 1

# The gray53 set is expected to contain exactly 53 images.
assert test_counter == 53, 'expected 53 ids, wrote %d' % test_counter


### Resize predicted masks back to the original image shapes

In [75]:
# Source folder of .npy files and destination for the versions resized back
# to the original image shapes.
original_path = '../input/npys_test_gray53'
resized_path = '../output/npys_test_gray53_resized'

gray53_ids = set(df["ImageId"])
test_paths = sorted(glob.glob(original_path + '/*'))
mkdir_p(resized_path)
len(test_paths)

53

In [76]:
# Resize each saved .npy mask back to the original image shape, relabel its
# connected components and save it under resized_path with the same file name.
for counter, path in enumerate(test_paths, start=1):
    if counter % 100 == 0:
        print(counter)
    file_name = os.path.basename(path)
    image_id = file_name.split('.npy')[0]

    # Original (height, width) as plain ints; int() on a whole Series is
    # deprecated in pandas and fails opaquely when the id is missing.
    matches = df.loc[df['ImageId'] == image_id]
    if matches.empty:
        raise KeyError('No shape information for image id: ' + image_id)
    resize_h = int(matches['ShapeH'].iloc[0])
    resize_w = int(matches['ShapeW'].iloc[0])
    # skimage.transform.resize expects (rows, cols), i.e. (height, width).
    target_size = (resize_h, resize_w)

    # resize npy: binarise once, interpolate, then threshold at 0.5 to get a
    # binary mask again.
    mask = (np.load(path) > 0).astype(np.int8)
    mask_resize = skimage.transform.resize(np.squeeze(mask), target_size,
                                           mode='constant', preserve_range=True)
    mask_resize = (mask_resize > 0.5).astype(np.int8)

    # Give every connected foreground component its own integer label.
    lab_mask = skimage.morphology.label(mask_resize)
    resize_image_path = os.path.join(resized_path, file_name)
    np.save(resize_image_path, lab_mask)