In [7]:
# Code by Beatrice Madison and Joelle Frye
# 05/02/19

import pathlib
import imageio
import numpy as np
from scipy import ndimage
import skimage
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import pandas as pd
from skimage.filters import threshold_otsu
from skimage.filters import threshold_yen
from skimage.exposure import histogram
from skimage.feature import canny
from scipy import ndimage as ndi
from skimage import morphology
from skimage.filters import sobel
from skimage.measure import label
from skimage.filters import try_all_threshold
from tqdm import tqdm
from skimage.feature import blob_dog, blob_log, blob_doh
from math import sqrt

def analyze_image(im_path):
    '''
    Segment one image and return its objects as run-length-encoded rows.

    The image is read, converted to grayscale, thresholded with Otsu's method,
    cleaned of small objects and split into connected components. Every
    component covering more than 10 pixels is RLE-encoded into one row.

    im_path: pathlib.Path of the image file. The image id is taken from the
             third-to-last path component (<id>/images/<file>.png).
    Returns a Pandas DataFrame with the columns 'ImageId' and 'EncodedPixels',
    one row per detected object (no rows when nothing is detected).
    '''
    # Read in data and convert to grayscale
    im_id = im_path.parts[-3]
    im = imageio.imread(str(im_path))
    if im.ndim == 2:
        # Already single-channel. Recent scikit-image releases reject 2-D
        # input to rgb2gray; older ones passed it through unchanged.
        im_gray = im
    else:
        # Keep only the first three channels: recent scikit-image releases
        # reject RGBA input, older ones ignored the alpha channel.
        im_gray = rgb2gray(im[..., :3])

    # Displaying the actual image in grayscale
    #plt.imshow(im_gray, cmap=plt.cm.gray, interpolation='nearest')

    # Method 1 (active): Otsu histogram cutoff and removing small objects. Score: .387
    threshold = threshold_otsu(im_gray)
    # 1 where the pixel is NOT above the threshold, 0 where it is.
    inverted = np.where(im_gray > threshold, 0, 1)
    # remove_small_objects works only after labeling the individual objects
    final = morphology.remove_small_objects(label(inverted), 60)

    # Alternatives that were tried. Each one replaces the lines above that
    # compute `final`; the masking and labelling below stay the same. They are
    # kept as '#' comments rather than string literals so that a misplaced
    # closing quote cannot silently disable the code that follows.
    #
    # Method 2: Yen histogram cutoff and removing small objects. Score: .29
    #   threshold = threshold_yen(im_gray)
    #   (remaining steps identical to Method 1)
    #
    # Method 3: Using sobel Method in combination with otsu thresholding. Score: (not recorded)
    #   threshold = threshold_otsu(im_gray)
    #   final = sobel(im_gray > (threshold))
    #   markers = np.zeros_like(im_gray)
    #   segmentation = morphology.watershed(final, markers)
    #   final = ndi.binary_fill_holes(final)
    #   final = morphology.remove_small_objects(label(final), 60)
    #
    # Method 4: Using canny in combination with otsu thresholding. Score: (not recorded)
    #   threshold = threshold_otsu(im_gray)
    #   final = canny(im_gray > threshold)
    #   final = ndi.binary_fill_holes(final)
    #   final = morphology.remove_small_objects(final, 60)

    # Mask out background and extract connected objects
    mask = np.where(final > 0, 1, 0)
    if np.sum(mask == 0) < np.sum(mask == 1):
        # Foreground covers more pixels than background: swap the two.
        mask = np.where(mask, 0, 1)
    labels, nlabels = ndimage.label(mask)

    #Display altered image
    #fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    #plt.tight_layout()
    #plt.imshow(mask, cmap=plt.cm.gray, interpolation='nearest')

    # Collect one record per sufficiently large label and build the DataFrame
    # once at the end (DataFrame.append was removed in pandas 2.0).
    rows = []
    for label_num in range(1, nlabels + 1):
        label_mask = np.where(labels == label_num, 1, 0)
        if label_mask.sum() > 10:
            rows.append({'ImageId': im_id,
                         'EncodedPixels': rle_encoding(label_mask)})

    return pd.DataFrame(rows, columns=['ImageId', 'EncodedPixels'])




def analyze_list_of_images(im_path_list):
    '''
    Takes a list of image paths (pathlib.Path objects), analyzes each with
    analyze_image, and returns a submission-ready DataFrame.

    im_path_list: iterable of pathlib.Path objects, one per image.
    Returns a single Pandas DataFrame holding the rows of every per-image
    result, re-indexed from 0. An empty DataFrame is returned when there are
    no paths or no image produced any rows.
    '''
    frames = []
    for im_path in tqdm(im_path_list):
        im_df = analyze_image(im_path)
        # Skip results that contribute no rows; concatenating them adds
        # nothing, and pd.concat raises if every object passed is None.
        if im_df is not None and not im_df.empty:
            frames.append(im_df)

    # pd.concat raises on an empty list, so handle "nothing to combine" here.
    if not frames:
        return pd.DataFrame()

    # One concat at the end instead of DataFrame.append inside the loop:
    # append was removed in pandas 2.0 and copied the whole frame each time.
    return pd.concat(frames, ignore_index=True)



def rle_encoding(x):
    '''
    Run-length encode a binary mask.

    x: numpy array of shape (height, width), 1 - mask, 0 - background
    Returns a space-separated string of "start length" pairs, where start is
    the 1-based position of a run in the column-major flattened mask. An
    all-background mask gives an empty string.
    '''
    # Transposing before flattening walks the pixels down-then-right.
    positions = np.flatnonzero(x.T.ravel() == 1)

    runs = []  # each entry is [1-based start, length]
    previous = None
    for position in positions:
        if previous is not None and position == previous + 1:
            # Adjacent to the previous mask pixel: extend the current run.
            runs[-1][1] += 1
        else:
            # Gap (or very first pixel): open a new run of length 1.
            runs.append([position + 1, 1])
        previous = position

    return " ".join(str(value) for run in runs for value in run)

# Script entry: find every test image (PNG files matching
# ./input/stage1_test/<id>/images/*.png), analyze them all and write the
# combined rows to submission.csv in the working directory.
testing = pathlib.Path('./input/stage1_test/').glob('*/images/*.png')
# glob() yields paths lazily; list() materialises them before processing.
df = analyze_list_of_images(list(testing))
# NOTE(review): index=None is a falsy value, presumably used to omit the row
# index from the CSV; index=False is the documented spelling - confirm.
df.to_csv('submission.csv', index=None)



100%|██████████| 65/65 [00:04<00:00, 14.74it/s]
