In [None]:
# load the libraries here
import os

In [None]:
# Root directory of the raw image data; it is listed below to obtain the folder
# names, and get_the_images() reads the images of each folder from underneath it.
# NOTE(review): absolute, machine-specific Windows path - adjust before running elsewhere.
path_to_data =  r"D:\Jellybean\Relevant\jellybean_data-master"

In [None]:
# get the folder names: keep only real sub-directories (get_the_images() lists
# every entry as a directory, so a stray file such as a README would crash it)
# and sort them so the processing order is reproducible between runs
folders = sorted(
    entry
    for entry in os.listdir(path_to_data)
    if os.path.isdir(os.path.join(path_to_data, entry))
)

In [None]:
# peek at the first five entries to sanity-check the directory listing
folders[:5]

In [None]:
# skimage - RGB, CV2 - BGR
import skimage 
# otsu import
from skimage.filters import threshold_otsu
# 3-D to 2-D
from skimage.color import rgb2gray, rgb2hsv, rgb2yuv
# for filling the small holes
from scipy import ndimage
# clear the border
from skimage.segmentation import clear_border
# erosion
from skimage.morphology import binary_erosion
# mainly will use for watershed
import cv2
import numpy as np

In [None]:
# matplotlib for plotting
import matplotlib.pyplot as plt

In [None]:
# status bar
from tqdm.notebook import tqdm

In [None]:
# Now let's develop the watershed approach on a single image
# and, hopefully, generalize it to the other images

In [None]:
# Directory the cropped images are written to (used by get_the_images()).
# The value ends with a path separator so that a file name can be appended to it
# directly; a raw string literal cannot end in a single backslash, hence the
# doubled one.
# NOTE(review): absolute, machine-specific Windows path - adjust before running elsewhere.
save_path = r"D:\Jellybean\Second_Attempt\Split_Images\\"

In [None]:
def get_the_images(folder, erosion_kernel_size=50):
    """Split every image of ``folder`` into individual crops and save them.

    For each image under ``path_to_data/folder`` the pipeline is:

    1. Otsu-threshold the grayscale image, fill holes and drop every object
       that touches the image border to obtain the foreground mask.
    2. Otsu-threshold the distance transform of that mask and erode the
       result twice to obtain seed regions.
    3. Run a marker-based watershed (``cv2.watershed``) from those seeds.
    4. Fit an ellipse to every watershed region and paint it into one mask.
    5. Save the bounding box of every blob of that mask, cut out of the
       original image, as ``<folder>_<n>.jpg`` inside ``save_path``.  ``n``
       starts at 1 and keeps counting across all images of the folder.

    Parameters
    ----------
    folder : str
        Name of a sub-directory of the module-level ``path_to_data``.
    erosion_kernel_size : int, optional
        Side length of the elliptical structuring element used for the two
        seed erosions.  Defaults to 50, the value that was previously
        hard-coded.

    Returns
    -------
    None
        The crops are written to disk as a side effect.
    """
    folder_path = os.path.join(path_to_data, folder)

    # make sure the output directory exists before the first save
    os.makedirs(save_path, exist_ok=True)

    # the structuring element is the same for every image, so build it once
    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (erosion_kernel_size, erosion_kernel_size)
    )

    counter = 1
    # sorted so that the crop numbering is reproducible between runs
    for image in tqdm(sorted(os.listdir(folder_path))):
        img = plt.imread(os.path.join(folder_path, image))
        # make the image grayscale
        gray_image = rgb2gray(img)

        # pixels at or below the Otsu threshold are treated as foreground
        foreground = gray_image <= threshold_otsu(gray_image)
        # fill small holes
        foreground = ndimage.binary_fill_holes(foreground)
        # discard objects that touch the image border
        foreground = clear_border(foreground)
        if not foreground.any():
            # nothing left to segment; also avoids thresholding a constant
            # distance map further down
            continue

        # distance transform, thresholded with Otsu as well
        distance = ndimage.distance_transform_edt(foreground)
        seeds = distance > threshold_otsu(distance)
        # erode twice to keep only the sure-foreground cores
        seeds = binary_erosion(seeds, kernel)
        seeds = binary_erosion(seeds, kernel)

        # OpenCV wants uint8 masks; build them explicitly instead of relying
        # on the platform-dependent width of ``int``
        foreground_u8 = foreground.astype(np.uint8) * 255
        seeds_u8 = seeds.astype(np.uint8) * 255

        # unknown region = foreground that is not part of any seed
        unknown = cv2.subtract(foreground_u8, seeds_u8)

        # marker labelling
        _, markers = cv2.connectedComponents(seeds_u8)
        # add one to all labels so that the sure background is 1, not 0
        markers = markers + 1
        # mark the unknown region with zero so the watershed decides it
        markers[unknown == 255] = 0

        # cv2.watershed needs an 8-bit, 3-channel image
        gray_u8 = cv2.convertScaleAbs(gray_image * 255)
        markers = cv2.watershed(np.dstack([gray_u8] * 3), markers)

        # paint one filled ellipse per watershed region into a common mask
        mask = np.zeros(markers.shape, dtype="uint8")
        for marker in np.unique(markers):
            # labels >= 2 are the individual regions grown from the seeds
            if marker < 2:
                continue
            region = (markers == marker).astype(np.uint8) * 255
            contours, _ = cv2.findContours(
                region, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            if not contours:
                continue
            # use the largest contour of the region rather than whichever
            # happens to be listed first
            contour = max(contours, key=cv2.contourArea)
            # cv2.fitEllipse raises for contours with fewer than 5 points
            if len(contour) < 5:
                continue
            ellipse = cv2.fitEllipse(contour)
            mask = cv2.ellipse(mask, ellipse, color=(255, 255, 255), thickness=-1)

        # every blob of the ellipse mask becomes one crop
        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        for cnt in contours:
            # bounding rectangle of the blob
            x, y, w, h = cv2.boundingRect(cnt)
            # cut the rectangle out of the original (colour) image
            rect_img = img[y:y + h, x:x + w]

            path = os.path.join(save_path, "{}_{}.jpg".format(folder, counter))
            counter += 1

            # save the crop
            plt.imsave(path, rect_img)

In [None]:
from joblib import Parallel, delayed

In [None]:
# fan the folders out over 6 joblib workers, one get_the_images() call per folder
Parallel(n_jobs=6, verbose=3)(
    delayed(get_the_images)(folder_name) for folder_name in folders
)

In [None]:
# Serial run: process every folder one after the other, with a progress bar.
# NOTE(review): this calls get_the_images() for the same folders as the Parallel
# cell above - presumably only one of the two cells needs to be executed.
for folder in tqdm(folders):
    get_the_images(folder)