## Callin Switzer

Update 10 Feb 2017

Image segmentation to count and measure the fruits that were collected in the Kalmia pollination experiment.

In [1]:
import cv2

In [2]:
import numpy as np
import matplotlib
matplotlib.use("TkAgg") # have to use this for tkinter to  work below
from matplotlib import pyplot as plt
%matplotlib tk

# scikit image
import skimage
from skimage import io

import os
import pandas as pd



In [None]:
%qtconsole

In [3]:
# Work from the folder that holds the fruit photographs.
# (The images are not part of the supplemental data -- they are too big to include.)
os.chdir("/Users/callinswitzer/Dropbox/ExperSummer2016/Kalmia/KalmiaFruits_28Sept2016/")
mypath = os.getcwd()

# keep regular files only; sub-directories are skipped
onlyfiles = [
    entry
    for entry in os.listdir(mypath)
    if os.path.isfile(os.path.join(mypath, entry))
]

In [4]:
# function to find bad contours
def is_contour_good(c, img, min_area=1000, border=1):
    """Return True if contour ``c`` should be kept, False if it is 'bad'.

    A contour is 'bad' when
      * its area is smaller than ``min_area`` pixels, or
      * its bounding rectangle lies within ``border`` pixels of any edge
        of ``img`` (i.e. the object runs off the picture).

    c        -- a single contour, as produced by cv2.findContours
    img      -- image the contour was found in; only img.shape[0] (height)
                and img.shape[1] (width) are read
    min_area -- smallest accepted contour area, in pixels (default 1000)
    border   -- width, in pixels, of the edge zone a contour may not
                touch (default 1)
    """
    # area enclosed by the contour (unsigned, because oriented=False)
    if cv2.contourArea(c, False) < min_area:
        return False

    # upright bounding box: top-left corner (x, y), width w and height h
    x, y, w, h = cv2.boundingRect(c)
    height, width = img.shape[0], img.shape[1]

    touches_border = (x <= border or
                      y <= border or
                      x + w + border >= width or
                      y + h + border >= height)
    return not touches_border

In [5]:
# keep the .jpg photos, skipping hidden files and any file with "DSC" in its name
photoList = [
    fname
    for fname in onlyfiles
    if fname.endswith('.jpg') and not fname.startswith('.') and "DSC" not in fname
]

In [6]:
len(photoList) # number of images to process (92 when this notebook was last run)

92

In [7]:
# plant identifier = file name without its extension.
# NOTE: v.translate(None, '.jpg') was used here before, but that deletes every
# '.', 'j', 'p' and 'g' character anywhere in the name (and fails on Python 3);
# os.path.splitext removes only the trailing extension.
plants = [os.path.splitext(v)[0] for v in photoList]

In [8]:
# function for kalmia segmentation
def kalmSeg(kk):
    """Segment the objects in photo number ``kk`` and measure them.

    The photo ``photoList[kk]`` is read as greyscale, thresholded, cleaned
    with an erosion/dilation pass and split into contours.  Contours that
    ``is_contour_good`` rejects are discarded.  An annotated picture is
    saved as ``SegmentedImages/<plants[kk]>_segmented.png`` and a short
    summary is printed.

    kk -- index into the module-level lists ``photoList`` and ``plants``

    Returns a pandas DataFrame with one row per contour and the columns
    'areas' (contour area, px), 'perimeters' (contour length, px),
    'dia_mm' (diameter scaled so that the largest contour equals 20) and
    'plantNum' (``plants[kk]``).

    Raises IOError if the photo cannot be read.
    """
    # read in image as greyscale (flag 0)
    fpth = photoList[kk]
    img = cv2.imread(fpth, 0)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError("could not read image: " + str(fpth))

    # fixed threshold after Gaussian filtering; the value 125 was chosen
    # manually (Otsu's method, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
    # was the alternative that was tried)
    blur = cv2.GaussianBlur(img, (5, 5), 0)
    _, th3 = cv2.threshold(blur, 125, 255, cv2.THRESH_BINARY_INV)

    # now get rid of the leafy bits: erode, then dilate by the same amount
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    erosion = cv2.erode(th3, kernel, iterations=10)
    image = cv2.dilate(erosion, kernel, iterations=10)

    # close small holes: redraw every good contour, filled, on a black mask.
    # findContours returns (contours, hierarchy) or (image, contours,
    # hierarchy) depending on the OpenCV version; the contours are always
    # the second-to-last item.
    cnts = cv2.findContours(image.copy(), cv2.RETR_LIST,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
    mask = np.zeros(image.shape[:2], dtype="uint8")
    for c in cnts:
        # draw white on the black background
        if is_contour_good(c, image):
            cv2.drawContours(mask, [c], -1, 255, -1)

    # False whenever the mask differs from the cleaned image, i.e. a bad
    # contour was dropped or a hole was filled in
    filHoles = 'no holes were filled--' + str((mask == image).all())

    # find contours of the remaining objects
    cts = list(cv2.findContours(mask.copy(), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2])

    perimeters = [cv2.arcLength(c, True) for c in cts]
    areas = [cv2.contourArea(c, False) for c in cts]

    # circularity = perimeter^2 / (4 * pi * area): exactly 1 for a perfect
    # circle and larger for less round shapes.  A zero-area contour is
    # treated as "not round" instead of dividing by zero.
    circularity = [(p ** 2) / (4 * np.pi * a) if a > 0 else float('inf')
                   for p, a in zip(perimeters, areas)]

    # paint the roughly circular objects white -- makes it easy to see
    # whether any fruit was missed.  note, this modifies "img".
    # (plain lists are used because contours have different lengths and do
    # not fit a regular numpy array)
    round_cts = [c for c, circ in zip(cts, circularity) if circ < 2]
    if round_cts:
        cv2.drawContours(img, round_cts, -1, 255, -1)

    # radius of the circle with the same area: area = pi * r^2
    radii = np.sqrt(np.array(areas, dtype=float) / np.pi)

    if radii.size:
        biggest = radii.max()
        # convert to mm instead of pixels: the largest contour is scaled to
        # a diameter of 2 * 10 = 20
        # NOTE(review): presumably the largest object is a reference circle
        # of known size -- confirm
        dia_mm = radii / biggest * 2 * 10

        # paint the largest object black.  note, this modifies "img".
        cv2.drawContours(img,
                         [c for c, r in zip(cts, radii) if r == biggest],
                         -1, 0, -1)
    else:
        # nothing was found: keep the empty array so the code below still works
        dia_mm = radii

    # compute the center (centroid) of each contour from its moments
    xs, ys = [], []
    for c in cts:
        M = cv2.moments(c)
        if M["m00"] != 0:
            xs.append(int(M["m10"] / M["m00"]))
            ys.append(int(M["m01"] / M["m00"]))
        else:
            # degenerate contour: fall back to the mean of its points
            cx, cy = c.reshape(-1, 2).mean(axis=0)
            xs.append(int(cx))
            ys.append(int(cy))
    xx = np.array(xs)
    yy = np.array(ys)

    # show image
    plt.clf()
    io.imshow(img)

    # add centers
    plt.plot(xx, yy, 'rp')
    # the x limits run from high to low, which mirrors the x axis; the
    # 6000 x 4000 extent is hard-coded
    # NOTE(review): presumably the size of the photos -- confirm
    plt.xlim(6000, 0)
    plt.ylim(0, 4000)

    # label diameters
    for dia, x, y in zip(dia_mm, xx, yy):
        plt.annotate(
            'dia_{0}'.format(round(dia, 2)), color='grey', size=10,
            xy=(x, y), xytext=(0, 0),
            textcoords='offset points', ha='right', va='bottom')

    # save plot, but make new directory if it doesn't already exist.
    # Saving happens before plt.show(): with a blocking backend the figure
    # can already be closed (and the saved file blank) once show() returns.
    if not os.path.isdir("SegmentedImages/"):
        os.mkdir("SegmentedImages/")
    plt.savefig('SegmentedImages/' + str(plants[kk]) + '_segmented.png')
    plt.show()

    # single-argument print(...) gives the same output on Python 2 and 3
    print(len(xx))  # number of objects found (including big circle)
    print(filHoles)
    print('kk = ' + str(kk))

    # return a pandas data frame with the measurements
    tmp_DF = pd.DataFrame({'areas': areas,
                           'perimeters': perimeters,
                           'dia_mm': dia_mm.tolist(),
                           'plantNum': plants[kk]})
    return tmp_DF

In [9]:
## Not run

# # create empty data frame for saving image data
# df_final = pd.DataFrame({'areas': [], 
#               'perimeters': [], 
#               'dia_mm': [], 
#              'plantNum': []})

# #loop through images and save images
# for yy in range(len(plants)):
#     tmp_DF = kalmSeg(yy)
    
#     #append to final data frame
#     df_final = df_final.append(tmp_DF)
    

In [10]:
# write the data to a file
# df_final.to_csv('SegmentedImages/kalmiaFruitFinal.csv')

In [11]:
# print system info
# (single-argument print(...) prints the same text on Python 2 and also runs on Python 3)
import IPython
print(IPython.sys_info())

{'commit_hash': u'5c9c918',
 'commit_source': 'installation',
 'default_encoding': 'UTF-8',
 'ipython_path': '/Users/callinswitzer/anaconda/lib/python2.7/site-packages/IPython',
 'ipython_version': '5.1.0',
 'os_name': 'posix',
 'platform': 'Darwin-15.6.0-x86_64-i386-64bit',
 'sys_executable': '/Users/callinswitzer/anaconda/bin/python',
 'sys_platform': 'darwin',
 'sys_version': '2.7.9 |Anaconda custom (x86_64)| (default, Dec 15 2014, 10:37:34) \n[GCC 4.2.1 (Apple Inc. build 5577)]'}


In [12]:
# show installed packages and versions
!pip freeze 

alabaster==0.7.9
anaconda-clean==1.0
anaconda-client==1.5.1
anaconda-navigator==1.3.1
appnope==0.1.0
appscript==1.0.1
argcomplete==1.0.0
astroid==1.4.7
astropy==1.2.1
Babel==2.3.4
backports-abc==0.4
backports.shutil-get-terminal-size==1.0.0
backports.ssl-match-hostname==3.4.0.2
beautifulsoup4==4.5.1
bitarray==0.8.1
blaze==0.10.1
bokeh==0.12.2
boto==2.42.0
Bottleneck==1.1.0
cdecimal==2.3
cffi==1.7.0
chest==0.2.3
click==6.6
cloudpickle==0.2.1
clyent==1.2.2
colorama==0.3.7
conda==4.2.11
conda-build==2.0.2
configobj==5.0.6
configparser==3.5.0
contextlib2==0.5.3
cryptography==1.3.1
cycler==0.10.0
Cython==0.24.1
cytoolz==0.8.0
dask==0.11.0
datashape==0.5.2
decorator==4.0.10
dill==0.2.5
docutils==0.12
dynd==0.7.3.dev1
enum34==1.1.6
et-xmlfile==1.0.1
fastcache==1.0.2
filelock==2.0.6
Flask==0.11.1
Flask-Cors==2.1.2
funcsigs==1.0.2
functools32==3.2.3.post2
futures==3.0.5
gevent==1.1.2
greenlet==0.4.10
grin==1.2.1
h5py==2.6.0
HeapDict==1.0.0
