### egg counting
#### updated 2016.09.09
##### bmatthews@rockefeller.edu

In [1]:
### need a few libraries
import cv2
import numpy as np
import os
import pandas as pd

### define a directory containing hand-counted images and a .csv file with the hand counts

Load a list of all .png images (except those starting with THRESH)

In [7]:
# Directory holding the calibration images
test_dir = '/Users/ben/bioinfo/github/eggCount/test_images/'

# Later cells open images (and hand.csv) by bare filename, so make this the
# working directory
os.chdir(test_dir)

# Keep the .png files, leaving out any whose name starts with THRESH (the
# prefix given to the thresholded images written by later cells)
png_files = [
    f
    for f in os.listdir(test_dir)
    if f.endswith('.png')
    if not f.startswith('THRESH')
]

### define a dataframe to store our pixel counts and read in the hand counts

In [8]:
# One row per image, with the pixel count starting at zero
pixel_count = pd.DataFrame({'filename':png_files,'egg_pixels':[0] * len(png_files)})

# hand.csv is read without a header row and transposed; the two resulting
# columns are then labelled handCount and filename
handCount = pd.read_csv('hand.csv',header=None).transpose()
handCount.columns = ['handCount','filename']

# Join on the one column both frames share. The default (inner) join keeps
# only the filenames present in both frames.
pixel_count = handCount.merge(pixel_count, on='filename')

### Defining a set of arrays that represent the min/max filters for (H)ue, (S)aturation and (V)alue thresholding

In [201]:
# These constants are not referenced by the cells visible in this notebook;
# they are kept unchanged in case other cells rely on them.
BEST_A = 183
BEST_B = 46
BEST_C = 201
RANGE = 4
STEP = 2

# Candidate lower HSV bounds: the all-zero baseline first, then a sweep of the
# third (V) component from 0 to 250 in steps of 5. The H and S ranges are
# range(0, 1), so those components stay at 0.
minList = [np.array([0, 0, 0],np.uint8)]

for a in range(0,1):
    for b in range(0,1):
        for c in range(0,255,5):
            minList.append(np.array([a, b, c],np.uint8))


# One (identical) upper bound per lower bound, so that zip(minList, maxList)
# covers every candidate. The length must come from minList: the previous
# len(maxList) referred to maxList before it was defined and raised a
# NameError on a fresh kernel.
maxList = [np.array([173, 46, 201],np.uint8)] * len(minList)

#### going through our filtering arrays, reporting each threshold pair that improves on the best R squared value so far

In [196]:
# Best R squared seen so far across all threshold pairs
WINNER = 0

# Read and convert every image once up front: the HSV data does not depend on
# the thresholds, so there is no need to redo it for each (min, max) pair.
hsv_images = []
for input_file in png_files:
    current_working = cv2.imread(input_file)
    if current_working is None:
        # cv2.imread returns None instead of raising when a file cannot be read
        raise IOError('could not read image ' + input_file)
    hsv_images.append((input_file, cv2.cvtColor(current_working, cv2.COLOR_BGR2HSV)))

# The hand counts are the same for every threshold pair as well
hand_counts = pixel_count['handCount'].apply(int)

for EGG_MIN, EGG_MAX in zip(minList, maxList):
    for input_file, hsv in hsv_images:
        # Mask of the pixels whose HSV values lie within [EGG_MIN, EGG_MAX]
        frame_threshed = cv2.inRange(hsv, EGG_MIN, EGG_MAX)
        # Overwritten on every pass, so the file left on disk is the mask for
        # the last threshold pair tried
        cv2.imwrite('THRESH_' + input_file, frame_threshed)
        pixel_count.loc[pixel_count['filename'] == input_file,'egg_pixels'] = cv2.countNonZero(frame_threshed)

    # Correlate hand counts with in-range pixel counts for this threshold pair
    corr = hand_counts.corr(pixel_count['egg_pixels'].apply(int))
    if(corr * corr > WINNER):
        WINNER = corr * corr
        print('new best R squared value is ' + str(corr * corr) + ' min: ' + str(EGG_MIN) + ' max: ' + str(EGG_MAX))

new best R squared value is 0.961112924983 min: [0 0 0] max: [173  46 201]
new best R squared value is 0.961114860491 min: [ 0  0 80] max: [173  46 201]
new best R squared value is 0.961455075468 min: [ 0  0 85] max: [173  46 201]
new best R squared value is 0.962281841626 min: [ 0  0 90] max: [173  46 201]
new best R squared value is 0.962855757401 min: [ 0  0 95] max: [173  46 201]
new best R squared value is 0.963976046236 min: [  0   0 100] max: [173  46 201]
new best R squared value is 0.965443732104 min: [  0   0 105] max: [173  46 201]
new best R squared value is 0.966803710295 min: [  0   0 110] max: [173  46 201]
new best R squared value is 0.966844555236 min: [  0   0 115] max: [173  46 201]


### Define the main processing function
#### run_directory() takes a list of image files and the threshold min/max, thresholds each image (saving the result with a THRESH_ prefix), counts the non-zero pixels, and writes the counts to pixel_counts.csv in the current working directory

In [2]:
def run_directory(png_files, EGG_MIN, EGG_MAX):
    """Threshold each image in HSV space and save the in-range pixel counts.

    For every file the image is read, converted from BGR to HSV, and masked
    with cv2.inRange(hsv, EGG_MIN, EGG_MAX). The mask is written next to the
    input as 'THRESH_' + filename and its non-zero pixel count is recorded.
    The counts are written to 'pixel_counts.csv' in the current working
    directory.

    Parameters:
        png_files: list of image filenames, resolved against the current
            working directory.
        EGG_MIN: lower bound passed to cv2.inRange.
        EGG_MAX: upper bound passed to cv2.inRange.

    Returns:
        None. The results are the THRESH_ images and pixel_counts.csv.

    Raises:
        IOError: if an image cannot be read.
    """
    # Collect the counts in file order and build the frame once at the end,
    # instead of scanning the whole frame with a boolean mask for every file.
    egg_pixels = []
    for input_file in png_files:
        current_working = cv2.imread(input_file)
        if current_working is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read; fail here with the filename rather than inside cvtColor
            raise IOError('could not read image ' + input_file)
        hsv = cv2.cvtColor(current_working, cv2.COLOR_BGR2HSV)
        frame_threshed = cv2.inRange(hsv, EGG_MIN, EGG_MAX)
        cv2.imwrite('THRESH_' + input_file, frame_threshed)
        egg_pixels.append(cv2.countNonZero(frame_threshed))

    pixel_count = pd.DataFrame({'filename':png_files,'egg_pixels':egg_pixels})
    pixel_count.to_csv('pixel_counts.csv')

### Now we run some experimental images

In [13]:
# HSV bounds handed to run_directory() for every directory below
EGG_MIN = np.array([0, 0, 0],np.uint8)
EGG_MAX = np.array([173, 46, 201],np.uint8)

# Directories of experimental images to process
run_dirs = ['/Users/ben/Dropbox/sdworkin2016/egg_photos/control_experiments',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_A',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_B',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_C',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_D',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_E',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_F',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/substrate_blind_test/a',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/substrate_blind_test/b',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/substrate_blind_test/c']

for test_dir in run_dirs:
    # run_directory() opens images by bare filename and writes its outputs to
    # the current working directory, so switch into each directory first
    os.chdir(test_dir)
    # os.listdir() returns entries in arbitrary order; sort them so the rows
    # of pixel_counts.csv come out in the same order on every run
    png_files = sorted([f for f in os.listdir(test_dir) if (f.endswith('.png') and not f.startswith('THRESH'))])
    print('working on directory ' + test_dir + ":\n")
    run_directory(png_files, EGG_MIN, EGG_MAX)

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/control_experiments:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_A:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_B:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_C:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_D:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_E:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_experiments/strain_F:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/substrate_blind_test/a:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/substrate_blind_test/b:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/substrate_blind_test/c:



In [3]:
# HSV bounds handed to run_directory() for every directory below
EGG_MIN = np.array([0, 0, 0],np.uint8)
EGG_MAX = np.array([173, 46, 201],np.uint8)

# Directories of the genotype_150mOsm images to process
run_dirs = ['/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_150mOsm/A',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_150mOsm/B',
           '/Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_150mOsm/C']

for test_dir in run_dirs:
    # run_directory() opens images by bare filename and writes its outputs to
    # the current working directory, so switch into each directory first
    os.chdir(test_dir)
    # os.listdir() returns entries in arbitrary order; sort them so the rows
    # of pixel_counts.csv come out in the same order on every run
    png_files = sorted([f for f in os.listdir(test_dir) if (f.endswith('.png') and not f.startswith('THRESH'))])
    print('working on directory ' + test_dir + ":\n")
    run_directory(png_files, EGG_MIN, EGG_MAX)

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_150mOsm/A:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_150mOsm/B:

working on directory /Users/ben/Dropbox/sdworkin2016/egg_photos/genotype_150mOsm/C:

