In [27]:
import numpy as np
from scipy import stats
from PIL import Image
from skimage import color
from skimage.util.dtype import dtype_range
from skimage.util import img_as_ubyte
from skimage.morphology import disk
from skimage.filters.rank import gradient
from skimage.filters import roberts, sobel, threshold_otsu
from skimage.feature import corner_harris, corner_peaks, blob_log, blob_doh, blob_dog

In [14]:
#Feature Extraction Function
def feature_extraction(path):
    parts = path.split('/')
    category = parts[5]
    file_name = parts[6]
    
    #Read image
    im = Image.open(path)
    image = np.array(im)
    image_gray = color.rgb2gray(image)

    #Basic Feature Extraction
    #Image Size
    y = image.shape[0]
    x = image.shape[1]

    #Most Frequent Color (really go with 2nd most freq, many of the images will have white borders and make it the most common, which is not helpful)
    pixels = im.getcolors(x * y)
    most_frequent_pixel = pixels[1]
    #Convert RGB Values to single unique number
    if type(most_frequent_pixel) == 'tuple': #have to check because this fails on some images...?
        most_freq_color = 65536*most_frequent_pixel[1][0] + 256*most_frequent_pixel[1][1] + most_frequent_pixel[1][2]
    else:
        most_freq_color = 'nan' #will clear later

    #Avg Value per Color channel
    if image.shape[2] == 3:
        red_val = np.mean(image[:,:,0])
        grn_val = np.mean(image[:,:,1])
        blu_val = np.mean(image[:,:,2])
    else:
        red_val = 'nan'
        grn_val = 'nan'
        blu_val = 'nan'
    
    grey_val = np.mean(image_gray)

    #Outsu Binary Score
    thresh = threshold_otsu(image_gray)
    binary = image_gray > thresh
    binary_per = np.sum(binary)/(x*y) #since we'll be comparing images of different sizes, its more appropriate to use percent of image converted in the binary and not total binary score (# of black pixels)

    #Sharpness and Sharpness Frequencies
    selection_element = disk(5) # matrix of n pixels with a disk shape
    sharpness = (gradient(image_gray, selection_element))
    mean_sharp=np.mean(sharpness)
    sharp = sharpness.flatten()
    shrp = np.fft.fft(sharp)
    freq = np.fft.fftfreq(shrp.size, d=1)
    avg_sharp_freq = np.mean(freq)

    #Edge Detection
    edge_roberts = roberts(image_gray)
    edge_sobel = sobel(image_gray)
    roberts_score = np.sum(edge_roberts)/(x*y) #normalize to size of image
    sobel_score = np.sum(edge_sobel)/(x*y)

    #Blobs - Difference of Gaussian [x coord, y coord, radius size]
    blobs_dog = blob_dog(image_gray, max_sigma=30, threshold=.1)
    # use this method for image coordinates for fitting if corners detection fails later
    
    #Blobs - Laplacian of Gaussian [x coord, y coord, radius size]
    blobs_log = blob_log(image_gray, max_sigma=30, num_sigma=10, threshold=.1) 
    gaussian_blobs = len(blobs_log)
    guassian_means = np.mean(blobs_log[:,2])

    #Blobs - Determinant of Hessian [x coord, y coord, radius size]
    blobs_doh = blob_doh(image_gray, max_sigma=30, threshold=.01) 
    hessain_blobs = len(blobs_doh)
    hessain_means = np.mean(blobs_doh[:,2])
    
    #Linear Fitting With Corners Detction (Linear Fit to detect corner locations, help determine shape orientation)
    coords = corner_peaks(corner_harris(image_gray), min_distance=5)
    if coords.shape[0] == 0: #for some reason, corners fails on certain images, use diff method to find feature coordinates
        coords=np.empty([len(blobs_dog),2])
        coords[:,0] = blobs_dog[:,0]
        coords[:,1] = blobs_dog[:,1]
        
    lin_slope, lin_intercept, lin_r_value, lin_p_value, lin_std_err = stats.linregress(coords[:,0],coords[:,1])
    lin_rsq_value = lin_r_value**2

    return [file_name, x, y, most_freq_color, red_val, grn_val, blu_val, grey_val,
          binary_per, mean_sharp, avg_sharp_freq, roberts_score, sobel_score,
          lin_slope, lin_intercept, lin_rsq_value, lin_p_value, lin_std_err,
          gaussian_blobs, guassian_means, hessain_blobs, hessain_means, category]

  .format(dtypeobj_in, dtypeobj_out))


ValueError: Inputs must not be empty.

In [15]:
im.show(image)

In [46]:
coords = corner_peaks(corner_harris(image_gray), min_distance=5)
lin_slope, lin_intercept, lin_r_value, lin_p_value, lin_std_err = stats.linregress(coords[:,0],coords[:,1])
lin_rsq_value = lin_r_value**2

ValueError: Inputs must not be empty.

In [54]:
if coords.shape[0] == 0:
    coords=np.empty([len(blobs_dog),2])
    coords[:,0] = blobs_dog[:,0]
    coords[:,1] = blobs_dog[:,1]

coords

array([[ 161.,  400.],
       [ 138.,   78.],
       [ 138.,   70.],
       [ 131.,  191.],
       [ 127.,  205.],
       [ 124.,  216.],
       [ 122.,  315.],
       [ 122.,  147.],
       [ 121.,  120.],
       [ 120.,  139.],
       [ 115.,   94.],
       [ 114.,  320.],
       [ 114.,  284.],
       [ 114.,  273.],
       [ 113.,  326.],
       [ 113.,  180.],
       [ 112.,   67.],
       [ 108.,  356.],
       [ 106.,    0.],
       [ 105.,  329.],
       [ 104.,  324.],
       [ 103.,  319.],
       [  98.,   72.],
       [  93.,  203.],
       [  90.,  400.],
       [  90.,  243.],
       [  74.,  122.],
       [   0.,  400.],
       [   0.,  270.]])

In [7]:
# #Avg Color - Likely not gonna be helpful - Also removed from the actual code bc its very very slow
# colour_tuple = [None, None, None]
# for channel in range(3):
# # Get data for one channel at a time
#     pixels = im.getdata(band=channel)
#     values = []
#     for pixel in pixels:
#         values.append(pixel)
#         colour_tuple[channel] = sum(values) / len(values)
# #Convert RGB Values to single unique number
# Avg_color = 65536*colour_tuple[0] + 256*colour_tuple[1] + colour_tuple[2]

In [44]:
coords=np.empty([len(blobs_dog),2])
coords[:,0] = blobs_dog[:,0]
coords[:,1] = blobs_dog[:,1]


array([[ 161.,  400.],
       [ 138.,   78.],
       [ 138.,   70.],
       [ 131.,  191.],
       [ 127.,  205.],
       [ 124.,  216.],
       [ 122.,  315.],
       [ 122.,  147.],
       [ 121.,  120.],
       [ 120.,  139.],
       [ 115.,   94.],
       [ 114.,  320.],
       [ 114.,  284.],
       [ 114.,  273.],
       [ 113.,  326.],
       [ 113.,  180.],
       [ 112.,   67.],
       [ 108.,  356.],
       [ 106.,    0.],
       [ 105.,  329.],
       [ 104.,  324.],
       [ 103.,  319.],
       [  98.,   72.],
       [  93.,  203.],
       [  90.,  400.],
       [  90.,  243.],
       [  74.,  122.],
       [   0.,  400.],
       [   0.,  270.]])