In [57]:
import os

import numpy as np
import pandas as pd
from PIL import Image
from scipy import stats
from skimage import color
from skimage.feature import corner_harris, corner_peaks, blob_log, blob_doh, blob_dog
from skimage.filters import roberts, sobel, threshold_otsu
from skimage.filters.rank import gradient
from skimage.morphology import disk
from skimage.util import img_as_ubyte
from skimage.util.dtype import dtype_range

In [58]:
#This function walks through all the files in the main 50_categories folder
# to generate an iterable list of file paths to submit later for feature extraction

def directory_search(directory):
    """Collect the paths of every file found one level below ``directory``.

    ``directory`` is expected to hold one sub-folder per category, each
    containing that category's files. Entries whose name starts with '.'
    are ignored at both levels, and top-level entries that are not
    directories are skipped instead of being listed.

    Parameters
    ----------
    directory : str
        Path to the folder holding the category sub-folders. A trailing
        path separator is optional.

    Returns
    -------
    list of str
        One path per file, built as directory/sub-folder/file, in the order
        ``os.listdir`` reports them.
    """
    directory_list = []
    for folder in os.listdir(directory):
        # os.path.join copes with a missing trailing separator on `directory`
        subdirectory = os.path.join(directory, folder)
        # skip hidden entries and stray files sitting next to the category folders
        if folder.startswith('.') or not os.path.isdir(subdirectory):
            continue
        for file in os.listdir(subdirectory):
            if not file.startswith('.'): #ignores hidden files
                directory_list.append(os.path.join(subdirectory, file))
    return directory_list

#Assemble directory List w/above function
# Root of the 50_categories folder. Set the CATEGORIES_DIR environment variable to
# point at YOUR copy; the path below is only the fallback default.
# os.path.join(..., '') guarantees the trailing separator that directory_search
# relies on when it concatenates the sub-folder names.
directory = os.path.join(os.environ.get('CATEGORIES_DIR', '/Users/Charlie/Desktop/tester/'), '')
list_of_paths = directory_search(directory)

In [55]:
#Feature Extraction Function
def _second_most_frequent_color(color_counts):
    """Encode the second most frequent RGB colour as a single integer.

    ``color_counts`` is a list of ``(count, (r, g, b))`` pairs as returned by
    PIL's ``Image.getcolors``. That list is not ordered by count, so it is
    ranked here first. The runner-up is used because many images have a white
    border that would otherwise win. Falls back to the only colour when the
    image contains just one.
    """
    ranked = sorted(color_counts, key=lambda entry: entry[0], reverse=True)
    _, rgb = ranked[1] if len(ranked) > 1 else ranked[0]
    #Convert RGB values to single unique number
    return 65536*rgb[0] + 256*rgb[1] + rgb[2]


def _weighted_mean_frequency(signal):
    """Mean absolute FFT frequency of a 1-D signal, weighted by spectral magnitude."""
    magnitude = np.abs(np.fft.fft(signal))
    frequency = np.abs(np.fft.fftfreq(magnitude.size, d=1))
    total = magnitude.sum()
    # an all-zero signal has no spectrum to weight by
    return np.sum(frequency*magnitude)/total if total > 0 else 0.0


def _mean_blob_sigma(blobs):
    """Mean of the third column (sigma) of a blob array, or nan when it has no rows."""
    return np.mean(blobs[:, 2]) if len(blobs) else np.nan


def _fit_line(points):
    """Regress column 1 of ``points`` on column 0.

    Returns (slope, intercept, r squared, p value, std err). All five are nan
    when there are fewer than two points or every x value is the same, since
    no line can be fitted then.
    """
    if points.shape[0] < 2 or np.all(points[:, 0] == points[0, 0]):
        return (np.nan,)*5
    slope, intercept, r_value, p_value, std_err = stats.linregress(points[:, 0], points[:, 1])
    return slope, intercept, r_value**2, p_value, std_err


def feature_extraction(path):
    """Extract one row of features from the image file at ``path``.

    Parameters
    ----------
    path : str
        Path to an image file laid out as .../<category>/<file name>.

    Returns
    -------
    list
        23 values in this order: file_name, x (width), y (height),
        most_freq_color, red_val, grn_val, blu_val, grey_val, binary_per,
        mean_sharp, avg_sharp_freq, roberts_score, sobel_score, lin_slope,
        lin_intercept, lin_rsq_value, lin_p_value, lin_std_err,
        gaussian_blobs, guassian_means, hessain_blobs, hessain_means,
        category. The colour features are nan for grayscale images, the blob
        means are nan when no blobs are found, and the line-fit values are
        nan when fewer than two usable points are detected.
    """
    #Category is the folder holding the file; reading it from the end of the path
    #works no matter how deep the dataset folder sits on disk
    file_name = os.path.basename(path)
    category = os.path.basename(os.path.dirname(path))

    #Read image (the with-block closes the file once the pixel data is in memory)
    with Image.open(path) as im:
        image = np.array(im)
        color_counts = None
        if image.ndim == 3: #image is a color image
            rgb_im = im
            if im.mode != 'RGB':
                #e.g. RGBA or CMYK: reduce to three channels so the colour
                #features and the grey conversion see plain RGB
                rgb_im = im.convert('RGB')
                image = np.array(rgb_im)
            color_counts = rgb_im.getcolors(image.shape[0]*image.shape[1])

    #Image Size
    y = image.shape[0]
    x = image.shape[1]

    #Image Color
    if color_counts is None: #image is grey scale
        image_gray = image
        #real NaN rather than the string 'nan', so the columns stay numeric
        red_val = grn_val = blu_val = most_freq_color = np.nan
    else: #image is RGB color
        most_freq_color = _second_most_frequent_color(color_counts)

        #Avg Value per Color channel
        red_val = np.mean(image[:,:,0])
        grn_val = np.mean(image[:,:,1])
        blu_val = np.mean(image[:,:,2])
        image_gray = color.rgb2gray(image) #float image in [0, 1]

    #Avg grey level on a 0-1 scale for both kinds of image
    grey_val = np.mean(image_gray)
    if np.issubdtype(image_gray.dtype, np.integer):
        grey_val = grey_val/np.iinfo(image_gray.dtype).max

    #Otsu Binary Score
    thresh = threshold_otsu(image_gray)
    binary = image_gray > thresh
    #images differ in size, so use the fraction of pixels above the threshold
    #instead of the raw count
    binary_per = np.sum(binary)/(x*y)

    #Sharpness and Sharpness Frequencies
    selection_element = disk(5) # matrix of n pixels with a disk shape
    #rank filters work on integer images; converting explicitly puts every
    #image on the same 0-255 scale
    sharpness = gradient(img_as_ubyte(image_gray), selection_element)
    mean_sharp = np.mean(sharpness)
    avg_sharp_freq = _weighted_mean_frequency(sharpness.flatten())

    #Edge Detection
    edge_roberts = roberts(image_gray)
    edge_sobel = sobel(image_gray)
    roberts_score = np.sum(edge_roberts)/(x*y) #normalize to size of image
    sobel_score = np.sum(edge_sobel)/(x*y)

    #Blobs - Laplacian of Gaussian [row, col, sigma]
    blobs_log = blob_log(image_gray, max_sigma=30, num_sigma=10, threshold=.1)
    gaussian_blobs = len(blobs_log)
    guassian_means = _mean_blob_sigma(blobs_log)

    #Blobs - Determinant of Hessian [row, col, sigma]
    blobs_doh = blob_doh(image_gray, max_sigma=30, threshold=.01)
    hessain_blobs = len(blobs_doh)
    hessain_means = _mean_blob_sigma(blobs_doh)

    #Linear Fitting With Corners Detection (linear fit through the corner
    #locations [row, col], helps determine shape orientation)
    coords = corner_peaks(corner_harris(image_gray), min_distance=5)
    if coords.shape[0] == 0:
        #corner detection finds nothing on some images: fall back to
        #Difference of Gaussian blob centres [row, col, sigma], computed only
        #when needed
        blobs_dog = blob_dog(image_gray, max_sigma=30, threshold=.1)
        coords = blobs_dog[:, :2]

    lin_slope, lin_intercept, lin_rsq_value, lin_p_value, lin_std_err = _fit_line(coords)

    return [file_name, x, y, most_freq_color, red_val, grn_val, blu_val, grey_val,
          binary_per, mean_sharp, avg_sharp_freq, roberts_score, sobel_score,
          lin_slope, lin_intercept, lin_rsq_value, lin_p_value, lin_std_err,
          gaussian_blobs, guassian_means, hessain_blobs, hessain_means, category]

In [64]:
# Display the image paths collected by directory_search
list_of_paths

['/Users/Charlie/Desktop/tester/gorilla/gorilla_0016.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0002.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0003.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0017.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0001.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0015.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0029.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0028.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0014.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0038.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0004.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0010.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0011.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0005.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0039.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0013.jpg',
 '/Users/Charlie/Desktop/tester/gorilla/gorilla_0007.jpg

In [71]:
# Image file opened by the following cell
path = '/Users/Charlie/Desktop/tester/gorilla/gorilla_0128.jpg'

In [72]:
#Read image
im = Image.open(path)
image = np.array(im)
#Basic Feature Extraction

#Image Size
y = image.shape[0]
x = image.shape[1]

#Image Color
if len(image.shape) == 2: #image is grey scale
    image_gray = image
    #real NaN rather than the string 'nan', so the colour features stay numeric
    red_val = np.nan
    grn_val = np.nan
    blu_val = np.nan
    most_freq_color = np.nan
elif len(image.shape) == 3: #image is RGB color

    #Most Frequent Color (really go with 2nd most freq, many of the images will have white borders and make it the most common, which is not helpful)
    #getcolors() returns (count, colour) pairs in no particular order, so rank them by count first
    pixels = sorted(im.getcolors(x * y), key=lambda entry: entry[0], reverse=True)
    #an image with a single colour has no runner-up; use that colour instead
    most_frequent_pixel = pixels[1] if len(pixels) > 1 else pixels[0]
    #Convert RGB Values to single unique number
    most_freq_color = 65536*most_frequent_pixel[1][0] + 256*most_frequent_pixel[1][1] + most_frequent_pixel[1][2]

    #Avg Value per Color channel
    red_val = np.mean(image[:,:,0])
    grn_val = np.mean(image[:,:,1])
    blu_val = np.mean(image[:,:,2])
    

'nan'

In [70]:
# Number of array dimensions of the loaded image: 2 means grey scale, 3 means a colour image
len(image.shape)

2

In [77]:
# Load the feature table stored in the tab-separated file 'category_vals',
# using its first column as the row index (pandas is imported in the top import cell)
df = pd.read_csv('category_vals', sep='\t', index_col=0)

In [78]:
# Display the loaded feature table
df

Unnamed: 0,file name,x,y,most_freq_color,red_val,grn_val,blu_val,grey_val,binary_per,mean_sharp,...,lin_slope,lin_intercept,lin_rsq_value,lin_p_value,lin_std_err,gaussian_blobs,guassian_means,hessain_blobs,hessain_means,category
0,gorilla_0016.jpg,319,349,16711679.0,149.408592,148.160234,140.587114,0.579920,0.458336,86.852961,...,0.029181,139.067269,0.000920,8.487786e-01,0.152048,281,3.614472,36,11.919753,gorilla
1,gorilla_0002.jpg,340,350,16711679.0,61.931454,68.196050,72.891277,0.263543,0.252782,64.113992,...,0.220314,6.310923,0.892619,5.521501e-02,0.054033,177,2.547395,8,5.833333,gorilla
2,gorilla_0003.jpg,400,600,16711679.0,75.804963,94.221250,84.071525,0.351278,0.394188,104.556667,...,0.477314,32.226093,0.166816,1.393848e-04,0.119265,1437,1.771360,35,5.971429,gorilla
3,gorilla_0017.jpg,768,512,16711527.0,71.609828,74.130844,64.048658,0.285758,0.100728,23.922849,...,0.087223,418.910689,0.000101,9.508439e-01,1.405571,111,3.902903,1,30.000000,gorilla
4,gorilla_0001.jpg,300,365,65536.0,54.888630,56.712100,47.096968,0.218162,0.271096,68.474110,...,-0.242942,227.728077,0.060603,7.558269e-02,0.133936,266,2.611111,7,18.031746,gorilla
5,gorilla_0015.jpg,169,225,16711675.0,83.435792,82.722788,77.099855,0.323407,0.432715,99.997423,...,0.084457,74.177197,0.007359,8.019776e-01,0.326961,168,2.074074,3,18.185185,gorilla
6,gorilla_0029.jpg,259,400,3428658.0,81.894575,110.468668,77.327394,0.400028,0.432046,160.046062,...,0.128803,100.513676,0.035737,4.174289e-03,0.044505,1201,1.504394,57,5.805068,gorilla
7,gorilla_0028.jpg,280,187,16711673.0,124.003571,133.906990,113.513789,0.511107,0.598109,93.401509,...,0.782334,120.512988,0.873321,1.991678e-02,0.172027,294,1.679516,5,15.822222,gorilla
8,gorilla_0014.jpg,640,421,16711679.0,95.764196,77.646441,84.449106,0.321517,0.283848,65.522628,...,-0.363492,452.687155,0.056416,2.500923e-02,0.159377,868,1.664491,16,12.881944,gorilla
9,gorilla_0038.jpg,422,648,16711679.0,114.562175,121.172825,88.831871,0.460534,0.490964,90.139979,...,-0.145321,226.653694,0.080493,4.152596e-02,0.069461,1157,2.063862,16,12.680556,gorilla
