In [None]:
import cv2
import numpy as np
import time
from skimage.feature import local_binary_pattern, hog
from sklearn.base import BaseEstimator, TransformerMixin

Basic histograms for each HSV channel: H has 180 bins (OpenCV stores hue for 8-bit images in the range 0-179), while S and V have 256 bins each.


In [None]:
def extract_hsv_histogram(image):
    """Build a concatenated, per-channel histogram of an image in HSV space.

    The image is converted with ``cv2.COLOR_RGB2HSV`` (i.e. it is interpreted
    as RGB). One histogram is computed per channel: 180 bins over [0, 180)
    for H, and 256 bins over [0, 256) for S and V. Each histogram is
    normalised in place with ``cv2.normalize`` using its default settings
    before the three are joined.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` as an RGB image.

    Returns:
        A flat 1-D array of 180 + 256 + 256 = 692 histogram values, ordered
        H, S, V.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

    # (channel index, bin count); the bin count is also the upper range bound.
    channel_layout = ((0, 180), (1, 256), (2, 256))

    channel_histograms = []
    for channel, n_bins in channel_layout:
        channel_hist = cv2.calcHist([hsv_image], [channel], None, [n_bins], [0, n_bins])
        # Passing the same array as src and dst normalises it in place.
        cv2.normalize(channel_hist, channel_hist)
        channel_histograms.append(channel_hist)

    return np.concatenate(channel_histograms).flatten()

The Laplacian operator is sensitive to noise, so the image is blurred slightly first. A 3x3 Gaussian kernel is used so that the image is not blurred too much.


In [None]:
def extract_laplace_variance(image):
    """Return the variance of the Laplacian of a lightly blurred grayscale image.

    The image is converted with ``cv2.COLOR_RGB2GRAY`` (i.e. it is interpreted
    as RGB), smoothed with a 3x3 Gaussian kernel, and passed through
    ``cv2.Laplacian`` with a 64-bit float output depth.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` as an RGB image.

    Returns:
        A 1-element NumPy array holding the variance of the Laplacian
        response, so it can be stacked with the other feature vectors.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # sigma=0 lets OpenCV derive the Gaussian sigma from the kernel size.
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
    laplacian_response = cv2.Laplacian(smoothed, cv2.CV_64F)
    variance = laplacian_response.var()
    return np.array([variance])

HOG to extract edge/shape information from images.

`pixels_per_cell` will be tuned by trying the values (8, 8) and (16, 16), since it is the most influential parameter.

For the other parameters, the default values are used to avoid making the problem too complex.

In [None]:
def extract_hog_features(image, pixels_per_cell):
    """Compute a HOG descriptor for the grayscale version of an image.

    The image is converted with ``cv2.COLOR_RGB2GRAY`` (i.e. it is interpreted
    as RGB) and handed to ``skimage.feature.hog``. Only ``pixels_per_cell`` is
    configurable; the remaining settings are fixed at 9 orientations,
    3x3 cells per block and 'L2-Hys' block normalisation.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` as an RGB image.
        pixels_per_cell: Cell size forwarded to ``hog`` (e.g. ``(8, 8)``).

    Returns:
        Whatever ``hog`` returns for these settings (the HOG feature vector).
    """
    fixed_hog_settings = {
        'orientations': 9,
        'cells_per_block': (3, 3),
        'block_norm': 'L2-Hys',
    }
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return hog(grayscale, pixels_per_cell=pixels_per_cell, **fixed_hog_settings)

LBP is used to extract texture from images. The ['uniform'](https://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.local_binary_pattern) method is used to make the result rotation invariant and to obtain a lower-dimensional feature vector.

`lbp_points` and `lbp_radius` will be tuned by trying all combinations of lbp_points = 8, 16 and lbp_radius = 1, 2, 3.

In [None]:
def extract_lbp_histogram(image, lbp_points, lbp_radius):
    """Compute a normalised histogram of 'uniform' local binary patterns.

    The image is converted with ``cv2.COLOR_RGB2GRAY`` (i.e. it is interpreted
    as RGB), LBP codes are computed with ``method='uniform'``, and the codes
    are binned into ``lbp_points + 2`` equal-width bins over
    ``[0, lbp_points + 2)``.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` as an RGB image.
        lbp_points: Number of neighbour points, passed to
            ``local_binary_pattern`` as ``P``.
        lbp_radius: Neighbourhood radius, passed to
            ``local_binary_pattern`` as ``R``.

    Returns:
        A 1-D float array of length ``lbp_points + 2`` whose entries are the
        bin counts divided by their total.
    """
    n_bins = lbp_points + 2

    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    lbp_codes = local_binary_pattern(
        grayscale, P=lbp_points, R=lbp_radius, method='uniform'
    )

    counts, _bin_edges = np.histogram(lbp_codes.ravel(), bins=n_bins, range=(0, n_bins))
    # Convert to float before dividing so the result holds relative frequencies.
    counts = counts.astype("float")
    return counts / counts.sum()

Wrap feature extraction in a "transformer" class so that it can be put in a pipeline, allowing GridSearchCV to tune the hyperparameters of the HOG and LBP features at the same time as the hyperparameters of the machine learning model.

[Creating custom transformers in python](https://medium.com/@pgshanding/creating-custom-transformers-in-python-and-scikit-learn-10767487017e)


In [None]:
class FeatureExtractor(BaseEstimator, TransformerMixin):
    """Transformer that converts a collection of images into feature vectors.

    For each image the following features are computed and concatenated, in
    this order: HSV histogram, Laplacian variance, LBP histogram and HOG
    descriptor.

    Parameters
    ----------
    lbp_radius : default 1
        Forwarded to ``extract_lbp_histogram`` as ``lbp_radius``.
    lbp_points : default 8
        Forwarded to ``extract_lbp_histogram`` as ``lbp_points``.
    hog_pixels_per_cell : default (8, 8)
        Forwarded to ``extract_hog_features`` as ``pixels_per_cell``.
    """

    def __init__(self, lbp_radius=1, lbp_points=8, hog_pixels_per_cell=(8, 8)):
        # Stored unchanged under the constructor argument names so the values
        # can be tuned as hyperparameters when this step sits in a pipeline.
        self.lbp_radius = lbp_radius
        self.lbp_points = lbp_points
        self.hog_pixels_per_cell = hog_pixels_per_cell

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the extractor learns no state."""
        return self

    def transform(self, X, y=None):
        """Extract one concatenated feature vector per image in ``X``.

        Progress and the elapsed time are printed to stdout.

        Parameters
        ----------
        X : sized iterable of images
            Must support ``len()`` and iteration. Each image is passed to the
            four ``extract_*`` helpers.
        y : ignored
            Accepted only for signature compatibility.

        Returns
        -------
        numpy.ndarray
            ``np.array`` of the per-image vectors, in input order. This is a
            2-D (n_images, n_features) array when every vector has the same
            length — presumably all images share one size, since the HOG
            length depends on it; TODO confirm against the caller.
        """
        total_images = len(X)
        print(f'starting feature extraction for {total_images} images...')
        # perf_counter is a monotonic clock, so the reported duration cannot
        # be distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        features_list = [
            np.hstack([
                extract_hsv_histogram(image),
                extract_laplace_variance(image),
                extract_lbp_histogram(image, self.lbp_points, self.lbp_radius),
                extract_hog_features(image, self.hog_pixels_per_cell),
            ])
            for image in X
        ]

        total = time.perf_counter() - start_time
        print(f'finished extracting features in {total} seconds')
        return np.array(features_list)