In [10]:
import cv2
import numpy as np
import time
import os
from itertools import product
from skimage.feature import local_binary_pattern, hog

Previous idea: a basic histogram for each HSV channel (H: 180 bins, S: 256 bins, V: 256 bins). This resulted in a feature vector that was too big (180 + 256 + 256 = 692 values).

Current idea: extract only the mean and standard deviation of each HSV channel instead (6 values in total).


In [11]:
def extract_hsv_stats(image):
    """Summarise the colour content of an image with per-channel HSV statistics.

    The image is converted with ``cv2.COLOR_RGB2HSV`` (so it is treated as RGB),
    split into its H, S and V channels, and the mean and standard deviation of
    each channel are computed.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with ``COLOR_RGB2HSV``.

    Returns:
        1-D NumPy array of 6 values ordered as
        ``[mean_h, std_h, mean_s, std_s, mean_v, std_v]``.
    """
    # NOTE(review): hue is an angular quantity; a plain arithmetic mean treats it
    # as linear. Kept as-is so the extracted features do not change.
    channels = cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2HSV))

    stats = []
    for channel in channels:  # order: H, S, V
        stats.extend((np.mean(channel), np.std(channel)))

    return np.array(stats)

The Laplacian operator is sensitive to noise, so the image is blurred slightly first. I am using a 3x3 Gaussian kernel so as not to blur too much.


In [12]:
def extract_laplace_variance(image):
    """Compute the variance of the Laplacian of a lightly blurred grayscale image.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``.

    Returns:
        1-D NumPy array holding a single value: the variance of the Laplacian
        response (computed at ``cv2.CV_32F`` depth).
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # 3x3 Gaussian blur (sigma derived from the kernel size) applied before the
    # Laplacian to damp noise.
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
    laplacian_response = cv2.Laplacian(smoothed, cv2.CV_32F)
    return np.array([laplacian_response.var()])

HOG (Histogram of Oriented Gradients) is used to extract edge/shape information from the images.

`pixels_per_cell` will be tuned by trying the values (8, 8) and (16, 16), since it is the parameter with the largest effect on the result.

The other parameters are kept fixed (`orientations=9`, `cells_per_block=(2, 2)`, `block_norm='L2-Hys'`) so that the search space does not become too complex.

In [13]:
def extract_hog_features(image, pixels_per_cell):
    """Compute a HOG descriptor for the grayscale version of an image.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``.
        pixels_per_cell: Cell size passed through to ``skimage.feature.hog``,
            e.g. ``(8, 8)`` or ``(16, 16)``.

    Returns:
        The feature vector returned by ``skimage.feature.hog``.
    """
    # Settings that are held constant; only pixels_per_cell is varied by callers.
    fixed_hog_settings = {
        'orientations': 9,
        'cells_per_block': (2, 2),
        'block_norm': 'L2-Hys',
    }
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return hog(grayscale, pixels_per_cell=pixels_per_cell, **fixed_hog_settings)

LBP (Local Binary Patterns) is used to extract texture information from the images. The ['uniform'](https://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.local_binary_pattern) method is used to make the result rotation invariant and to obtain a lower-dimensional feature vector.

`lbp_points` and `lbp_radius` will be tuned by trying every combination of `lbp_points` = 8 or 12 and `lbp_radius` = 1 or 2.

In [14]:
def extract_lbp_histogram(image, lbp_points, lbp_radius):
    """Build a normalised histogram of uniform LBP codes for an image.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``.
        lbp_points: Number of neighbour points (``P``) for the LBP operator.
        lbp_radius: Radius (``R``) for the LBP operator.

    Returns:
        1-D float NumPy array with ``lbp_points + 2`` entries that sum to 1.
    """
    # The histogram uses lbp_points + 2 bins over the range [0, lbp_points + 2).
    n_bins = lbp_points + 2

    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    lbp_codes = local_binary_pattern(grayscale, P=lbp_points, R=lbp_radius, method='uniform')

    counts = np.histogram(lbp_codes.ravel(), bins=n_bins, range=(0, n_bins))[0]
    counts = counts.astype("float")
    # Normalise so the histogram does not depend on the number of pixels.
    return counts / counts.sum()

In [15]:
def extract_all_features_for_image(image, hog_pixels, lbp_points, lbp_radius):
    """Concatenate every feature group for one image into a single vector.

    Args:
        image: Image array forwarded unchanged to each extractor.
        hog_pixels: ``pixels_per_cell`` value for ``extract_hog_features``.
        lbp_points: Number of LBP neighbour points for ``extract_lbp_histogram``.
        lbp_radius: LBP radius for ``extract_lbp_histogram``.

    Returns:
        1-D NumPy array laid out as
        ``[HSV stats | Laplacian variance | LBP histogram | HOG descriptor]``.
    """
    # The order here fixes the column layout of the saved feature matrices.
    feature_parts = (
        extract_hsv_stats(image),
        extract_laplace_variance(image),
        extract_lbp_histogram(image, lbp_points, lbp_radius),
        extract_hog_features(image, hog_pixels),
    )
    return np.hstack(feature_parts)

For each hyperparameter combination, extract the features for every image and save them to a separate file.

In [16]:
DATASETS_TO_PROCESS = ['dataset1', 'dataset2']
FEATURES_DIR = 'extracted_features'
os.makedirs(FEATURES_DIR, exist_ok=True)

# Hyperparameter grid: every HOG cell size is paired with every LBP
# (points, radius) setting, giving 2 * 2 * 2 = 8 feature sets per dataset.
hog_pixels_options = [(8, 8), (16, 16)]
lbp_points_options = [8, 12]
lbp_radius_options = [1, 2]
all_combinations = list(product(hog_pixels_options, lbp_points_options, lbp_radius_options))


for dataset_name in DATASETS_TO_PROCESS:
    print(f'\n---  {dataset_name} ---')

    # load preprocessed data
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects
    # (class_map is read back with .item(), so it is presumably a pickled dict).
    # Only load files produced by the trusted preprocessing step.
    filepath = os.path.join('preprocessed_data', f'{dataset_name}_processed.npz')
    with np.load(filepath, allow_pickle=True) as data:
        images, labels, class_map = data['images'], data['labels'], data['class_map'].item()

    for hog_pixels, lbp_points, lbp_radius in all_combinations:

        # create file name to save
        hog_str = f'hog{hog_pixels[0]}x{hog_pixels[1]}'
        lbp_str = f'lbp{lbp_points}p{lbp_radius}r'
        feature_set_name = f'{dataset_name}_{hog_str}_{lbp_str}'
        output_path = os.path.join(FEATURES_DIR, f'{feature_set_name}.npz')

        print(f'\nprocessing {feature_set_name}')

        # An existing output file is treated as a finished result (simple cache).
        if os.path.exists(output_path):
            print('\nfeatures already extracted for this combination, skipping...')
            continue

        # perf_counter is monotonic, so the elapsed time cannot be distorted by
        # system clock adjustments during a long extraction.
        start_time = time.perf_counter()

        features_array = np.array([
            extract_all_features_for_image(image, hog_pixels, lbp_points, lbp_radius)
            for image in images
        ])

        # Write to a temporary file and rename it into place. If the run is
        # interrupted mid-write, no partial file is left at output_path, so the
        # exists() check above never mistakes a corrupt file for a finished one.
        # The temporary name ends in '.npz' so NumPy does not append a suffix.
        tmp_path = f'{output_path}.tmp.npz'
        try:
            np.savez_compressed(tmp_path, features=features_array, labels=labels, class_map=class_map)
            os.replace(tmp_path, output_path)
        finally:
            # After a successful replace the temp file is gone; this only fires
            # when saving failed or was interrupted.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        end_time = time.perf_counter()
        print(f'\ncompleted extracting  for {len(images)} images in {end_time - start_time:.2f} seconds')


---  dataset1 ---

processing dataset1_hog8x8_lbp8p1r

completed extracting  for 6862 images in 105.54 seconds

processing dataset1_hog8x8_lbp8p2r

completed extracting  for 6862 images in 100.38 seconds

processing dataset1_hog8x8_lbp12p1r

completed extracting  for 6862 images in 107.59 seconds

processing dataset1_hog8x8_lbp12p2r

completed extracting  for 6862 images in 107.35 seconds

processing dataset1_hog16x16_lbp8p1r

completed extracting  for 6862 images in 55.32 seconds

processing dataset1_hog16x16_lbp8p2r

completed extracting  for 6862 images in 55.34 seconds

processing dataset1_hog16x16_lbp12p1r

completed extracting  for 6862 images in 66.35 seconds

processing dataset1_hog16x16_lbp12p2r

completed extracting  for 6862 images in 65.01 seconds

---  dataset2 ---

processing dataset2_hog8x8_lbp8p1r

completed extracting  for 3116 images in 44.59 seconds

processing dataset2_hog8x8_lbp8p2r

completed extracting  for 3116 images in 44.59 seconds

processing dataset2_hog8x