In [49]:
import numpy as np
import cv2
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
import os
import pandas as pd
from skimage.measure import moments, moments_central, shannon_entropy
from sklearn.cluster import KMeans
import mahotas
from tqdm import tqdm
from scipy.stats import skew, kurtosis
import pywt
from pyefd import elliptic_fourier_descriptors, normalize_efd, reconstruct_contour

In [None]:
def extract_efd_features(image, n_harmonics=10):
    """Extract normalized elliptic Fourier descriptors of the largest contour.

    The image is blurred (3x3 Gaussian), binarized with an inverted Otsu
    threshold, and the external contour with the largest area is passed to
    PyEFD's ``elliptic_fourier_descriptors`` with ``normalize=True``.

    This function is called by ``extract_all_features``, so it has to be a
    live definition (it was previously commented out, which made a fresh
    "Restart & Run All" fail with ``NameError``).

    Args:
        image: BGR image or single-channel image (ndarray).
        n_harmonics: Value forwarded as ``order`` to PyEFD.

    Returns:
        dict mapping ``"efd_<i>"`` to the i-th entry of the flattened
        coefficient array.

    Raises:
        ValueError: If the thresholded image contains no contour.
    """
    # Accept grayscale input as well, like the other shape extractors do.
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError("extract_efd_features: no contour found in image")
    contour = max(contours, key=cv2.contourArea)

    # OpenCV stores contours as (N, 1, 2); PyEFD is given an (N, 2) array.
    contour_array = contour.reshape(-1, 2)

    coeffs = elliptic_fourier_descriptors(contour_array, order=n_harmonics, normalize=True)

    # Flatten the coefficient matrix into a single feature vector.
    features = coeffs.flatten()

    return {f"efd_{i}": features[i] for i in range(len(features))}

In [67]:
def extract_contour_moments(image):
    """Summarize the centroid-to-boundary distances of the largest contour.

    The image is converted to grayscale when it has more than two dimensions,
    blurred, and binarized with an inverted Otsu threshold. The external
    contour with the largest area is then described by statistics of the
    distances between its points and its (integer-truncated) centroid, plus
    a perimeter^2 / (4 * pi * area) complexity measure.

    Returns:
        dict with keys ``contour_mean_dist``, ``contour_std_dist``,
        ``contour_min_dist``, ``contour_max_dist``, ``contour_dist_ratio``
        and ``contour_complexity``.
    """
    gray = image
    if len(gray.shape) > 2:
        gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (3, 3), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    largest = max(outlines, key=cv2.contourArea)
    points = largest.reshape(-1, 2)

    # Centroid from the spatial moments; (0, 0) when the contour has no area.
    spatial = cv2.moments(largest)
    if spatial['m00'] == 0:
        cx, cy = 0, 0
    else:
        cx = int(spatial['m10'] / spatial['m00'])
        cy = int(spatial['m01'] / spatial['m00'])

    # Euclidean distance from the centroid to every contour point.
    dx = points[:, 0] - cx
    dy = points[:, 1] - cy
    radii = np.sqrt(dx**2 + dy**2)

    nearest = np.min(radii)
    farthest = np.max(radii)

    perimeter = cv2.arcLength(largest, True)
    area = cv2.contourArea(largest)
    if area > 0:
        complexity = perimeter**2 / (4 * np.pi * area)
    else:
        complexity = 0

    return {
        'contour_mean_dist': np.mean(radii),
        'contour_std_dist': np.std(radii),
        'contour_min_dist': nearest,
        'contour_max_dist': farthest,
        # Small epsilon keeps the ratio finite when a point sits on the centroid.
        'contour_dist_ratio': farthest / (nearest + 1e-6),
        'contour_complexity': complexity
    }


In [51]:
def lbp_feature(image):
    """Return a 10-bin histogram of local binary pattern codes.

    The image is converted to grayscale when it has more than two dimensions
    and cast to ``uint8`` before ``local_binary_pattern`` is applied with
    ``R=1``, ``P=8`` and ``method="uniform"``. The histogram covers the range
    [0, 10) and holds raw (unnormalized) counts.

    Returns:
        dict mapping ``"LBP_<i>"`` to the count in bin ``i``.
    """
    gray = image if len(image.shape) <= 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = np.uint8(gray)  # LBP is computed on an 8-bit image
    codes = local_binary_pattern(gray, R=1, P=8, method="uniform")
    counts, _edges = np.histogram(codes.ravel(), bins=10, range=(0, 10))
    return {f"LBP_{idx}": count for idx, count in enumerate(counts)}

In [52]:
def texture_feature(image):
    """Compute first-order texture statistics from the gray-level histogram.

    Args:
        image: BGR image, or an already single-channel image. Grayscale input
            is accepted for consistency with the other extractors in this
            notebook (assumes a dtype that ``cv2.calcHist`` supports, e.g.
            uint8 - TODO confirm for non-uint8 callers).

    Returns:
        dict with ``texture_mean``, ``texture_std``, ``texture_uniformity``
        (sum of squared histogram probabilities) and ``texture_third_moment``
        (third central moment of the intensity distribution).
    """
    # Only convert when there is a channel axis; cvtColor fails on 2-D input.
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image

    # 256-bin histogram normalized to a probability distribution.
    hist = cv2.calcHist([gray_img], [0], None, [256], [0, 256])
    prob = (hist / hist.sum()).flatten()

    intensities = np.arange(256)
    mean = np.sum(intensities * prob)
    centered = intensities - mean
    std = np.sqrt(np.sum(centered**2 * prob))
    uniformity = np.sum(prob**2)
    third_moment = np.sum(centered**3 * prob)
    return {
        "texture_mean": mean,
        "texture_std": std,
        "texture_uniformity": uniformity,
        "texture_third_moment": third_moment
    }

In [53]:
def enhanced_color_feature(image):  # 90 features: 5 color spaces x 3 channels x 6 statistics
    """Compute per-channel color statistics in five color spaces.

    For each channel of the BGR input and of its HSV, Lab, YCrCb and XYZ
    conversions, six values are produced: mean, standard deviation, skewness,
    kurtosis, Shannon entropy, and the mean of the coarsest-level ``db4``
    wavelet approximation coefficients.

    Returns:
        dict keyed ``"<stat>_<channel>"`` (for example ``"mean_r"`` or
        ``"wav_cb"``). Keys are grouped by color space, then by statistic,
        then by channel.
    """
    def wavelet_approx_mean(channel):
        # Decompose as deep as the channel size allows and average the
        # approximation band (first entry of the coefficient list).
        depth = pywt.dwt_max_level(min(channel.shape), "db4")
        return np.mean(pywt.wavedec2(channel, "db4", level=depth)[0].ravel())

    # (key prefix, reducer), in the order the statistics appear in the output.
    statistics = (
        ("mean", np.mean),
        ("std", np.std),
        ("skew", lambda ch: skew(ch.flatten())),
        ("kur", lambda ch: kurtosis(ch.flatten())),
        ("ent", shannon_entropy),
        ("wav", wavelet_approx_mean),
    )

    # (conversion code or None for the native BGR image,
    #  ((key suffix, index into the split planes), ...) in output order).
    # The suffix casing is intentionally irregular: it reproduces the
    # established column names (e.g. "mean_B" is blue, "mean_b" is Lab b).
    color_spaces = (
        (None, (("r", 2), ("g", 1), ("B", 0))),
        (cv2.COLOR_BGR2HSV, (("h", 0), ("s", 1), ("v", 2))),
        (cv2.COLOR_BGR2LAB, (("l", 0), ("a", 1), ("b", 2))),
        # OpenCV yields Y, Cr, Cb; the output lists cb before cr.
        (cv2.COLOR_BGR2YCrCb, (("y", 0), ("cb", 2), ("cr", 1))),
        (cv2.COLOR_BGR2XYZ, (("x", 0), ("Y", 1), ("z", 2))),
    )

    features = {}
    for code, layout in color_spaces:
        converted = image if code is None else cv2.cvtColor(image, code)
        first, second, third = cv2.split(converted)
        planes = (first, second, third)
        for prefix, reducer in statistics:
            for suffix, plane_index in layout:
                features[f"{prefix}_{suffix}"] = reducer(planes[plane_index])
    return features

In [54]:
def zernike_feature(image, degree=8):
    """Compute Zernike moments of the filled silhouette of the largest contour.

    The image is converted to grayscale when needed, blurred and binarized
    with an inverted Otsu threshold. The largest external contour is filled
    into a binary mask, cropped around its minimum enclosing circle (clamped
    at the top/left image border), zero-padded to a square, and handed to
    ``mahotas.features.zernike_moments`` with a radius of half the square's
    side.

    Args:
        image: BGR or single-channel image.
        degree: Degree forwarded to ``mahotas.features.zernike_moments``.

    Returns:
        dict mapping ``"zernike_<i>"`` to the i-th moment.
    """
    gray = image
    if len(gray.shape) > 2:
        gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

    smoothed = cv2.GaussianBlur(gray, (3, 3), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    largest = max(outlines, key=cv2.contourArea)

    # Filled silhouette of the largest contour as a 0/1 mask.
    filled = np.zeros(binary.shape, dtype=np.uint8)
    cv2.drawContours(filled, [largest], -1, 255, -1)
    silhouette = (filled > 0).astype(np.uint8)

    (cx, cy), enclosing_r = cv2.minEnclosingCircle(largest)
    enclosing_r = int(np.ceil(enclosing_r))
    cx, cy = int(cx), int(cy)

    # Crop window around the enclosing circle; only the lower bounds need
    # clamping because slicing past the array end is already safe.
    top = max(cy - enclosing_r, 0)
    left = max(cx - enclosing_r, 0)
    crop = silhouette[top:cy + enclosing_r, left:cx + enclosing_r]

    # Centre the crop inside a square canvas when it is not square already.
    rows, cols = crop.shape
    if rows == cols:
        square = crop
    else:
        side = max(rows, cols)
        square = np.zeros((side, side), dtype=np.uint8)
        row_pad = (side - rows) // 2
        col_pad = (side - cols) // 2
        square[row_pad:row_pad + rows, col_pad:col_pad + cols] = crop

    moment_values = mahotas.features.zernike_moments(square, square.shape[0] // 2, degree)

    return {f'zernike_{k}': value for k, value in enumerate(moment_values)}

In [56]:
def hu_moments(image):
    """Return the seven log-scaled Hu moments of the largest contour.

    The image is converted to grayscale when needed, blurred and binarized
    with an inverted Otsu threshold; the external contour with the largest
    area is used. Each Hu moment ``h`` is mapped to
    ``-sign(h) * log10(|h| + 1e-10)``.

    Returns:
        dict with float values under the keys ``hu_1`` ... ``hu_7``.
    """
    gray = image
    if len(gray.shape) > 2:
        gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (3, 3), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    largest = max(outlines, key=cv2.contourArea)

    raw = cv2.HuMoments(cv2.moments(largest))
    # Signed log scaling; the epsilon avoids log10(0).
    scaled = -np.sign(raw) * np.log10(np.abs(raw) + 1e-10)

    return {f"hu_{n}": float(scaled[n - 1][0]) for n in range(1, 8)}


In [57]:
def basic_feature(image):  # 18 features
    """Compute basic geometric shape descriptors of the segmented object.

    The image is converted to grayscale when needed, blurred and binarized
    with an inverted Otsu threshold. Measurements are taken on the external
    contour with the largest area, except ``area`` which counts every
    foreground pixel of the binary image.
    NOTE(review): because ``area`` is pixel-based while the hull is
    contour-based, ``solidity`` can exceed 1 when the mask contains extra
    foreground blobs - confirm this is intended.

    Args:
        image: BGR or single-channel image.

    Returns:
        dict with the keys ``area``, ``length``, ``width``, ``ratio``,
        ``major_axis_length``, ``minor_axis_length``, ``convex_hull_area``,
        ``convex_hull_perimeter``, ``shape_factor_1`` ... ``shape_factor_4``,
        ``equivalent_diameter``, ``aspect_ratio``, ``perimeter``,
        ``roundness``, ``compactness`` and ``solidity``; or ``None`` when no
        usable contour exists (no contour, fewer than 5 points, or a
        degenerate shape that would divide by zero). ``extract_all_features``
        already skips a ``None`` result.
    """
    # Accept grayscale input too, like the other shape extractors.
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    _, new_img = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    area = np.count_nonzero(new_img)
    contours, _ = cv2.findContours(new_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    contour = max(contours, key=cv2.contourArea)
    # cv2.fitEllipse needs at least 5 contour points.
    if len(contour) < 5:
        return None

    peri = cv2.arcLength(contour, True)

    # Bounding-box extents. The previous x + w / y + h were the coordinates of
    # the box's far corner, which made these features depend on where the
    # object sits in the frame rather than on its size.
    _, _, w, h = cv2.boundingRect(contour)
    length = w
    width = h
    ratio = length / width

    ellipse = cv2.fitEllipse(contour)
    major_axis = max(ellipse[1])
    minor_axis = min(ellipse[1])

    hull = cv2.convexHull(contour)
    hull_area = cv2.contourArea(hull)
    hull_perimeter = cv2.arcLength(hull, True)

    # Degenerate shapes (e.g. collinear points) would divide by zero below.
    if area == 0 or peri == 0 or minor_axis == 0 or hull_area == 0:
        return None

    sf1 = major_axis / area
    sf2 = minor_axis / area
    sf3 = area / ((0.5 * major_axis)**2 * np.pi)
    sf4 = area / (0.5**2 * major_axis * minor_axis * np.pi)

    ed = np.sqrt(4 * area / np.pi)  # diameter of the circle with the same area
    ar = major_axis / minor_axis
    roundness = (4 * area * np.pi) / peri**2
    Co = ed / major_axis
    solid = area / hull_area

    return {
        "area": area,
        "length": length,
        "width": width,
        "ratio": ratio,
        "major_axis_length": major_axis,
        "minor_axis_length": minor_axis,
        "convex_hull_area": hull_area,
        "convex_hull_perimeter": hull_perimeter,
        "shape_factor_1": sf1,
        "shape_factor_2": sf2,
        "shape_factor_3": sf3,
        "shape_factor_4": sf4,
        "equivalent_diameter": ed,
        "aspect_ratio": ar,
        "perimeter": peri,
        "roundness": roundness,
        "compactness": Co,
        "solidity": solid
    }

In [58]:
from scipy.ndimage import convolve
def extract_edge_histogram_features(image):
    """Compute normalized directional edge energies with 3x3 line kernels.

    The grayscale image is convolved with vertical, horizontal, 45-degree and
    135-degree line-detection kernels. The energy of each response is the sum
    of its absolute values; energies are divided by their total so they sum
    to 1 (they are left at 0 when the image produces no response at all).

    Args:
        image: BGR image or an already single-channel image. Grayscale input
            is accepted for consistency with
            ``extract_edge_histogram_features_2``.

    Returns:
        dict with the keys ``edge_energy_vertical``,
        ``edge_energy_horizontal``, ``edge_energy_diag_45`` and
        ``edge_energy_diag_135``.
    """
    # Only convert when there is a channel axis; cvtColor fails on 2-D input.
    if len(image.shape) > 2:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    # Convert once instead of once per kernel.
    gray = gray.astype(np.float32)

    kernels = {
        "vertical": np.array([[-1,  2, -1],
                              [-1,  2, -1],
                              [-1,  2, -1]], dtype=np.float32),
        "horizontal": np.array([[-1, -1, -1],
                                [ 2,  2,  2],
                                [-1, -1, -1]], dtype=np.float32),
        "diag_45": np.array([[-1, -1,  2],
                             [-1,  2, -1],
                             [ 2, -1, -1]], dtype=np.float32),
        "diag_135": np.array([[ 2, -1, -1],
                              [-1,  2, -1],
                              [-1, -1,  2]], dtype=np.float32),
    }

    features = {}
    total_energy = 0.0

    for key, kernel in kernels.items():
        response = convolve(gray, kernel, mode="reflect")
        energy = np.sum(np.abs(response))
        features[f"edge_energy_{key}"] = energy
        total_energy += energy

    # Normalize to relative energies; skip when every response is zero.
    if total_energy > 0:
        for key in list(features.keys()):
            features[key] /= total_energy

    return features

In [None]:
def extract_edge_histogram_features_2(image):
    """Compute normalized edge energies for a bank of classic edge operators.

    The (grayscale) image is convolved with Sobel, Prewitt, Laplacian
    (4- and 8-neighbour), Roberts cross and Scharr kernels. Each operator's
    energy is the sum of the absolute response; energies are divided by their
    total unless that total is zero.

    Args:
        image: BGR image (converted to grayscale) or single-channel image.

    Returns:
        dict mapping ``"edge_energy_2_<operator>"`` to the relative energy,
        in the order the operators are listed below.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image.copy()

    def as_kernel(rows):
        return np.array(rows, dtype=np.float32)

    # (operator name, kernel) pairs; the order defines the output key order.
    kernel_bank = (
        ("sobel_x", as_kernel([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])),
        ("sobel_y", as_kernel([[-1, -2, -1], [0, 0, 0], [1, 2, 1]])),
        ("prewitt_x", as_kernel([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]])),
        ("prewitt_y", as_kernel([[-1, -1, -1], [0, 0, 0], [1, 1, 1]])),
        ("laplacian_4", as_kernel([[0, 1, 0], [1, -4, 1], [0, 1, 0]])),
        ("laplacian_8", as_kernel([[1, 1, 1], [1, -8, 1], [1, 1, 1]])),
        ("roberts_x", as_kernel([[1, 0], [0, -1]])),
        ("roberts_y", as_kernel([[0, 1], [-1, 0]])),
        ("scharr_x", as_kernel([[-3, 0, 3], [-10, 0, 10], [-3, 0, 3]])),
        ("scharr_y", as_kernel([[-3, -10, -3], [0, 0, 0], [3, 10, 3]])),
    )

    energies = {}
    total_energy = 0.0
    for operator, kernel in kernel_bank:
        response = convolve(gray.astype(np.float32), kernel, mode="reflect")
        energy = np.sum(np.abs(response))
        energies[f"edge_energy_2_{operator}"] = energy
        total_energy += energy

    # Leave the raw (all-zero) energies untouched when there is nothing to
    # normalize by.
    if not total_energy > 0:
        return energies
    return {name: value / total_energy for name, value in energies.items()}

In [60]:
def compute_glcm_descriptor(image):
    """Compute gray-level co-occurrence matrix (GLCM) texture properties.

    A symmetric, normalized GLCM is built at distance 3 for the angles
    0, pi/4, pi/2 and 3*pi/4, and the properties contrast, correlation,
    energy and homogeneity are read from it.

    Args:
        image: BGR or single-channel image, or ``None``.

    Returns:
        1-D array with the property values, ordered property by property
        with one value per angle, or ``None`` when ``image`` is ``None``.
    """
    if image is None:
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image

    offsets = [3]
    directions = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    property_names = ('contrast', 'correlation', 'energy', 'homogeneity')

    glcm = graycomatrix(gray, distances=offsets, angles=directions, symmetric=True, normed=True)

    return np.array([
        value
        for name in property_names
        for value in graycoprops(glcm, name).flatten()
    ])

In [61]:
def create_gabor_filter(size, u0, v0, delta_x, delta_y):
    """Build a complex Gabor kernel of shape ``(size, size)``.

    Args:
        size: Kernel side length in pixels.
        u0, v0: Spatial frequency of the complex sinusoid along x and y.
        delta_x, delta_y: Spatial scales (standard deviations) of the
            Gaussian envelope along x and y.

    Returns:
        Complex ndarray ``gaussian * exp(-2j*pi*(u0*x + v0*y))``.
    """
    # ``-size//2`` parses as ``(-size)//2``, which for odd sizes produced the
    # off-centre grid -16..14 (size=31). Building the range from ``size // 2``
    # centres odd-sized kernels on 0 and leaves even sizes unchanged.
    half = size // 2
    y, x = np.mgrid[-half:size - half, -half:size - half]

    gaussian = np.exp(-0.5 * (x**2/delta_x**2 + y**2/delta_y**2))

    sinusoid = np.exp(-2j * np.pi * (u0*x + v0*y))

    return gaussian * sinusoid

def compute_gist_descriptor(image):
    """Compute a GIST-style descriptor from a bank of Gabor responses.

    The image is converted to grayscale, scaled to [0, 1] and locally
    contrast-normalized (subtract a Gaussian-blurred mean, divide by the
    local standard deviation). It is then filtered with the real part of
    31x31 Gabor kernels at 4 scales x 8 orientations, and the mean absolute
    response is collected over a 4x4 grid of blocks for every filter.

    Args:
        image: BGR image, or ``None``.

    Returns:
        1-D array with 4 * 8 * 16 = 512 values, or ``None`` when ``image``
        is ``None``.
    """
    if image is None:
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = gray.astype(np.float32) / 255.0

    # Local contrast normalization.
    mu = cv2.GaussianBlur(gray, (5, 5), 1.0)
    var = cv2.GaussianBlur(gray**2, (5, 5), 1.0) - mu**2
    var[var < 0] = 0  # numerical noise can push the variance below zero
    sigma = np.sqrt(var)
    normalized = (gray - mu) / (sigma + 1e-8)

    grid = 4
    n_orientations = 8
    cell_h = normalized.shape[0] // grid
    cell_w = normalized.shape[1] // grid

    descriptor = []
    for delta in (2, 4, 8, 16):
        for step in range(n_orientations):
            angle = step * np.pi / n_orientations
            kernel = create_gabor_filter(
                size=31,
                u0=np.cos(angle) / delta,
                v0=np.sin(angle) / delta,
                delta_x=delta,
                delta_y=delta,
            )
            response = cv2.filter2D(normalized, cv2.CV_32F, np.real(kernel))

            descriptor.extend(
                np.mean(np.abs(response[r*cell_h:(r+1)*cell_h, c*cell_w:(c+1)*cell_w]))
                for r in range(grid)
                for c in range(grid)
            )

    return np.array(descriptor)


In [62]:
def extract_css_features_fixed(image, num_features=20):
    """Extract Curvature Scale Space features from rice seed contour.

    The largest external contour of the Otsu-thresholded image is smoothed
    with 1-D Gaussians at ``num_features`` scales (sigma from 1 to 10). At
    each scale the absolute curvature along the contour is computed and
    summarized.

    Args:
        image: BGR or single-channel image.
        num_features: Number of smoothing scales.

    Returns:
        dict with, for every scale ``k`` (1-based), the keys
        ``css_peaks_scale_k`` (number of local curvature maxima),
        ``css_max_curve_scale_k``, ``css_mean_curve_scale_k`` and
        ``css_std_curve_scale_k``.

    Raises:
        ValueError: If the thresholded image contains no contour.
    """
    # Local import: this name is not part of the notebook's import cell.
    from scipy.ndimage import gaussian_filter1d

    # Preprocess image
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError("extract_css_features_fixed: no contour found in image")
    contour = max(contours, key=cv2.contourArea)

    # OpenCV contour coordinates are integers and gaussian_filter1d returns
    # the input dtype by default, so without this cast the "smoothed" contour
    # is quantized back to integers and the derivatives collapse to zero.
    contour_array = contour.reshape(-1, 2).astype(np.float64)

    # Ensure enough points for analysis
    if len(contour_array) < 50:
        # Linearly resample to 100 points.
        t = np.arange(len(contour_array))
        ti = np.linspace(0, t[-1], 100)
        x = np.interp(ti, t, contour_array[:, 0])
        y = np.interp(ti, t, contour_array[:, 1])
        contour_array = np.column_stack((x, y))

    x = contour_array[:, 0]
    y = contour_array[:, 1]

    features = {}

    # Define scales for CSS
    scales = np.linspace(1, 10, num_features)

    # For each scale, smooth the contour first, then calculate curvature
    for i, sigma in enumerate(scales):
        x_smooth = gaussian_filter1d(x, sigma)
        y_smooth = gaussian_filter1d(y, sigma)

        # Derivatives of the smoothed contour
        dx = np.gradient(x_smooth)
        dy = np.gradient(y_smooth)
        ddx = np.gradient(dx)
        ddy = np.gradient(dy)

        # Curvature; stationary points give 0/0 or x/0, handled just below.
        with np.errstate(divide="ignore", invalid="ignore"):
            curvature = np.abs((dx * ddy - dy * ddx) / (dx**2 + dy**2)**(3/2))

        # Undefined curvature counts as 0. Infinities are zeroed explicitly:
        # the nan_to_num default maps them to ~1.8e308, which would dominate
        # the max/mean/std features.
        curvature = np.nan_to_num(curvature, nan=0.0, posinf=0.0, neginf=0.0)

        # Local maxima (peaks) of the curvature along the contour.
        peaks = np.where((curvature[1:-1] > curvature[:-2]) &
                         (curvature[1:-1] > curvature[2:]))[0] + 1

        # Store features
        features[f'css_peaks_scale_{i+1}'] = len(peaks)
        features[f'css_max_curve_scale_{i+1}'] = np.max(curvature)
        features[f'css_mean_curve_scale_{i+1}'] = np.mean(curvature)
        features[f'css_std_curve_scale_{i+1}'] = np.std(curvature)

    return features


In [63]:
def extract_all_features(image):
    """Run every enabled feature extractor on one image and merge the results.

    Extractors run in a fixed order (LBP, histogram texture, Hu moments,
    Zernike moments, elliptic Fourier descriptors, curvature scale space,
    color statistics, both edge-energy banks, basic shape, GLCM) and their
    outputs are merged into a single dict; later keys would overwrite
    earlier ones with the same name. ``compute_gist_descriptor`` is not
    included.

    NOTE(review): every extractor named below, including
    ``extract_efd_features``, must be defined in the kernel before this
    function is called.

    Args:
        image: BGR image as returned by ``cv2.imread``.

    Returns:
        dict mapping feature name to value.
    """
    dict_extractors = (
        lbp_feature,
        texture_feature,
        hu_moments,
        zernike_feature,
        extract_efd_features,
        extract_css_features_fixed,
        enhanced_color_feature,
        extract_edge_histogram_features,
        extract_edge_histogram_features_2,
    )

    features = {}
    for extractor in dict_extractors:
        features.update(extractor(image))

    # The shape extractor's result is only merged when it is not None.
    shape_features = basic_feature(image)
    if shape_features is not None:
        features.update(shape_features)

    # GLCM comes back as a flat array; expand it into numbered columns.
    glcm_features = compute_glcm_descriptor(image)
    if glcm_features is not None:
        features.update({f'GLCM_{i}': val for i, val in enumerate(glcm_features)})

    return features


def process_directory(base_path):
    """Extract features for every image below ``base_path`` into a DataFrame.

    The directory tree is walked recursively. A file is treated as an image
    when its name ends in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive,
    so ``.JPG`` files are no longer skipped). Images whose directory path
    contains ``"negative"`` get label 0, all others label 1. Files that
    ``cv2.imread`` cannot decode are skipped.

    Directories and files are visited in sorted order so the row order of the
    result is reproducible between runs and machines.

    Args:
        base_path: Root directory to scan.

    Returns:
        pandas.DataFrame with one row per readable image: the columns
        produced by ``extract_all_features`` plus ``Label``. Empty when no
        image is found.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []

    for root, dirs, files in os.walk(base_path):
        dirs.sort()  # in-place sort makes os.walk descend deterministically
        label = 0 if 'negative' in root else 1
        for file in sorted(files):
            if file.lower().endswith(image_extensions):
                image_paths.append((os.path.join(root, file), label))

    all_data = []
    for image_path, label in tqdm(image_paths, desc="Processing Images"):
        image = cv2.imread(image_path)
        if image is None:
            # Unreadable or non-image file: skip it (best effort).
            continue

        features = extract_all_features(image)
        features["Label"] = label

        all_data.append(features)

    df = pd.DataFrame(all_data)

    return df

In [64]:

# NOTE(review): this value is reassigned in the following cell before
# process_directory is called, so this assignment currently has no effect.
base_path = "/home/duyle/Rice_photos/rice_seed/TBR"

In [65]:
# Extract features for every image under the TBR-45 directory.
# The path is machine-specific; adjust it to the local dataset location.
base_path = r'/home/duyle/Documents/AIL/rice_seed/TBR-45'
df = process_directory(base_path)

  curvature = np.abs((dx * ddy - dy * ddx) / (dx**2 + dy**2)**(3/2))
Processing Images: 100%|██████████| 2337/2337 [03:41<00:00, 10.53it/s]


In [66]:
# Persist the extracted feature table without the index column.
df.to_csv('new_file_efd_TBR-45-2.csv', index=False)

In [22]:
# Extract and save one feature CSV per rice variety.
# The loop variable is named `variety` rather than `type` so the builtin
# `type` is not shadowed for the rest of the kernel session.
types = ['BC-15','Xi-23']
for variety in types:
    df = process_directory(f'/home/duyle/Documents/AIL/rice_seed/{variety}')
    df.to_csv(f'new_file_{variety}.csv',index=False)

Processing Images:   0%|          | 0/3759 [00:00<?, ?it/s]

Processing Images: 100%|██████████| 3759/3759 [06:01<00:00, 10.40it/s]
Processing Images: 100%|██████████| 4579/4579 [06:53<00:00, 11.08it/s]


In [None]:
# Extract and save one feature CSV per rice variety.
# 'TBR-45' was previously spelled 'TBR_45'; that directory matched no images
# (the progress bar reported "0it"), so an empty CSV was written for it.
# The hyphenated spelling matches 'TBR-36' and the TBR-45 path used earlier.
# The loop variable is named `variety` rather than `type` so the builtin
# `type` is not shadowed for the rest of the kernel session.
types = ['BC-15','Huong_thom-1','Nep-87','Q-5','Thien_uu-8','Xi-23', 'TBR-36', 'TBR-45', 'TH3-5']
for variety in types:
    df = process_directory(f'/home/duyle/Rice_photos/rice_seed/{variety}')
    df.to_csv(f'all_with_zernike_pluscolor_enhanced_edge_{variety}.csv',index=False)

Processing Images: 100%|██████████| 3422/3422 [03:20<00:00, 17.06it/s]
Processing Images: 100%|██████████| 4316/4316 [04:00<00:00, 17.92it/s]
Processing Images: 100%|██████████| 2867/2867 [02:54<00:00, 16.43it/s]
Processing Images: 100%|██████████| 3944/3944 [03:47<00:00, 17.36it/s]
Processing Images: 100%|██████████| 2103/2103 [02:01<00:00, 17.32it/s]
Processing Images: 100%|██████████| 3872/3872 [03:43<00:00, 17.33it/s]
Processing Images: 100%|██████████| 2328/2328 [01:54<00:00, 20.30it/s]
Processing Images: 0it [00:00, ?it/s]
Processing Images: 100%|██████████| 2074/2074 [01:48<00:00, 19.07it/s]


In [67]:
# Display the current feature DataFrame (rich repr of the cell's last expression).
df

Unnamed: 0,LBP_0,LBP_1,LBP_2,LBP_3,LBP_4,LBP_5,LBP_6,LBP_7,LBP_8,LBP_9,...,GLCM_7,GLCM_8,GLCM_9,GLCM_10,GLCM_11,GLCM_12,GLCM_13,GLCM_14,GLCM_15,Label
0,249,667,355,1761,2894,2749,1194,921,1615,1329,...,0.919322,0.087261,0.076275,0.072825,0.076559,0.356618,0.279062,0.248866,0.286045,1
1,344,833,411,1739,2987,2735,1278,798,1460,1489,...,0.899277,0.077756,0.067537,0.063615,0.068074,0.340384,0.262537,0.228819,0.257738,1
2,485,1047,672,2118,3141,3007,1522,1202,2016,1966,...,0.910777,0.086017,0.078133,0.074423,0.078500,0.344613,0.284887,0.255701,0.285575,1
3,567,997,707,1824,2960,2570,1392,1184,1621,1873,...,0.903198,0.071537,0.063342,0.061175,0.064125,0.305813,0.246045,0.218532,0.250488,1
4,541,1004,560,1536,2739,2108,1078,1067,1325,2058,...,0.859189,0.064338,0.055301,0.052004,0.055064,0.291228,0.230163,0.199191,0.223099,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3417,549,1109,800,2156,3089,2942,1720,1276,2028,2061,...,0.917721,0.089944,0.083424,0.080165,0.083763,0.329930,0.285341,0.260048,0.288865,0
3418,325,909,524,2151,3341,3508,1509,1149,2382,2454,...,0.939424,0.110095,0.100542,0.097592,0.102480,0.380486,0.310400,0.287083,0.329649,0
3419,449,1067,592,2365,3376,3380,1773,1366,2238,2114,...,0.932853,0.077291,0.070693,0.067480,0.071971,0.376489,0.319709,0.289111,0.330268,0
3420,334,725,447,1605,3184,2582,1185,817,1292,1401,...,0.927336,0.070488,0.060200,0.055431,0.060148,0.334753,0.261893,0.232836,0.263993,0
