In [12]:
import numpy as np
import cv2
import pandas as pd
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from math import sqrt
import os
from tqdm import tqdm
import mahotas

In [13]:
def lbp_feature(image):
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    i_max = np.max(image)
    i_min = np.min(image)
    if i_max - i_min != 0:
        image = (image - i_min) / (i_max - i_min)
    lbp = local_binary_pattern(image, R=1, P=8, method="uniform")  
    hist, bins = np.histogram(lbp.ravel(), bins=10, range=(0, 10))
    return {f"LBP_{i}": hist[i] for i in range(len(hist))}

In [14]:
def texture_feature(image):
    gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hist = cv2.calcHist([gray_img],[0],None, [256], [0,256])
    hist = hist/hist.sum()

    intensities = np.arange(256)
    mean = np.sum(intensities * hist.flatten())
    std = np.sqrt(np.sum((intensities-mean)**2 *hist.flatten()))
    uniformity = np.sum(hist.flatten()**2)
    third_moment = np.sum((intensities-mean)**3 * hist.flatten())
    return {
        "texture_mean": mean,
        "texture_std": std,
        "texture_uniformity": uniformity,
        "texture_third_moment": third_moment
    }

In [15]:
def color_feature(image):
    b, g, r = cv2.split(image)
    mean_r, mean_g, mean_b = np.mean(r), np.mean(g), np.mean(b)
    rs, gs, bs = sqrt(mean_r), np.sqrt(mean_g), np.sqrt(mean_b)
    return {"mean_r": mean_r,
        "mean_g": mean_g,
        "mean_b": mean_b,
        "red_sqr": rs,
        "green_sqr": gs,
        "blue_sqr": bs,
    }

In [45]:
def zernike_feature(image, degree=7):
    """
    Compute Zernike moments of the largest object in the image.
    
    Parameters:
      image: input image (BGR or grayscale)
      degree: maximum degree (order) for Zernike moments computation
    
    Returns:
      A numpy array containing the Zernike moments.
    """
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    threshold, new_img = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(new_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = max(contours, key=cv2.contourArea)
    
    mask = np.zeros(new_img.shape, dtype=np.uint8)
    cv2.drawContours(mask, [contour], -1, 255, -1)  # fill the contour
    # Convert mask to binary (values 0 and 1) for mahotas
    binary_mask = (mask > 0).astype(np.uint8)
    

    (x_center, y_center), radius = cv2.minEnclosingCircle(contour)

    radius = int(np.ceil(radius))
    

    x_center = int(x_center)
    y_center = int(y_center)
    x1 = max(x_center - radius, 0)
    y1 = max(y_center - radius, 0)
    x2 = x_center + radius
    y2 = y_center + radius
    
    # Adjust if the region goes beyond image borders
    cropped_mask = binary_mask[y1:y2, x1:x2]
    
    # Ensure the cropped mask is square; if not, pad accordingly.
    h, w = cropped_mask.shape
    if h != w:
        size = max(h, w)
        square_mask = np.zeros((size, size), dtype=np.uint8)
        # Compute padding offsets
        y_offset = (size - h) // 2
        x_offset = (size - w) // 2
        square_mask[y_offset:y_offset+h, x_offset:x_offset+w] = cropped_mask
    else:
        square_mask = cropped_mask

    # Now compute the Zernike moments. The function requires the radius (in pixels) of the circle.
    # Here, we choose the radius as half the size of the square mask.
    effective_radius = square_mask.shape[0] // 2
    zernike_moments = mahotas.features.zernike_moments(square_mask, effective_radius, degree)
    
    return {f'zernike_{i}': zernike_moments[i] for i in range(len(zernike_moments))}

In [None]:
def color_structure_descriptor(image, grid_size=(8, 8), bins=32):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    
    height, width = hsv_image.shape[:2]
    cell_h, cell_w = height // grid_size[0], width // grid_size[1]

    
    hist_bins = [bins] 

    
    csd = []

    for row in range(grid_size[0]):
        for col in range(grid_size[1]):
            x_start, y_start = col * cell_w, row * cell_h
            x_end, y_end = x_start + cell_w, y_start + cell_h
            cell = hsv_image[y_start:y_end, x_start:x_end]

            hist = cv2.calcHist([cell], [0], None, hist_bins, [0, 180])
            hist = hist.flatten() 
            hist = hist / hist.sum() if hist.sum() != 0 else hist

            csd.extend(hist)

    return {f"csd_{i}":csd[i] for i in range(len(csd))}


In [25]:
def hu_moments(image):
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    threshold, new_img = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(new_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = max(contours, key=cv2.contourArea)
    moments = cv2.moments(contour)
    hu_moments = cv2.HuMoments(moments)
    hu_moments = -np.sign(hu_moments) * np.log10(np.abs(hu_moments) + 1e-10)
    return {f"hu_{i}": hu_moments[i] for i in range(len(hu_moments))}

In [17]:
def basic_feature(image):
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    threshold, new_img = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    
    area = np.count_nonzero(new_img)
    contours, _ = cv2.findContours(new_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(contour)
    
    length = x + w
    width = y + h
    ratio = length / width
    
    ellipse = cv2.fitEllipse(contour)
    
    major_axis_length = max(ellipse[1])
    minor_axis_length = min(ellipse[1])

    hull = cv2.convexHull(contour)
    hull_area = cv2.contourArea(hull)
    hull_perimeter = cv2.arcLength(hull, True)
    return {
        "area": area,
        "length": length,
        "width": width,
        "ratio": ratio,
        "major_axis_length": major_axis_length,
        "minor_axis_length": minor_axis_length,
        "convex_hull_area": hull_area,
        "convex_hull_perimeter": hull_perimeter,
    }

In [18]:
def compute_glcm_descriptor(image):
    if image is None:
        return None
    
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    distance = [3]  
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4] 
    properties = ['contrast', 'correlation', 'energy', 'homogeneity']  
    
    glcm = graycomatrix(image, distances=distance, angles=angles, symmetric=True, normed=True)
    
    features = []
    for prop in properties:
        feature = graycoprops(glcm, prop).flatten()
        features.extend(feature)
    
    return np.array(features)

In [44]:
def create_gabor_filter(size, u0, v0, delta_x, delta_y):
    #size: kernel size
    #u0, v0: spatial frequency points
    #delta_x, delta_y: spatial scales

    y, x = np.mgrid[-size//2:size//2, -size//2:size//2]
    
    gaussian = np.exp(-0.5 * (x**2/delta_x**2 + y**2/delta_y**2))

    sinusoid = np.exp(-2j * np.pi * (u0*x + v0*y))
    
    return gaussian * sinusoid

def compute_gist_descriptor(image):

    if image is None:
        return None
    
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = image.astype(np.float32) / 255.0
    
    local_mean = cv2.GaussianBlur(image, (5, 5), 1.0)
    variance = cv2.GaussianBlur(image**2, (5, 5), 1.0) - local_mean**2
    variance[variance < 0] = 0 
    local_std = np.sqrt(variance)
    image = (image - local_mean) / (local_std + 1e-8)
    
    features = []
    scales = [2, 4, 8, 16]
    orientations = 8
    
    for delta in scales:
        for theta in range(orientations):
            angle = theta * np.pi / orientations
            u0 = np.cos(angle) / delta
            v0 = np.sin(angle) / delta
            
            gabor_filter = create_gabor_filter(size=31,u0=u0,v0=v0,delta_x=delta,delta_y=delta)
            
            filtered = cv2.filter2D(image, cv2.CV_32F, np.real(gabor_filter))
            
            block_h = image.shape[0] // 4
            block_w = image.shape[1] // 4
            
            for i in range(4):
                for j in range(4):
                    block = filtered[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w]
                    energy = np.mean(np.abs(block))
                    features.append(energy)
    
    return np.array(features)


In [46]:
def extract_all_features(image):    
    features = {}

    # lbp_hist = lbp_feature(image)
    # features.update(lbp_hist) 
    
    texture_features = texture_feature(image)
    features.update(texture_features) 

    # hu_moments_features = hu_moments(image)
    # features.update(hu_moments_features)

    zernike_moment_features = zernike_feature(image)
    features.update(zernike_moment_features)
    
    color_features = color_feature(image)
    features.update(color_features) 

    shape_features = basic_feature(image)
    if shape_features is not None:
        features.update(shape_features)

    # glcm_features = compute_glcm_descriptor(image)
    # if glcm_features is not None:
    #     for i, val in enumerate(glcm_features):
    #         features[f'GLCM_{i}'] = val

    # gist_features = compute_gist_descriptor(image)
    # if gist_features is not None:
    #     for i, val in enumerate(gist_features):
    #         features[f'GIST_{i}'] = val

    return features


def process_directory(base_path):
    all_data = []
    image_paths = []

    for root, dirs, files in os.walk(base_path):
        for file in files:
            if file.endswith(('.png', '.jpg', '.jpeg')):
                label = 0 if 'Negative' in root else 1
                image_paths.append((os.path.join(root, file), label))

    for image_path, label in tqdm(image_paths, desc="Processing Images"):
        image = cv2.imread(image_path)
        if image is None:
            continue  

        features = extract_all_features(image)
        features["Label"] = label
 
        all_data.append(features)

    df = pd.DataFrame(all_data)

    return df

In [47]:
base_path = r'/home/duyle/Rice_photos/BC-15'
df = process_directory(base_path)

Processing Images: 100%|██████████| 3677/3677 [00:37<00:00, 98.69it/s] 


In [34]:
types = ['Huongthom','Nep87','Q5','Thien_uu','Xi23']
for type in types:
    df = process_directory(f'/home/duyle/Rice_photos/{type}')
    df.to_csv(f'all_with_zernike_{type}.csv',index=False)

Processing Images: 100%|██████████| 4150/4150 [03:53<00:00, 17.76it/s]
Processing Images: 100%|██████████| 2873/2873 [02:36<00:00, 18.38it/s]
Processing Images: 100%|██████████| 3010/3010 [02:51<00:00, 17.55it/s]
Processing Images: 100%|██████████| 2006/2006 [02:00<00:00, 16.60it/s]
Processing Images: 100%|██████████| 4145/4145 [04:28<00:00, 15.43it/s]


In [48]:
df

Unnamed: 0,texture_mean,texture_std,texture_uniformity,texture_third_moment,zernike_0,zernike_1,zernike_2,zernike_3,zernike_4,zernike_5,...,blue_sqr,area,length,width,ratio,major_axis_length,minor_axis_length,convex_hull_area,convex_hull_perimeter,Label
0,152.819736,20.038248,0.039437,3596.159707,0.31831,1.502534e-04,0.462525,0.196733,0.013276,0.014046,...,9.844644,12715,234,76,3.078947,222.841080,73.246323,12846.0,504.984424,0
1,145.484199,25.422890,0.040703,7659.362885,0.31831,5.507275e-04,0.470892,0.193626,0.025171,0.012693,...,9.724670,10462,212,71,2.985915,200.241364,66.632545,10546.5,457.492825,0
2,145.106173,23.235121,0.037482,6440.521765,0.31831,1.746873e-15,0.456747,0.193269,0.017059,0.010133,...,9.765762,10910,208,74,2.810811,196.175446,70.593536,10960.0,454.703499,0
3,153.339070,21.440033,0.041950,4652.943119,0.31831,6.199011e-04,0.463229,0.187735,0.011793,0.012145,...,9.852617,11398,211,75,2.813333,200.679810,72.684647,11476.0,467.173310,0
4,140.540285,30.972378,0.030548,7522.928417,0.31831,3.607720e-04,0.474892,0.187633,0.025607,0.026244,...,9.372739,12380,231,77,3.000000,213.790176,75.759109,12794.5,501.815039,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3672,139.799828,30.436637,0.033865,5615.198825,0.31831,2.206260e-04,0.481884,0.199837,0.026296,0.015629,...,9.477512,11610,240,71,3.380282,227.888321,65.534615,11902.0,512.573571,1
3673,154.848193,17.776393,0.043520,2010.044399,0.31831,1.209034e-04,0.478281,0.187704,0.016870,0.013741,...,10.004896,10575,214,72,2.972222,200.312515,67.399651,10729.5,461.124763,1
3674,138.229146,26.467132,0.037670,9395.690582,0.31831,6.532614e-04,0.462514,0.201591,0.005101,0.005455,...,9.252532,10770,220,69,3.188406,209.212097,65.828743,10929.0,472.594579,1
3675,145.538925,23.635546,0.039149,2987.048113,0.31831,2.047679e-04,0.491030,0.201622,0.027571,0.013138,...,9.576948,9388,228,62,3.677419,214.034332,55.337917,9476.5,477.018385,1


In [49]:
df.to_csv('basic_with_zernikebc15.csv',index=False)