In [1]:
import glob
import os

import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops

In [2]:
####
# for each image it calculates all the glcm based texture features
def glcm(binary_image):
    """Compute GLCM-based texture descriptors for a single image.

    Six co-occurrence matrices are built with ``graycomatrix``: pixel
    distances 1, 3 and 5 at angle 0, followed by the same three distances at
    angle pi/4. For every matrix, five ``graycoprops`` properties are read
    (correlation, dissimilarity, homogeneity, contrast, energy).

    Parameters
    ----------
    binary_image : array-like
        Image handed straight to ``graycomatrix``. Despite the parameter
        name, the caller in this notebook passes a grayscale image.

    Returns
    -------
    pandas.DataFrame
        30 columns named ``<prefix>_<k>`` where ``k`` (1..6) identifies the
        matrix and ``<prefix>`` is one of ``corr``, ``dis_sim``, ``homogen``,
        ``contrast``, ``energy``. Each matrix uses a single distance and a
        single angle, so every column holds one value.
    """
    # (distance, angle) for matrices 1..6 -- the position in this tuple is
    # what determines the numeric suffix of the column names.
    matrix_params = (
        (1, 0),
        (3, 0),
        (5, 0),
        (1, np.pi / 4),
        (3, np.pi / 4),
        (5, np.pi / 4),
    )

    # (column prefix, graycoprops property); order fixes the column order.
    properties = (
        ("corr", "correlation"),
        ("dis_sim", "dissimilarity"),
        ("homogen", "homogeneity"),
        ("contrast", "contrast"),
        ("energy", "energy"),
    )

    texture = pd.DataFrame()

    for suffix, (distance, angle) in enumerate(matrix_params, start=1):
        cooccurrence = graycomatrix(binary_image, [distance], [angle])

        for prefix, prop_name in properties:
            # [0] selects the row for the (only) requested distance.
            texture[f"{prefix}_{suffix}"] = graycoprops(cooccurrence, prop_name)[0]

    return texture

In [3]:
####
# extract shape features from the leaf
# binary_img: pass the image with the leaf region filled in (not just its border)
def extract_leaf_shape_features(binary_img):
    """Compute shape descriptors of the largest external contour in an image.

    Parameters
    ----------
    binary_img : numpy.ndarray
        Single-channel image passed to ``cv2.findContours``. The leaf region
        should be filled in, not just outlined.

    Returns
    -------
    dict or None
        ``None`` when ``cv2.findContours`` finds no contour. Otherwise a dict
        mapping each feature name to a single-element list:

        * ``perimeter_to_area_ratio`` -- contour perimeter / contour area
        * ``solidity`` -- contour area / convex-hull area
        * ``roundness`` -- 4 * pi * area / perimeter**2
        * ``aspect_ratio`` -- long side / short side of the minimum-area
          (rotated) bounding rectangle

        ``perimeter_to_area_ratio`` and ``solidity`` are NaN when their
        denominator is zero (degenerate contour) instead of raising
        ``ZeroDivisionError``.
    """
    contours, _ = cv2.findContours(binary_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return None

    # The largest external contour is taken to be the leaf outline.
    cnt = max(contours, key=cv2.contourArea)

    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)  # True: treat the contour as closed
    hull_area = cv2.contourArea(cv2.convexHull(cnt))

    # A contour made of a single point or a straight line has zero area (and
    # zero hull area). These ratios are undefined there, so report NaN rather
    # than crashing the whole extraction run on one bad image.
    undefined = float("nan")
    perimeter_to_area = perimeter / area if area > 0 else undefined
    solidity = area / hull_area if hull_area > 0 else undefined

    # Roundness = 4*pi*Area / Perimeter^2; the epsilon avoids division by zero.
    roundness = 4 * np.pi * area / (perimeter**2 + 1e-6)

    # Rotated bounding box: element [1] of the result is its (width, height).
    # Which side OpenCV reports as "width" depends on the rotation, so the
    # ratio is always formed as long side / short side.
    (width, height) = cv2.minAreaRect(cnt)[1]
    rot_aspect = max(width, height) / (min(width, height) + 1e-6)

    features = {
        "perimeter_to_area_ratio": [perimeter_to_area],
        "solidity": [solidity],
        "roundness": [roundness],
        "aspect_ratio": [rot_aspect],
    }

    return features


In [4]:

# finding the amount of green in the leaf in percentage
# output : percent of green color pixels w.r.t total leaf pixels

def green_pixels(img):
    """Return the percentage of leaf pixels whose colour is in the green range.

    Parameters
    ----------
    img : numpy.ndarray
        Image in BGR channel order (it is converted with ``COLOR_BGR2HSV`` and
        ``COLOR_BGR2GRAY``).
        NOTE(review): leaf pixels are counted as the non-zero grayscale
        pixels, which presumably relies on the background being pure black --
        confirm against how the images were segmented.

    Returns
    -------
    float
        ``100 * green_count / leaf_count``, or ``0.0`` when the image has no
        non-zero pixel at all (previously this raised ``ZeroDivisionError``).
    """
    # HSV range treated as "green".
    lower_bound = np.array([40, 50, 20])
    upper_bound = np.array([150, 255, 255])

    # Pixels inside the HSV range become non-zero in the mask.
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    green_mask = cv2.inRange(img_hsv, lower_bound, upper_bound)
    green_count = cv2.countNonZero(green_mask)

    # Every non-zero grayscale pixel is counted as part of the leaf.
    leaf_count = cv2.countNonZero(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    # Entirely black image: there is no leaf, hence no green share to report.
    if leaf_count == 0:
        return 0.0

    return (green_count / leaf_count) * 100


In [5]:
def brown_pixels(img):
    """Return the percentage of leaf pixels whose colour is in the brown range.

    Parameters
    ----------
    img : numpy.ndarray
        Image in BGR channel order (it is converted with ``COLOR_BGR2HSV`` and
        ``COLOR_BGR2GRAY``).
        NOTE(review): leaf pixels are counted as the non-zero grayscale
        pixels, which presumably relies on the background being pure black --
        confirm against how the images were segmented.

    Returns
    -------
    float
        ``100 * brown_count / leaf_count``, or ``0.0`` when the image has no
        non-zero pixel at all (previously this raised ``ZeroDivisionError``).
    """
    # HSV range treated as "brown".
    lower_bound = np.array([11, 100, 100])
    upper_bound = np.array([20, 200, 255])

    # Pixels inside the HSV range become non-zero in the mask.
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    brown_mask = cv2.inRange(img_hsv, lower_bound, upper_bound)
    brown_count = cv2.countNonZero(brown_mask)

    # Every non-zero grayscale pixel is counted as part of the leaf.
    leaf_count = cv2.countNonZero(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    # Entirely black image: there is no leaf, hence no brown share to report.
    if leaf_count == 0:
        return 0.0

    return (brown_count / leaf_count) * 100


In [6]:
import pandas as pd
####
# extracts color features from an image and returns them as a dict of single-element lists
# keys: green_color, brown_color, green_to_brown_ratio
def extract_color_features(img):
    """Gather the colour features of one image.

    Calls ``green_pixels`` and ``brown_pixels`` on ``img`` and derives their
    ratio.

    Returns
    -------
    dict
        Three keys, each mapped to a single-element list:
        ``green_color`` and ``brown_color`` hold the values returned by the
        two helpers, and ``green_to_brown_ratio`` holds green / brown, or
        ``0.0`` when the brown value is zero.
    """
    green_share = green_pixels(img)
    brown_share = brown_pixels(img)

    # With no brown at all the ratio is reported as 0.0 instead of dividing
    # by zero.
    if brown_share == 0:
        ratio = 0.0
    else:
        ratio = green_share / brown_share

    return {
        "green_color": [green_share],
        "brown_color": [brown_share],
        "green_to_brown_ratio": [ratio],
    }


In [7]:

# Build the feature table: one row per image found under ./images/<label>/*.jpg.
# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of re-concatenating a growing DataFrame on every iteration.
records = []

# sorted() makes the row order reproducible; glob's own order is unspecified.
for folder in sorted(glob.glob("./images/*")):

    # The folder name is the class label. os.path handles both "/" and "\\"
    # separators, so this no longer depends on Windows-style paths.
    label = os.path.basename(os.path.normpath(folder))

    for image_path in sorted(glob.glob(os.path.join(folder, "*.jpg"))):

        img = cv2.imread(image_path)
        # cv2.imread signals an unreadable/corrupt file by returning None.
        if img is None:
            print(f"skipping unreadable image: {image_path}")
            continue

        img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        shape_features = extract_leaf_shape_features(img_g)
        # None means no contour was found, so there is nothing to describe.
        if shape_features is None:
            print(f"skipping image without a leaf contour: {image_path}")
            continue

        color_features = extract_color_features(img)
        texture_features = glcm(img_g)

        # Column order: label, colour, shape, texture, image_name, category.
        row = {"label": label}
        # The helper dicts wrap each value in a one-element list; unwrap them.
        row.update({name: values[0] for name, values in color_features.items()})
        row.update({name: values[0] for name, values in shape_features.items()})
        # Take the single row of the GLCM frame as plain scalars. Using
        # DataFrame.to_dict() here would store {0: value} dicts in the cells.
        row.update(texture_features.iloc[0].to_dict())
        # File name of the image itself (not the name of its folder).
        row["image_name"] = os.path.basename(image_path)
        row["category"] = label

        records.append(row)

basil_dataset = pd.DataFrame(records)

basil_dataset.shape


(928, 40)

In [8]:
# Save the feature table as basil_dataset.csv; index=False leaves the row index out of the file.
basil_dataset.to_csv("basil_dataset.csv", index=False)