# Features extraction
The purpose here is to extract features from the segmented plant leaf image. After extraction, we create a CSV file that will be used for the classification stage.

| Features names |   Elements  |
|----------------|-------------|
| Form features  | physiological_length, physiological_width, perimeter, area, diameter |
| Morphological features | rectangularity, circularity, aspect_ratio |
| Color features | mean and standard deviation of each color channel (red, green, blue) |
| Texture features | contrast, entropy, correlation, inverse_difference_moments |

## Future Perspective
- Use a literature review to guide better feature selection
- I wonder if the color features are impactful

In [16]:
# Libraries needed
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import mahotas

In [25]:
def create_database(ds_path="./../Downloads/Main-Data/"):
    """Build a DataFrame of leaf features for every image in a dataset folder.

    The dataset is expected to be laid out as ``ds_path/<species>/<image>``.
    Each image is passed to ``extract_features`` and the resulting feature
    list is stored as one row, followed by the species folder name and the
    image file name.

    Args:
        ds_path: Root folder of the dataset. Defaults to the location used
            by this notebook.

    Returns:
        A ``pandas.DataFrame`` with one row per image and the columns listed
        in ``names`` below. The frame is empty (but still has all columns)
        when no image is found.
    """

    names = ['area','perimeter','physiological_length','physiological_width','aspect_ratio','rectangularity','circularity',
             'diameter','mean_red','mean_green','mean_blue','stddev_red','stddev_green','stddev_blue',
             'contrast','correlation','inverse_difference_moments','entropy','species','img_name'
            ]
    data = []

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the resulting table reproducible across runs and machines.
    for species in sorted(os.listdir(ds_path)):
        species_path = os.path.join(ds_path, species)
        # Stray files at the dataset root (e.g. .DS_Store, README) are not
        # species folders and would make the nested os.listdir fail.
        if not os.path.isdir(species_path):
            continue
        for img in sorted(os.listdir(species_path)):
            img_path = os.path.join(species_path, img)
            # Nested folders are not images; skip them.
            if not os.path.isfile(img_path):
                continue
            features = extract_features(img_path)
            data.append(features + [species, img])

    return pd.DataFrame(data, columns=names)

In [23]:
def extract_features(path):
    """Segment the leaf in an image and return its shape, color and texture features.

    The image is resized to 1600x1600, the leaf is separated from the
    background with a 2-cluster k-means on the ``a`` channel of the Lab
    color space, and features are then measured on the result.

    Args:
        path: Path of the image file to process.

    Returns:
        A list of 18 values, in this order: area, perimeter, bounding-box
        width, bounding-box height, aspect ratio (short side / long side),
        rectangularity, circularity, equivalent diameter, mean of the red,
        green and blue channels, standard deviation of the red, green and
        blue channels, Haralick contrast, correlation, inverse difference
        moment and entropy.

    Raises:
        ValueError: If the image cannot be read or no contour is found in
            the segmentation mask.
    """

    image = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise ValueError("Could not read image: {!r}".format(path))

    image = cv2.resize(image, (1600, 1600))
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    a_channel = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)[:, :, 1]

    # --- Segmentation: 2-cluster k-means on the Lab "a" channel ------------
    pixels = np.float32(a_channel.flatten())
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    k = 2
    _, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # The cluster with the lowest centre is taken as the leaf. The mask is
    # built from the cluster labels (not from centres rounded to uint8), so
    # two centres that fall on the same integer value still give two
    # distinct regions.
    leaf_label = int(np.argmin(centers))
    binary_mask = np.where(labels.reshape(a_channel.shape) == leaf_label, 255, 0).astype(np.uint8)

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_DILATE, kernel)

    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError("No contour found while segmenting: {!r}".format(path))

    # Keep only the largest region: draw its outline on a blank canvas and
    # take the contour of that outline as the leaf contour.
    leaf = np.zeros_like(a_channel)
    cv2.drawContours(leaf, [max(contours, key=cv2.contourArea)], -1, 255, 2)
    contours, _ = cv2.findContours(leaf, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnt = contours[0]

    # RGB image with every pixel outside the mask set to zero.
    segmented_image = cv2.bitwise_and(image_rgb, image_rgb, mask=binary_mask)

    # --- Shape features ------------------------------------------------------
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    _, _, w, h = cv2.boundingRect(cnt)

    # Always short side over long side, so the ratio lies in (0, 1].
    aspect_ratio = float(min(w, h)) / max(w, h)
    rectangularity = area / (w * h)
    circularity = (4 * np.pi * area) / (perimeter ** 2)
    equi_diameter = 2 * np.sqrt(area / np.pi)

    # --- Color features ------------------------------------------------------
    # NOTE(review): the statistics are taken over the whole 1600x1600 image,
    # zeroed background included, so they also depend on how much of the
    # frame the leaf covers - confirm this is intended.
    red_channel = segmented_image[:, :, 0]
    green_channel = segmented_image[:, :, 1]
    blue_channel = segmented_image[:, :, 2]

    red_mean, green_mean, blue_mean = (
        np.mean(channel) for channel in (red_channel, green_channel, blue_channel)
    )
    red_std, green_std, blue_std = (
        np.std(channel) for channel in (red_channel, green_channel, blue_channel)
    )

    # --- Texture features ----------------------------------------------------
    # NOTE(review): haralick receives the 3-channel array here; mahotas may
    # interpret a 3-D input as a volume rather than a color image - consider
    # passing a grayscale image. Left unchanged to keep feature values stable.
    textures = mahotas.features.haralick(segmented_image)
    ht_mean = textures.mean(axis=0)  # average over the computed directions

    contrast = ht_mean[1]
    correlation = ht_mean[2]
    inverse_diff_moments = ht_mean[4]
    entropy = ht_mean[8]

    return [area, perimeter, w, h, aspect_ratio, rectangularity, circularity, equi_diameter,
            red_mean, green_mean, blue_mean, red_std, green_std, blue_std, contrast,
            correlation, inverse_diff_moments, entropy]

In [24]:
# Sanity check on a single image. The values displayed below follow the order
# of the list returned by extract_features (area, perimeter, width, height, ...).
extract_features("./../Downloads/Plant-leaves/Mango/0001_0096.JPG")

[556358.0,
 3553.031857609749,
 1374,
 598,
 0.43522561863173215,
 0.6771212143340489,
 0.55381663716509,
 841.6513569345478,
 8.595147265625,
 15.289895703125,
 6.48874765625,
 18.176401492501007,
 30.146165657354874,
 15.497898066464067,
 223.08158965305637,
 0.8172086571900673,
 0.8001555937094343,
 3.3197995781218803]

In [26]:
# Build the feature table for the whole dataset and save it for the
# classification stage.
dataset = create_database()
# NOTE(review): to_csv is called with its default arguments, so the DataFrame
# index is written as an extra first column - keep that in mind when reading
# the file back.
dataset.to_csv("plant_features_kmeans_v1.csv")