# Preprocess and Feature Extraction

The extracted features are saved to a file named "leaf_features1.csv".

In [3]:

import os
import cv2
import numpy as np
import pandas as pd
import mahotas as mt
from matplotlib import pyplot as plt
%matplotlib inline

C:\Users\Ayush\anaconda3\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
C:\Users\Ayush\anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll


In [4]:
# Directory containing the leaf images. A raw string is used so the backslash
# is taken literally ("\L" is an invalid escape sequence in a normal string
# literal and triggers a warning on recent Python versions).
ds_path = r"D:\Leaves"
# os.listdir() returns entries in arbitrary order; sort them so the rows of the
# extracted feature table come out in the same order on every run.
img_files = sorted(os.listdir(ds_path))

In [10]:
def create_dataset():
    """Extract shape, colour and texture features for every file in ``img_files``.

    Reads each file name in the module-level ``img_files`` list from the
    module-level ``ds_path`` directory and computes 17 features per image:

    * Preprocessing: grayscale -> 25x25 Gaussian blur -> inverted Otsu
      threshold (pixels at or below the threshold become foreground) ->
      morphological closing with a 50x50 kernel.
    * Shape (from the largest contour of the closed mask): ``area``,
      ``perimeter``, bounding-box width (stored as ``physiological_length``)
      and height (stored as ``physiological_width``),
      ``aspect_ratio = w / h``, ``rectangularity = w * h / area`` and
      ``circularity = perimeter ** 2 / area``.
    * Colour: mean and standard deviation of the R, G and B channels over the
      whole image, after values equal to 255 have been set to 0 in each
      channel.
    * Texture: Haralick features of the (unblurred) grayscale image, averaged
      over the first axis of mahotas' result; entries 1, 2, 4 and 8 are kept
      as contrast, correlation, inverse difference moment and entropy.

    Files that OpenCV cannot decode, or whose mask yields no contour with a
    non-zero area, are skipped; they are listed in a summary printed at the
    end so that a missing row never goes unnoticed.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed image, in ``img_files`` order,
        with the 17 feature columns and a default ``0..n-1`` index.
    """
    names = ['area','perimeter','physiological_length','physiological_width','aspect_ratio','rectangularity','circularity', \
             'mean_r','mean_g','mean_b','stddev_r','stddev_g','stddev_b', \
             'contrast','correlation','inverse_difference_moments','entropy'
            ]

    # The structuring element is the same for every image, so build it once.
    kernel = np.ones((50,50),np.uint8)

    rows = []      # one feature vector per processed image
    skipped = []   # (file name, reason) for every image that produced no row

    for file in img_files:
        imgpath = os.path.join(ds_path, file)
        print(imgpath)

        # cv2.imread does not raise on failure; it returns None.
        main_img = cv2.imread(imgpath)
        if main_img is None:
            skipped.append((file, "could not be read as an image"))
            continue

        #Preprocessing
        img = cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)
        gs = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
        blur = cv2.GaussianBlur(gs, (25,25),0)
        _, im_bw_otsu = cv2.threshold(blur,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
        closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

        #Shape features
        # findContours returns (contours, hierarchy) in OpenCV 4 and
        # (image, contours, hierarchy) in OpenCV 3; [-2] is the contour list
        # in both layouts.
        contours = cv2.findContours(closing,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) == 0:
            skipped.append((file, "no contour found in the thresholded mask"))
            continue
        # Contour order is not related to size, so pick the largest region
        # rather than whichever contour happens to come first.
        cnt = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(cnt)
        if area == 0:
            # The ratios below divide by the area.
            skipped.append((file, "largest contour has zero area"))
            continue
        perimeter = cv2.arcLength(cnt,True)
        x,y,w,h = cv2.boundingRect(cnt)
        aspect_ratio = float(w)/h
        rectangularity = w*h/area
        circularity = ((perimeter)**2)/area

        #Color features
        # Zero out saturated (== 255) values channel by channel before taking
        # the statistics. NOTE(review): presumably meant to suppress a white
        # background -- confirm. `gs` was derived before this point, so the
        # texture features below are not affected by the masking.
        img[img == 255] = 0
        red_channel = img[:,:,0]
        green_channel = img[:,:,1]
        blue_channel = img[:,:,2]

        red_mean = np.mean(red_channel)
        green_mean = np.mean(green_channel)
        blue_mean = np.mean(blue_channel)

        red_std = np.std(red_channel)
        green_std = np.std(green_channel)
        blue_std = np.std(blue_channel)

        #Texture features
        textures = mt.features.haralick(gs)
        ht_mean = textures.mean(axis=0)
        contrast = ht_mean[1]
        correlation = ht_mean[2]
        inverse_diff_moments = ht_mean[4]
        entropy = ht_mean[8]

        rows.append([area,perimeter,w,h,aspect_ratio,rectangularity,circularity,\
                     red_mean,green_mean,blue_mean,red_std,green_std,blue_std,\
                     contrast,correlation,inverse_diff_moments,entropy
                    ])
        print(file)

    if skipped:
        print("Skipped %d file(s):" % len(skipped))
        for skipped_file, reason in skipped:
            print("  %s: %s" % (skipped_file, reason))

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row copies the whole table on every iteration.
    return pd.DataFrame(rows, columns=names)

In [11]:
# Run the feature extraction over every file in img_files; progress is printed
# per image by create_dataset().
dataset = create_dataset()

D:\Leaves\AG-S-001.jpg
AG-S-001.jpg
D:\Leaves\AG-S-002.jpg
AG-S-002.jpg
D:\Leaves\AG-S-003.jpg
AG-S-003.jpg
D:\Leaves\AG-S-004.jpg
AG-S-004.jpg
D:\Leaves\AG-S-005.jpg
AG-S-005.jpg
D:\Leaves\AG-S-006.jpg
AG-S-006.jpg
D:\Leaves\AG-S-007.jpg
AG-S-007.jpg
D:\Leaves\AG-S-008.jpg
AG-S-008.jpg
D:\Leaves\AG-S-009.jpg
AG-S-009.jpg
D:\Leaves\AG-S-010.jpg
AG-S-010.jpg
D:\Leaves\AG-S-011.jpg
AG-S-011.jpg
D:\Leaves\AG-S-012.jpg
AG-S-012.jpg
D:\Leaves\AG-S-013.jpg
AG-S-013.jpg
D:\Leaves\AG-S-014.jpg
AG-S-014.jpg
D:\Leaves\AG-S-015.jpg
AG-S-015.jpg
D:\Leaves\AG-S-016.jpg
AG-S-016.jpg
D:\Leaves\AG-S-017.jpg
AG-S-017.jpg
D:\Leaves\AG-S-018.jpg
AG-S-018.jpg
D:\Leaves\AG-S-019.jpg
AG-S-019.jpg
D:\Leaves\AG-S-020.jpg
AG-S-020.jpg
D:\Leaves\AG-S-021.jpg
AG-S-021.jpg
D:\Leaves\AG-S-022.jpg
AG-S-022.jpg
D:\Leaves\AG-S-023.jpg
AG-S-023.jpg
D:\Leaves\AG-S-024.jpg
AG-S-024.jpg
D:\Leaves\AG-S-025.jpg
AG-S-025.jpg
D:\Leaves\AG-S-026.jpg
AG-S-026.jpg
D:\Leaves\AG-S-027.jpg
AG-S-027.jpg
D:\Leaves\AG-S-028.jpg
AG-S-

In [12]:
# Sanity check: (rows, columns) of the extracted feature table.
dataset.shape

(1761, 17)

In [13]:
# Confirm that create_dataset() returned a pandas DataFrame.
type(dataset)

pandas.core.frame.DataFrame

In [14]:
# Persist the feature table as CSV in the current working directory.
# With to_csv's defaults the DataFrame index is written as the first,
# unnamed column.
dataset.to_csv("leaf_features1.csv")