In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cv2 as cv
from skimage.measure import label, regionprops, regionprops_table
from skimage.feature.texture import local_binary_pattern
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import classification_report


leaf segmentation

In [5]:
def otsu_segmentation(path):
    img=cv.imread(path,0)
    img = cv.GaussianBlur(img,(5,5),0)
    ret1,th1=cv.threshold(img,128,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    rect=cv.getStructuringElement(cv.MORPH_RECT, (3, 3))
    dilation = cv.dilate(th1,rect,iterations = 2)
    erosion = cv.erode(dilation, rect, iterations=1)
    th1=cv.bitwise_not(th1)
    th1=cv.medianBlur(th1,9)
    return th1


#getting rid of color correction card 
def segmentation(path):
    image = otsu_segmentation(path)
    h, w = image.shape
    mask= np.ones((h,w),np.uint8)
    mask[:,int(0.8*w):w]=0
    mask[int(h*0.9):h,:]=0


    image=cv.bitwise_and(image,image,mask=mask)

 
   
    return image


    

    



chosing and creating features

In [6]:
def create_features(path,name):
    img=cv.imread(path)
    seg=segmentation(path)
    label_img, num_of_labels = label(seg,return_num=True)
    df = pd.DataFrame( regionprops_table(label_img,img,properties=
                                        ['convex_area', 'area',
                                        'eccentricity', 'extent',                   
                                        'inertia_tensor',
                                        'major_axis_length', 
                                        'minor_axis_length']))
    df2=df.mean(axis=0)
    df2['path']=path
    df2['species']=name
    df2['convex_ratio']=df2['area']/df2['convex_area']
    df2['axis_ratio']=df2['major_axis_length']/df2['minor_axis_length']
    df2['num_of_labels']=num_of_labels
    img=cv.bitwise_and(img,img,mask=seg)
    # df2['lbp']=local_binary_pattern(img,3,method='uniform')


    return df2

Creating df with all photos of chosen species

In [7]:
df =pd.read_csv('leafsnap-dataset-images.txt', sep='\t')
species = ['Aesculus hippocastamon', 'Albizia julibrissin' ,'Celtis occidentalis', 'Fagus grandifolia', 'Magnolia stellata', 'Malus baccata', 'Malus pumila', 'Taxodium distichum', 'Tilia americana', 'Tilia europaea']
df.query('source =="lab"',inplace=True)
df.query('species == @species',inplace=True)

Uncomment if you want to see examples of chosen species

In [8]:
# df4 =pd.read_csv('leafsnap-dataset-images.txt', sep='\t')
# # species=species+[]
# df4.query('source =="lab"',inplace=True)
# df4=df4.drop_duplicates(subset=['species'],keep='last')
# df4.query('species == @species',inplace=True)

# for index ,row in df4.iterrows():
#     cv.imshow(row['species'],cv.imread(row['image_path']))
# cv.waitKey(0)
# cv.destroyAllWindows()

creating df with features of leaves

In [9]:
dfs=[]
for index, row in df.iterrows():
    dfs.append(create_features(row['image_path'],row['species']))

dfs=pd.DataFrame(dfs)



Data preparation

In [10]:

X_test=[]
X_train=[]
Y_test=[]
Y_train=[]

for specie in species:
    
   
    df3=dfs.query('species == @specie')
    for index, row in df3.iterrows():
        # if index ==0:
        #     cv.imshow(species[index],row['path'])
        if index%5==0:
            X_test.append(row)
            Y_test.append(row['species'])
        else:
            X_train.append(row)
            Y_train.append(row['species'])
        
   
X_test=pd.DataFrame(X_test)
Y_test=pd.DataFrame(Y_test)
Y_train=pd.DataFrame(Y_train)
X_train=pd.DataFrame(X_train)
X_test.drop(['species','path'],axis=1,inplace=True)
X_train.drop(['species','path'],axis=1,inplace=True)
# cv.waitKey(0)
# cv.destroyAllWindows()


Fitting and predicting Gradient Boosting classifier


In [11]:
model = GradientBoostingClassifier(random_state=10)
model.fit(X_train,Y_train)
report=classification_report(model.predict(X_test), Y_test,output_dict=True)
df6=pd.DataFrame()




df5=pd.DataFrame(report).transpose()
df5.to_csv('gradient_booster_report.csv')
print(classification_report(model.predict(X_test), Y_test,output_dict=False))

  y = column_or_1d(y, warn=True)


                        precision    recall  f1-score   support

Aesculus hippocastamon       0.96      1.00      0.98        23
   Albizia julibrissin       0.96      0.92      0.94        24
   Celtis occidentalis       0.78      0.86      0.82        21
     Fagus grandifolia       1.00      0.96      0.98        24
     Magnolia stellata       0.75      0.86      0.80        21
         Malus baccata       1.00      0.92      0.96        26
          Malus pumila       0.79      0.79      0.79        24
    Taxodium distichum       1.00      1.00      1.00        23
       Tilia americana       0.97      0.90      0.93        31
        Tilia europaea       1.00      1.00      1.00        24

              accuracy                           0.92       241
             macro avg       0.92      0.92      0.92       241
          weighted avg       0.93      0.92      0.92       241



In [24]:

ls= model.predict(X_test)
percentile_list = pd.DataFrame(
    {'lst1Title': ls,
     'lst2Title': Y_test.ravel()
    })

AttributeError: 'DataFrame' object has no attribute 'ravel'

Fitting and predicting Random forest classifier

In [13]:
model = RandomForestClassifier(random_state=10)
model.fit(X_train,Y_train)
report=classification_report(model.predict(X_test), Y_test,output_dict=True)



df5=pd.DataFrame(report).transpose()
df5.to_csv('random_forest_report.csv')
print(classification_report(model.predict(X_test), Y_test,output_dict=False))



  model.fit(X_train,Y_train)


                        precision    recall  f1-score   support

Aesculus hippocastamon       0.92      1.00      0.96        22
   Albizia julibrissin       0.96      0.92      0.94        24
   Celtis occidentalis       0.78      1.00      0.88        18
     Fagus grandifolia       1.00      0.96      0.98        24
     Magnolia stellata       0.79      0.90      0.84        21
         Malus baccata       1.00      1.00      1.00        24
          Malus pumila       0.92      0.85      0.88        26
    Taxodium distichum       1.00      0.96      0.98        24
       Tilia americana       1.00      0.88      0.94        33
        Tilia europaea       1.00      0.96      0.98        25

              accuracy                           0.94       241
             macro avg       0.94      0.94      0.94       241
          weighted avg       0.94      0.94      0.94       241

