In [1]:
import os
import glob
import datetime
import tarfile
import urllib.request

In [2]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
# Target size handed to cv2.resize for every image before feature extraction.
fixed_size = (64, 64)
# NOTE(review): presumably the per-channel histogram bin count; the visible
# cells never read it (fd_HSV hard-codes 8) -- confirm before relying on it.
bins = 8

In [3]:
from skimage.color import rgb2hsv
def color_Moments(img):
    """Return six normalised colour moments: mean and std of each HSV channel.

    The image is converted with ``rgb2hsv`` and, for each of the three
    resulting channels, the mean and the standard deviation are collected in
    the order ``[mean0, std0, mean1, std1, mean2, std2]``.  The vector is then
    divided by its own mean.

    Parameters
    ----------
    img : array-like
        Three-channel image accepted by ``rgb2hsv``.
        NOTE(review): the cells of this notebook pass images loaded with
        ``cv2.imread`` (BGR channel order) without swapping channels first.
        That is left untouched so new features stay comparable with features
        that were already extracted -- confirm it is intended.

    Returns
    -------
    numpy.ndarray
        Six values.  When the moments average to zero (e.g. a completely black
        image) the unscaled moments are returned, because dividing by the mean
        would produce NaN entries.
    """
    hsv = rgb2hsv(img)
    moments = []
    for channel in range(3):
        plane = hsv[:, :, channel]
        moments.extend((np.mean(plane), np.std(plane)))
    moments = np.asarray(moments)
    scale = np.mean(moments)
    if scale == 0:
        # Guard against 0/0: the original expression yielded an all-NaN vector here.
        return moments
    return moments / scale

In [4]:
def fd_HSV(image, mask=None):
    """Return a flattened 8x8x8 HSV colour histogram of ``image``.

    Parameters
    ----------
    image : numpy.ndarray
        Image converted with ``cv2.COLOR_BGR2HSV``, i.e. treated as BGR.
    mask : numpy.ndarray or None, optional
        Forwarded to ``cv2.calcHist``.  ``None`` (the default) uses every
        pixel, which is what the function did before the mask was honoured.

    Returns
    -------
    list of float
        The 512 histogram values (8 bins per channel) after an in-place
        ``cv2.normalize`` call with its default settings.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # The mask argument used to be hard-coded to None, silently ignoring the
    # caller's mask; it is now passed through.
    hist = cv2.calcHist([np.uint8(hsv)], [0, 1, 2], mask, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten().tolist()

In [5]:
# feature-descriptor-1: Hu Moments
# (shape descriptor)
def fd_hu_moments(image):
    """Return the Hu moments of ``image`` as a flat array.

    The image is converted from BGR to grayscale, its image moments are
    computed with ``cv2.moments`` and turned into Hu moments with
    ``cv2.HuMoments``; the result is flattened to one dimension.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(grayscale)
    hu = cv2.HuMoments(raw_moments)
    return hu.flatten()

In [6]:
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    """Return the Haralick texture features of ``image``.

    The image is converted from BGR to grayscale, passed to
    ``mahotas.features.haralick`` and the result is averaged along axis 0.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    per_direction = mahotas.features.haralick(grayscale)
    return per_direction.mean(axis=0)

In [7]:
from skimage import feature
def rgb2gray(rgb):
    """Collapse a three-channel image to one channel with a weighted sum.

    Channels 0, 1 and 2 of the last axis are weighted by 0.2989, 0.5870 and
    0.1140 respectively and added, giving a 2-D array with the image's first
    two dimensions.
    """
    first, second, third = (rgb[:, :, channel] for channel in range(3))
    return 0.2989 * first + 0.5870 * second + 0.1140 * third
def glcm_props(patch):
    """Return ten grey-level co-occurrence matrix (GLCM) texture features.

    ``patch`` is reduced to one channel with ``rgb2gray`` and cast to ``int``.
    For a pixel distance of 1 and each of the angles ``0`` and ``pi/2`` a
    symmetric, normalised 256-level GLCM is built, and from each matrix the
    properties dissimilarity, contrast, homogeneity, energy and correlation
    are read.

    Returns
    -------
    numpy.ndarray
        Ten values: the five properties for angle 0 followed by the same five
        properties for angle pi/2.
    """
    # scikit-image renamed grey* -> gray* and later dropped the old spelling.
    # Prefer the new names and fall back so older installations keep working.
    graycomatrix = getattr(feature, "graycomatrix", None) or feature.greycomatrix
    graycoprops = getattr(feature, "graycoprops", None) or feature.greycoprops

    props = ('dissimilarity', 'contrast', 'homogeneity', 'energy', 'correlation')
    gray = np.array(rgb2gray(patch), int)

    values = []
    # angle 0: horizontal neighbour, angle pi/2: vertical neighbour
    for angle in (0, np.pi / 2):
        glcm = graycomatrix(gray, [1], [angle], 256, symmetric=True, normed=True)
        values.extend(graycoprops(glcm, prop)[0, 0] for prop in props)
    return np.asarray(values)

In [8]:
# Every entry of the training directory is used as one class label,
# in alphabetical order.
train_path = "../train_dataset"
train_labels = sorted(os.listdir(train_path))
print(train_labels)

# Accumulators filled by the feature-extraction loop:
# one feature vector and one label per image.
global_features, labels = [], []

['Calendula', 'Coquelicot', "Feuille d'olivier", 'Feuilles de figuier', 'Glebionis coronaria', 'Jasmin', 'La verveine', 'Menthe', 'Ortie', 'Persil', 'Romarin', 'Sauge', 'Thym', 'lavande']


In [32]:
# Start from empty accumulators so that re-running this cell does not append
# a second copy of every feature vector and label.
global_features = []
labels = []

# loop over the training data sub-folders
for training_name in train_labels:
    # folder holding the images of the current class
    # (named class_dir rather than dir to avoid shadowing the builtin)
    class_dir = os.path.join(train_path, training_name)
    file_names = os.listdir(class_dir)
    # the folder name is the training label
    current_label = training_name
    print(current_label, "-->", len(file_names))
    # loop over the images in each sub-folder
    for elem in file_names:
        # read the image and resize it to a fixed size
        image = cv2.imread(os.path.join(class_dir, elem))
        if image is None:
            # cv2.imread returns None for files it cannot decode.  The old
            # bare ``except`` only printed the name and then crashed in the
            # feature extractors; report the file and skip it instead.
            print(elem)
            continue
        image = cv2.resize(image, fixed_size)
        ####################################
        # Global Feature extraction
        ####################################
        color_moment = color_Moments(image)
        fv_hu_moments = fd_hu_moments(image)
        # Haralick texture (fd_haralick) is not part of the training vector.
        histogram = fd_HSV(image)
        glcm = glcm_props(image)
        ###################################
        # Concatenate global features
        ###################################
        global_feature = np.hstack([color_moment, fv_hu_moments, histogram, glcm])
        # update the list of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)

    print("[STATUS] processed folder: {}".format(current_label))

print("[STATUS] completed Global Feature Extraction...")

Calendula --> 419
[STATUS] processed folder: Calendula
Coquelicot --> 637
[STATUS] processed folder: Coquelicot
Feuille d'olivier --> 463
[STATUS] processed folder: Feuille d'olivier
Feuilles de figuier --> 480
[STATUS] processed folder: Feuilles de figuier
Glebionis coronaria --> 522
[STATUS] processed folder: Glebionis coronaria
Jasmin --> 501
[STATUS] processed folder: Jasmin
La verveine --> 547
[STATUS] processed folder: La verveine
Menthe --> 508
[STATUS] processed folder: Menthe
Ortie --> 587
[STATUS] processed folder: Ortie
Persil --> 544
[STATUS] processed folder: Persil
Romarin --> 498
[STATUS] processed folder: Romarin
Sauge --> 521
[STATUS] processed folder: Sauge
Thym --> 575
[STATUS] processed folder: Thym
lavande --> 619
[STATUS] processed folder: lavande
[STATUS] completed Global Feature Extraction...


In [9]:
import joblib

# To refresh the cache after re-running feature extraction, execute:
# joblib.dump(global_features, "features.pkl", compress=True)
# joblib.dump(labels, "labels.pkl", compress=True)

# Load the cached feature vectors and labels and turn both into numpy arrays.
features, labels = (
    np.array(joblib.load(cache_file))
    for cache_file in ("features.pkl", "labels.pkl")
)
for cached in (features, labels):
    print(cached.shape)

(7421, 535)
(7421,)


In [11]:
import numpy as np
import os
import glob
import cv2
import warnings
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn import svm

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Shared experiment settings.
num_trees = 500        # trees in the random forest of the comparison below
test_size = 0.10       # fraction of the samples held out for testing
seed      = 9          # random_state for the split and the seeded estimators
scoring   = "accuracy" # metric used by cross_val_score

# (short name, unfitted estimator) pairs compared by cross-validation.
# A default SVC(random_state=seed) was tried as 'SVM' and is disabled.
models = [
    ('LR', LogisticRegression(random_state=seed)),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier(random_state=seed)),
    ('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)),
    ('NB', GaussianNB()),
    ('SVM_2', svm.SVC(kernel='rbf', C=7)),
]

In [12]:
# Hold out `test_size` of the samples; `seed` makes the split reproducible.
trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal = train_test_split(
    np.array(features),
    np.array(labels),
    test_size=test_size,
    random_state=seed,
)

print("[STATUS] splitted train and test data...")
# Report the shape of each of the four parts.
for caption, part in (
    ("Train data  : {}", trainDataGlobal),
    ("Test data   : {}", testDataGlobal),
    ("Train labels: {}", trainLabelsGlobal),
    ("Test labels : {}", testLabelsGlobal),
):
    print(caption.format(part.shape))

[STATUS] splitted train and test data...
Train data  : (6678, 535)
Test data   : (743, 535)
Train labels: (6678,)
Test labels : (743,)


In [None]:
# 10-fold cross validation of every candidate model on the training split
results, names = [], []
# The unshuffled splitter carries no state, so one instance serves all models.
kfold = KFold(n_splits=10, random_state=None)
for name, model in models:
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # model name: mean accuracy (standard deviation across folds)
    print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))

# boxplot algorithm comparison: one box per model, built from its fold scores
fig, ax = pyplot.subplots()
fig.suptitle('Machine Learning algorithm comparison')
ax.boxplot(results)
ax.set_xticklabels(names)
pyplot.show()
# Optional, disabled: power the machine off after this long-running cell,
# e.g. os.system("shutdown /s /t 1")

LR: 0.505243 (0.023106)
LDA: 0.511085 (0.021213)
KNN: 0.126381 (0.011424)
CART: 0.553319 (0.021721)


In [37]:
import pickle

# create the model - Random Forests
# (alternatives that were tried: DecisionTreeClassifier(random_state=seed),
#  svm.SVC(kernel='rbf', C=100))
clf = RandomForestClassifier(n_estimators=500, random_state=seed)
# fit the training data to the model
clf.fit(trainDataGlobal, trainLabelsGlobal)

# Persist the fitted model; the ``with`` block closes the file on exit, so no
# explicit close() is needed.
with open("model.pkl", "wb") as file:
    pickle.dump(clf, file)

# Accuracy on the held-out test split: fraction of exactly matching labels.
test_predictions = clf.predict(testDataGlobal)
accuracy = accuracy_score(testLabelsGlobal, test_predictions)
print(accuracy)

0.7725437415881561


In [38]:
# training accuracy
import pickle

# NOTE: pickle.load can execute arbitrary code; only load a model.pkl that
# was written by this notebook (or another trusted source).
with open("model.pkl", "rb") as file:
    clf = pickle.load(file)

# Accuracy on the data the model was fitted on; compare it with the test
# accuracy to judge over-fitting.
train_predictions = clf.predict(trainDataGlobal)
accuracy = accuracy_score(trainLabelsGlobal, train_predictions)
print(accuracy)

0.9998502545672358


In [None]:
from matplotlib import pyplot as plt
import pickle

# NOTE: pickle.load can execute arbitrary code; only load a model.pkl that
# was written by this notebook (or another trusted source).
with open("model.pkl", "rb") as file:
    clf = pickle.load(file)

test_dir = "test"
for file_name in os.listdir(test_dir):
    # read the image (a separate name keeps the file name and pixels apart)
    image = cv2.imread(os.path.join(test_dir, file_name))
    if image is None:
        # cv2.imread returns None for files it cannot decode; skip them
        print("[WARNING] skipped unreadable file: {}".format(file_name))
        continue
    # resize the image
    image = cv2.resize(image, fixed_size)
    color_moment = color_Moments(image)
    fv_hu_moments = fd_hu_moments(image)
    histogram = fd_HSV(image)
    glcm = glcm_props(image)
    ###################################
    # Concatenate global features
    ###################################
    # Use exactly the descriptors, in the same order, that the training loop
    # stacks (colour moments, Hu moments, HSV histogram, GLCM).  Haralick
    # features were previously appended here but are not part of the training
    # vector, so predict() received the wrong number of features.
    global_feature = np.hstack([color_moment, fv_hu_moments, histogram, glcm]).reshape(1, -1)
    print(global_feature.shape)
    # predict label of test image
    prediction = clf.predict(global_feature)[0]
    # show predicted label on image (cast the numpy string to a plain str)
    cv2.putText(image, str(prediction), (20,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,255), 3)
    # display the output image
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()