In [1]:
import pandas as pd
import numpy as np
import glob, os, csv
import cv2, imutils, mahotas
import skimage.feature.texture as sft

# Features Extraction

In [2]:
def describe(img):
    global_features = []
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    R, G, B = img[:,:,0], img[:,:,1], img[:,:,2]
    feature = [np.mean(R), np.std(R), np.mean(G), np.std(G), np.mean(B), np.std(B)]
    global_features.extend(feature/np.mean(feature))

    hist = cv2.calcHist([img_hsv], [0, 1, 2], None, (8, 2, 2), [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    global_features.extend(hist.flatten())      

    glcm = sft.greycomatrix(img_gray, distances = [1], angles = [0], symmetric=True, normed=True)
    props = ['contrast', 'correlation', 'energy', 'homogeneity']
    feature = [sft.greycoprops(glcm, prop).ravel()[0] for prop in props]
    feature = feature / np.sum(feature)
    global_features.extend(feature)

    feature = mahotas.features.haralick(img_gray).ravel()
    global_features.extend(feature)
    
    feature = cv2.HuMoments(cv2.moments(img_gray)).flatten()
    global_features.extend(feature)

    return global_features

# Features Indexing

In [3]:
def index_feature(indexPathTrain,databasePathTrain):
    output = open(indexPathTrain, "w")

    for imagePath in glob.glob(databasePathTrain + os.path.sep + "*" + os.path.sep + "*.*"):
        imageID = imagePath.split(os.path.sep)[-1] # extract the name of each photo
        target = imagePath.split(os.path.sep)[-2] # extract the class of each photo

        image = cv2.imread(imagePath)
        features = describe(image)
        features = [str(f) for f in features]

        output.write("%s,%s\n" % (imageID + "," + target, ",".join(features)))

    output.close()

In [4]:
indexPathTrain = "index_train.csv"

databasePathTrain = "PlantDiseaseDataSet/train"
index_feature(indexPathTrain, databasePathTrain)

data = np.genfromtxt(indexPathTrain, delimiter=',',dtype=str)

X_train = np.array(data[:,2:],dtype=np.float64)
Y_train = data[:,1]

del data

# Data Preprocessing

In [5]:
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()
X_train = std_scaler.fit_transform(X_train)

# Training

In [6]:
from sklearn import svm

clf=svm.SVC(kernel="rbf")
%time clf.fit(X_train,Y_train)

print("score: ", clf.score(X_train,Y_train))

CPU times: user 6.3 ms, sys: 22 µs, total: 6.32 ms
Wall time: 6.35 ms
score:  0.9505494505494505




# Validation

In [7]:
indexPathValidation = "index_validation.csv"

databasePathValidation = "PlantDiseaseDataSet/validation"
index_feature(indexPathValidation, databasePathValidation)

data = np.genfromtxt(indexPathValidation, delimiter=',',dtype=str)

X_validation = np.array(data[:,2:],dtype=np.float64)
Y_validation = data[:,1]

del data

In [8]:
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()
X_validation = std_scaler.fit_transform(X_validation)

In [9]:
y_validation_pred = clf.predict(X_validation)

k=0
for i in range(0,len(y_validation_pred)):
    if Y_validation[i] == y_validation_pred[i]:
        k += 1
print(k/len(y_validation_pred))

0.8679245283018868


# Test

In [10]:
def index_test_feature(indexPathTest):
    databasePathTest = "PlantDiseaseDataSet/test"
    output = open(indexPathTest, "w")
    
    for imagePath in glob.glob(databasePathTest + os.sep + "*.*"):
        imageID = imagePath.split(os.path.sep)[-1]

        image = cv2.imread(imagePath)
        features = describe(image)
        features = [str(f) for f in features]

        output.write("%s,%s\n" % (imageID, ",".join(features)))

    output.close()

In [11]:
indexPathTest = "index_test.csv"

index_test_feature(indexPathTest)

data = np.genfromtxt(indexPathTest, delimiter=',',dtype=str)

X_test = np.array(data[:,1:],dtype=np.float64)
image_name = data[:,0]
del data

In [12]:
X_test = std_scaler.fit_transform(X_test)

pred = clf.predict(X_test)

for i in range(0,len(pred)):
    print(image_name[i], " => ", pred[i])

9284bd52-df45-4fc3-aaa6-4323666b41c4___RS_HL 7348_flipTB.JPG  =>  saine
76367d84-6702-4ef7-84f1-fe308315d04d___RS_HL 5967_flipTB.JPG  =>  saine
1d4dee76-8af5-4bce-ba44-ef87ffd664de___FREC_C.Rust 3628_newGRR.JPG  =>  malade
1cfc6e73-1d86-4fb9-bffb-010163531711___FREC_C.Rust 3944_newGRR.JPG  =>  malade
0ca16873-eeac-47e9-9a87-1859950daab7___JR_FrgE.S 2835_new30degFlipLR.JPG  =>  malade
990838f4-b976-45bb-859d-95de901acc8c___RS_HL 7365.JPG  =>  saine
350182de-38b1-43bf-b189-1fec53a7f499___RS_HL 7867_flipTB.JPG  =>  saine
8f3ed2e0-268d-485c-bfd0-0911db9ab888___JR_FrgE.S 8820.JPG  =>  malade
0ca16873-eeac-47e9-9a87-1859950daab7___JR_FrgE.S 2835_270deg.JPG  =>  malade
3643fe1d-9523-47cf-9952-7e2ecb027338___RS_HL 5704.JPG  =>  saine
6627c6e7-9d4f-41a7-9d86-ad04cfe2841a___RS_HL 7442_flipTB.JPG  =>  saine
1a69060b-e45e-4d95-881c-f6d1960dffcd___FREC_C.Rust 0065_newGRR.JPG  =>  malade
3bf50ea4-980e-471b-bbd1-fc0c526dbb7a___JR_FrgE.S 2905_90deg.JPG  =>  malade
34960f92-a973-4d36-b388-f63b339b86bc_