In [1]:
import os
import cv2
import mahotas
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import h5py
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import glob
import matplotlib.pyplot as plt

In [15]:
# make a fix file size
fixed_size  = tuple((512,512))

#train path 
train_path = "train/rgb"
test_path = "test/rgb"

# no of trees for Random Forests
num_tree = 25

# bins for histograms 
bins = 8

# train_test_split size
test_size = 0.20

# seed for reproducing same result 
seed = 9

In [8]:
# features description -1:  Hu Moments

def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

In [9]:
# feature-descriptor -2 Haralick Texture 

def fd_haralick(image):
    # conver the image to grayscale
    gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    # Ccompute the haralick texture fetature ve tor 
    haralic = mahotas.features.haralick(gray).mean(axis=0)
    return haralic

In [10]:
# feature-description -3 Color Histogram

def fd_histogram(image, mask=None):
    # conver the image to HSV colors-space
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    #COPUTE THE COLOR HISTPGRAM
    hist  = cv2.calcHist([image],[0,1,2],None,[bins,bins,bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist,hist)
    # return the histog....
    return hist.flatten()

In [11]:
# get the training data labels 
train_labels = os.listdir(train_path)
test_labels = os.listdir(test_path)

# sort the training labesl 
train_labels.sort()
print(train_labels)

['double_plant', 'drydown', 'endrow', 'nutrient_deficiency', 'planter_skip', 'storm_damage', 'water', 'waterway', 'weed_cluster']


In [12]:
# empty list to hold feature vectors and labels 
global_features = []
labels = []
i, j = 0, 0 
k = 0

# num of images per class 
images_per_class = 120

## loop insise the folder for train images

In [13]:
# ittirate the folder to get the image label name

%time
# lop over the training data sub folder 

for training_name in train_labels:
    # join the training data path and each species training folder
    dir = os.path.join(train_path, training_name)
    
    # get the current training label
    current_label = training_name
    
    k = 1
    # loop over the images in each sub-folder
    
    for file in os.listdir(dir):
        file = dir + "/" + os.fsdecode(file)
        
        # read the image and resize it to a fixed-size
        image = cv2.imread(file)
        nir = cv2.imread(file.replace('rgb','nir'))
        
        if image is not None:
            image = cv2.resize(image,fixed_size)
            fv_hu_moments = fd_hu_moments(image)
            fv_haralick   = fd_haralick(image)
            fv_histogram  = fd_histogram(image)
            nir = cv2.resize(nir,fixed_size)
            fv_hu_moments2 = fd_hu_moments(nir)
            fv_haralick2   = fd_haralick(nir)
            fv_histogram2  = fd_histogram(nir)
            
        # Concatenate global features
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments, fv_histogram2, fv_haralick2, fv_hu_moments2])
        
        # update the list of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)
        
        i += 1
        k += 1
    print("[STATUS] processed folder: {}".format(current_label))
    j += 1

print("[STATUS] completed Global Feature Extraction...")

Wall time: 0 ns
[STATUS] processed folder: double_plant
[STATUS] processed folder: drydown
[STATUS] processed folder: endrow
[STATUS] processed folder: nutrient_deficiency
[STATUS] processed folder: planter_skip
[STATUS] processed folder: storm_damage
[STATUS] processed folder: water
[STATUS] processed folder: waterway
[STATUS] processed folder: weed_cluster
[STATUS] completed Global Feature Extraction...


In [None]:

plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()
plt.savefigure('muestra1.jpg')

In [17]:
%time
# get the overall feature vector size
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

# get the overall training label size
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# encode the target labels
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
print("[STATUS] training labels encoded...{}")

# normalize the feature vector in the range (0-1)
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")

print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# save the feature vector using HDF5
h5f_data = h5py.File('data.h5', 'w')
h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

h5f_label = h5py.File('labels.h5', 'w')
h5f_label.create_dataset('dataset_1', data=np.array(target))

h5f_data.close()
h5f_label.close()

print("[STATUS] end of training..")

Wall time: 0 ns
[STATUS] feature vector size (1080, 1064)
[STATUS] training Labels (1080,)
[STATUS] training labels encoded...{}
[STATUS] feature vector normalized...
[STATUS] target labels: [0 0 0 ... 8 8 8]
[STATUS] target labels shape: (1080,)
[STATUS] end of training..


In [18]:
# import the feature vector and trained labels

h5f_data = h5py.File('data.h5', 'r')
h5f_label = h5py.File('labels.h5', 'r')

global_features_string = h5f_data['dataset_1']
global_labels_string = h5f_label['dataset_1']

global_features = np.array(global_features_string)
global_labels = np.array(global_labels_string)

In [19]:
# split the training and testing data
(trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features),
                                                                                          np.array(global_labels),
                                                                                          test_size=test_size,
                                                                                          random_state=seed)

### RandomForest

In [20]:
# create the model - Random Forests
clf  = RandomForestClassifier(n_estimators=num_tree, max_features=None, max_depth=None)

# fit the training data to the model
clf.fit(trainDataGlobal, trainLabelsGlobal)

#print(clf.fit(trainDataGlobal, trainLabelsGlobal))
clf_pred = clf.predict(trainDataGlobal)

print(classification_report(trainLabelsGlobal,clf_pred))
#print(confusion_matrix(trainLabelsGlobal,clf_pred))

#print(clf.predict(trainDataGlobal))

#print(clf.predict(global_feature.reshape(1,-1))[0])

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        95
           1       1.00      1.00      1.00       101
           2       1.00      1.00      1.00        94
           3       1.00      1.00      1.00        95
           4       1.00      1.00      1.00        96
           5       1.00      1.00      1.00       104
           6       1.00      1.00      1.00        96
           7       1.00      1.00      1.00        87
           8       1.00      1.00      1.00        96

    accuracy                           1.00       864
   macro avg       1.00      1.00      1.00       864
weighted avg       1.00      1.00      1.00       864



In [22]:
print(confusion_matrix(trainLabelsGlobal,clf_pred))

[[ 95   0   0   0   0   0   0   0   0]
 [  0 101   0   0   0   0   0   0   0]
 [  0   0  94   0   0   0   0   0   0]
 [  0   0   0  95   0   0   0   0   0]
 [  0   0   0   0  96   0   0   0   0]
 [  0   0   0   0   0 104   0   0   0]
 [  0   0   0   0   0   0  96   0   0]
 [  0   0   0   0   0   0   0  87   0]
 [  0   0   0   0   0   0   0   0  96]]


In [21]:
%time
# path to test data
predictions = []
test_label = []
i = 0
# loop through the test images
#for file in glob.glob(test_path + "/*.jpg"):
for test_name in test_labels:
    dir = os.path.join(test_path, test_name)
    
    #get the current test Label
    current_label = test_name
    # loop through the test images
    for file in os.listdir(dir):
        file = dir + '/' + os.fsdecode(file)
        
        #read the image
        image = cv2.imread(file)
        #resize the image
        image = cv2.resize(image, fixed_size)
        
        nir = cv2.imread(file.replace('rgb','nir'))
        nir = cv2.resize(nir, fixed_size)
        
        #Global Feature extraction
        fv_hu_moments = fd_hu_moments(image)
        fv_haralick   = fd_haralick(image)
        fv_histogram  = fd_histogram(image)
        fv_hu_moments2 = fd_hu_moments(nir)
        fv_haralick2   = fd_haralick(nir)
        fv_histogram2  = fd_histogram(nir)
        
        #Concatenate global features
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments, fv_histogram2, fv_haralick2, fv_hu_moments2])
        
        # predict label of test image
        prediction = clf.predict(global_feature.reshape(1,-1))[0]
        predictions.append(prediction)
        #print(prediction)
        test_label.append(i)

        # show predicted label on image
        cv2.putText(image, train_labels[prediction], (20,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,255), 3)
        cv2.putText(image, train_labels[i], (20,60), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255,255,255), 3)
        # display the output image
        #plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        #plt.show()
    i += 1
print(classification_report(test_label,predictions)) 
print(confusion_matrix(test_label,predictions))

Wall time: 0 ns
              precision    recall  f1-score   support

           0       0.23      0.10      0.14        50
           1       1.00      0.78      0.88        50
           2       0.00      0.00      0.00        50
           3       0.29      0.68      0.40        50
           4       0.27      0.84      0.41        50
           5       0.17      0.10      0.12        50
           6       0.96      0.50      0.66        50
           7       0.56      0.36      0.44        50
           8       0.67      0.32      0.43        50

    accuracy                           0.41       450
   macro avg       0.46      0.41      0.39       450
weighted avg       0.46      0.41      0.39       450

[[ 5  0  0  0 43  0  0  0  2]
 [ 0 39  0  0 10  0  0  0  1]
 [ 4  0  0 13 24  4  0  3  2]
 [ 0  0  2 34  0  6  0  8  0]
 [ 3  0  0  2 42  0  1  0  2]
 [ 1  0  0 43  0  5  0  1  0]
 [ 4  0  1 11  5  1 25  2  1]
 [ 1  0  1 16  0 14  0 18  0]
 [ 4  0  0  0 30  0  0  0 16]]
