<a href="https://colab.research.google.com/github/hudaor/leaf-disease-1/blob/main/crop_leaf_disease_ML_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!find  /usr/local/lib/python3.7/dist-packages/ -name '~*' -exec rm -r {} \;

In [None]:
# Install third-party packages on a fresh Colab runtime.
# NOTE(review): versions are unpinned, so a later run may pull different releases.
# NOTE(review): frimcla is not imported in any cell visible here - confirm it is still needed.
!pip install frimcla
# mahotas supplies the Haralick texture features used during feature extraction.
!pip install mahotas 

# NOTE(review): upgrading numpy inside a running kernel typically needs a runtime
# restart before the new version is used - confirm whether this upgrade is required.
!pip install -U numpy


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
#os.kill(os.getpid(), 9)

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used by
# the later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
from os import listdir
import h5py


# ---------------------------------------------------------------------------
# Tunable parameters for feature extraction
# ---------------------------------------------------------------------------
# Highest image number tried in each class folder (files are named 1.jpg, 2.jpg, ...).
images_per_class = 17064
# Size passed to cv2.resize for every image before feature extraction.
fixed_size = (500, 500)
# Directory containing one sub-folder per class.
train_path = "/content/drive/MyDrive/2022dataset 05 2 22/train"
# HDF5 files that receive the scaled feature matrix and the encoded labels.
h5_train_data = '/content/drive/MyDrive/2022dataset 05 2 22/output/train_data.h5'
h5_train_labels = '/content/drive/MyDrive/2022dataset 05 2 22/output/train_labels.h5'
# Number of histogram bins per channel in the colour histogram descriptor.
bins = 8

# When True, progress/debug print statements are emitted.
DEBUG = True

# Converting each image to RGB from BGR format
def rgb_bgr(image):
    """Convert ``image`` from BGR channel order to RGB channel order.

    Despite the name, the conversion applied is ``cv2.COLOR_BGR2RGB``.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Conversion to HSV image format from RGB
def bgr_hsv(rgb_img):
    """Convert an RGB image to HSV.

    The function name says "bgr", but the input is treated as RGB: the
    conversion applied is ``cv2.COLOR_RGB2HSV``.
    """
    return cv2.cvtColor(rgb_img, cv2.COLOR_RGB2HSV)

# image segmentation
# for extraction of green and brown color
def img_segmentation(rgb_img, hsv_img):
    """Keep only the green and brown regions of an image; zero everything else.

    Args:
        rgb_img: Colour image whose pixels are copied into the result.
        hsv_img: HSV version of the same image, used only for thresholding.

    Returns:
        An image like ``rgb_img`` in which every pixel that falls outside
        both HSV ranges below is set to zero.
    """
    # Inclusive HSV bounds for the green ("healthy") and brown ("disease") ranges.
    lower_green = np.array([25, 0, 20])
    upper_green = np.array([100, 255, 255])
    lower_brown = np.array([10, 0, 10])
    upper_brown = np.array([30, 255, 255])

    healthy_mask = cv2.inRange(hsv_img, lower_green, upper_green)
    disease_mask = cv2.inRange(hsv_img, lower_brown, upper_brown)

    # Union of the two masks. bitwise_or is used instead of `+` because the
    # masks are uint8 and 255 + 255 would wrap around to 254.
    final_mask = cv2.bitwise_or(healthy_mask, disease_mask)
    return cv2.bitwise_and(rgb_img, rgb_img, mask=final_mask)
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    """Shape descriptor: Hu moments of the grayscale version of ``image``.

    The image is converted with ``cv2.COLOR_BGR2GRAY`` before the moments
    are computed.
    NOTE(review): the extraction loop in this notebook passes an RGB-ordered
    image here, so the red/blue weights are swapped - confirm this is intended.

    Returns:
        The output of ``cv2.HuMoments`` flattened to one dimension.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(gray)
    return cv2.HuMoments(raw_moments).flatten()
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    """Texture descriptor: Haralick features of the grayscale version of ``image``.

    The image is converted with ``cv2.COLOR_BGR2GRAY``; the matrix returned by
    ``mahotas.features.haralick`` is then averaged along its first axis.

    Returns:
        One-dimensional array holding the column-wise mean of the Haralick matrix.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    texture_rows = mahotas.features.haralick(grayscale)
    return texture_rows.mean(axis=0)
    
# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    """Colour descriptor: normalised 3-D HSV histogram of ``image``.

    Args:
        image: Colour image; converted to HSV with ``cv2.COLOR_BGR2HSV``.
        mask: Optional mask forwarded to ``cv2.calcHist`` so that only the
            masked pixels are counted. ``None`` (the default) uses the whole
            image, which matches the previous behaviour.

    Returns:
        Flattened histogram with ``bins ** 3`` entries, normalised in place by
        ``cv2.normalize`` with its default settings.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # NOTE(review): for 8-bit images OpenCV hue values stay below 180, so with a
    # [0, 256] range the upper hue bins remain empty. The range is kept as-is
    # so that feature vectors stay compatible with data extracted earlier.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()
# Class names are the entries of the training directory, in sorted order.
train_labels = sorted(os.listdir(train_path))
print(train_labels)

# Accumulators filled by the extraction loop: one feature vector and one
# label per successfully processed image.
global_features, labels = [], []

In [None]:
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

# Walk every class folder and build one global feature vector per readable image.
for training_name in train_labels:
    # Folder that holds this class's images; the folder name is also the label.
    class_dir = os.path.join(train_path, training_name)
    current_label = training_name
    # Number of original/segmented previews displayed so far for this class.
    counter = 0

    # Images are looked up by number: 1.jpg, 2.jpg, ..., <images_per_class>.jpg.
    for x in range(1, images_per_class + 1):
        image_path = class_dir + "/" + str(x) + ".jpg"
        if DEBUG:
            print("INPUT FILES: " + image_path)

        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising, so test for it explicitly and skip the file.
        image = cv2.imread(image_path)
        if image is None:
            print("Error in file: " + str(image_path))
            continue

        try:
            image = cv2.resize(image, fixed_size)
        except cv2.error:
            print("Error in file: " + str(image_path))
            # `image` is an ndarray at this point; testing it with `if image:`
            # would raise ValueError, so print the shape unconditionally.
            print(str(image.shape))
            continue

        # Segmentation runs on the RGB image and its HSV counterpart.
        RGB_BGR = rgb_bgr(image)
        BGR_HSV = bgr_hsv(RGB_BGR)
        IMG_SEGMENT = img_segmentation(RGB_BGR, BGR_HSV)

        # Global feature descriptors of the segmented image.
        # NOTE(review): IMG_SEGMENT is RGB-ordered while the fd_* helpers apply
        # BGR conversions. Left unchanged so the features stay comparable with
        # any feature files saved earlier - confirm before changing.
        fv_hu_moments = fd_hu_moments(IMG_SEGMENT)
        fv_haralick = fd_haralick(IMG_SEGMENT)
        fv_histogram = fd_histogram(IMG_SEGMENT)

        # Preview the first five images of each class next to their segmentation.
        if counter < 5:
            # cv2_imshow expects BGR data, so convert the RGB segmentation back
            # before stacking it beside the (BGR) original.
            segment_bgr = cv2.cvtColor(IMG_SEGMENT, cv2.COLOR_RGB2BGR)
            cv2_imshow(np.hstack((image, segment_bgr)))
            counter += 1

        # Concatenate the three descriptors into one feature vector and record
        # it together with its label.
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
        labels.append(current_label)
        global_features.append(global_feature)

    # Reported once per class folder (inside the outer loop).
    print("[STATUS] processed folder: {}".format(current_label))

print("[STATUS] completed Global Feature Extraction...")

In [None]:
#!pip freeze

# Fail early with a clear message if the extraction loop produced nothing;
# otherwise the scaler below would fail with an obscure shape error.
if len(global_features) == 0:
    raise RuntimeError("No features were extracted - check train_path and the image file names.")

# Stack the per-image vectors once and reuse the array below.
features_array = np.array(global_features)

# Overall feature matrix and label vector sizes.
print("[STATUS] feature vector size {}".format(features_array.shape))
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# Encode the string class labels as integers.
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
if DEBUG:
    print("ENCODED TARGET: "+ str(target))
print("[STATUS] training labels encoded...")

# Scale every feature column into the range [0, 1].
# NOTE(review): `scaler` and `le` exist only in kernel memory. Images scored
# outside this session need the same fitted scaler - consider persisting it.
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(features_array)
print("[STATUS] feature vector normalized...")
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# Make sure the output folders exist before writing.
os.makedirs(os.path.dirname(h5_train_data), exist_ok=True)
os.makedirs(os.path.dirname(h5_train_labels), exist_ok=True)

# Save features and labels as HDF5. The `with` blocks close (and therefore
# flush) each file, so it can be reopened for reading straight away.
with h5py.File(h5_train_data, 'w') as h5f_data:
    h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

with h5py.File(h5_train_labels, 'w') as h5f_label:
    h5f_label.create_dataset('dataset_1', data=np.array(target))


In [None]:
# Same installs as the setup cell near the top of the notebook, repeated here.
# NOTE(review): presumably so the training section can run on a fresh runtime
# by itself - confirm, and consider pinning versions.
!pip install frimcla
!pip install mahotas 
!pip install -U numpy

In [None]:

# training
#-----------------------------------
# TRAINING OUR MODEL
#-----------------------------------
import h5py
import numpy as np
import os
import glob
import cv2
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
#from sklearn.externals import joblib
import joblib
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Tunable parameters for model training
# ---------------------------------------------------------------------------
# Trees in the random forest, held-out fraction, and the shared random seed.
num_trees, test_size, seed = 100, 0.20, 9

# Dataset folders.
train_path = "/content/drive/MyDrive/2022dataset 05 2 22/train"
test_path = "/content/drive/MyDrive/2022dataset 05 2 22/validation"

# HDF5 files holding the extracted features and the encoded labels.
h5_train_data = '/content/drive/MyDrive/2022dataset 05 2 22/output/train_data.h5'
h5_train_labels = '/content/drive/MyDrive/2022dataset 05 2 22/output/train_labels.h5'

# Metric used by cross-validation.
scoring = "accuracy"

# Class names are the entries of the training directory, in sorted order.
train_labels = sorted(os.listdir(train_path))

# Create the validation folder when nothing exists at that path yet.
test_path_missing = not os.path.exists(test_path)
if test_path_missing:
    os.makedirs(test_path)

# (name, estimator) pairs evaluated below. Logistic regression is currently
# disabled; stochastic estimators share `seed` for repeatability.
models = [
    ('LDA', LinearDiscriminantAnalysis() ),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier(random_state=seed)),
    ('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)),
    ('NB', GaussianNB()),
    # probability=True makes SVC expose predict_proba, which the ROC cell calls.
    ('SVM', SVC(random_state=seed, probability=True)),
]

# Cross-validation score arrays and the matching model names, filled in step.
results, names = [], []

# Load the saved feature matrix and encoded labels fully into memory. The
# `with` statement closes both files even if reading one of them fails.
with h5py.File(h5_train_data, 'r') as h5f_data, h5py.File(h5_train_labels, 'r') as h5f_label:
    global_features = np.array(h5f_data['dataset_1'])
    global_labels   = np.array(h5f_label['dataset_1'])

# Every sample needs exactly one label; a mismatch means the two files come
# from different extraction runs.
if len(global_features) != len(global_labels):
    raise ValueError(
        "Feature/label count mismatch: {} feature rows vs {} labels".format(
            len(global_features), len(global_labels)
        )
    )

# verify the shape of the feature vector and labels
print("[STATUS] features shape: {}".format(global_features.shape))
print("[STATUS] labels shape: {}".format(global_labels.shape))

print("[STATUS] training started...")

# Hold out a `test_size` fraction of the samples; `seed` fixes the shuffle.
trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal = train_test_split(
    np.array(global_features),
    np.array(global_labels),
    test_size=test_size,
    random_state=seed,
)

print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataGlobal.shape))
print("Test data   : {}".format(testDataGlobal.shape))

# 10-fold cross-validation of every model on the training split.
# The splitter is built once: with a fixed integer random_state it yields the
# same folds on every use, so all models are scored on identical folds.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

for name, model in models:
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report "<name>: <mean> (<standard deviation>)" over the folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# boxplot algorithm comparison
from matplotlib import pyplot as plt
import seaborn as sns
# Box plot of the per-fold cross-validation scores, one box per model.
fig = plt.figure()
fig.suptitle('Comparison of machine learning algorithms')
ax = fig.add_subplot(111)
ax.boxplot(results)
ax.set_xticklabels(names)

# Save BEFORE plt.show(): once the figure has been shown, pyplot no longer
# treats it as the current figure and a later plt.savefig writes an empty page.
fig.savefig('Comparison of machine learning algorithms.pdf', dpi=800)
plt.show()

In [None]:
#Calculating ROC and AUC metrics
#REFER TO: https://machinelearningmastery.com/roc-curves-and-precision-recall-curves-for-classification-in-python/
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot
import matplotlib.pyplot as plt
# One shared figure with a panel per model, created once and shown once.
roc_fig, roc_axes = pyplot.subplots(1, len(models), figsize=(6 * len(models), 6), squeeze=False)

for panel, (name, model) in zip(roc_axes[0], models):
    # Fit on the training split; the fitted estimators stay in `models`.
    model.fit(trainDataGlobal, trainLabelsGlobal)

    # Keep the probability column of the second class (index 1), i.e. the
    # positive outcome of the binary target.
    model_probs = model.predict_proba(testDataGlobal)[:, 1]

    # Area under the ROC curve on the held-out split.
    model_auc = roc_auc_score(testLabelsGlobal, model_probs)
    print(name+": ROC AUC=%.3f" % model_auc)

    # Points of the ROC curve.
    model_fpr, model_tpr, _ = roc_curve(testLabelsGlobal, model_probs)

    panel.plot(model_fpr, model_tpr, linestyle='--', color='r', label="%s ROC curve (area=%0.2f)" % (name, model_auc))
    panel.grid(linestyle='--')
    panel.set_xlabel('False Positive Rate')
    panel.set_ylabel('True Positive Rate')
    panel.legend(loc="lower right")

roc_fig.tight_layout()
pyplot.show()

# Note: SVC must be constructed with probability=True; otherwise calling
# predict_proba on it raises an error.

In [None]:
# Raw cross-validation score arrays, one per model, in the same order as `names`.
print(results)

In [None]:


# boxplot algorithm comparison
from matplotlib import pyplot as plt
import seaborn as sns
# Redraw the model comparison box plot and save it as a PDF. `fig` stays
# bound so that a later cell can export the same figure again.
fig, ax = plt.subplots()
fig.suptitle('Comparison of machine learning algorithms')
ax.boxplot(results)
ax.set_xticklabels(names)
fig.savefig('Comparison of machine learning algorithms.pdf', dpi=1000)
plt.show()

In [None]:
# Export the comparison figure in EPS format for better print quality, then
# download it from the Colab runtime. PNG would also work, but EPS is the
# format recommended for articles.
from google.colab import files

eps_name = "fig1.eps"
fig.savefig(eps_name, format="eps")
files.download(eps_name)

In [None]:
import seaborn as sns

# Display names for the encoded classes, listed in encoded order (0, then 1).
# NOTE(review): the classification report cell names the second class
# 'default' - confirm which label is intended and that the order matches the
# label encoding.
class_names = ['Diseased', 'Healthy']

for name, model in models:
    # The models are expected to be fitted already (the ROC cell calls fit).
    # predict() returns class labels directly, so no argmax step is needed.
    y_predict = model.predict(testDataGlobal)

    # Rows are true classes, columns are predicted classes.
    cm = confusion_matrix(testLabelsGlobal, y_predict)
    print('Confusion matrix ' + name)
    print(cm)

    # A fresh figure per model, so nothing is drawn onto a figure left over
    # from an earlier cell or a previous iteration.
    cm_fig, cm_ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='g', ax=cm_ax, xticklabels=class_names, yticklabels=class_names, cmap="viridis")
    cm_ax.set_title('Confusion matrix ' + name)
    cm_ax.set_xlabel('Predicted')
    cm_ax.set_ylabel('True')

    # One file per model; a single shared file name would be overwritten on
    # every iteration and keep only the last model.
    cm_fig.savefig('confusion_ML_{}.pdf'.format(name), dpi=800)
    plt.show()
    plt.close(cm_fig)
  


In [None]:
from sklearn.metrics import classification_report
# Precision/recall/F1 per class for every fitted model on the held-out split.
# NOTE(review): the class names here ('Diseased', 'default') differ from the
# ones used for the confusion matrices - confirm which pair is intended.
for name, model in models:
    y_predict = model.predict(testDataGlobal)
    # Label each report so the outputs of the models can be told apart.
    print("Classification report for {}:".format(name))
    print(classification_report(testLabelsGlobal, y_predict, target_names=['Diseased', 'default'], digits=4))


In [None]:
from sklearn.metrics import accuracy_score
# Held-out accuracy of every fitted model, keyed by model name. Predictions are
# recomputed here instead of reusing a `y_predict` left behind by another
# cell, which would silently report only the last model in the list.
{name: accuracy_score(testLabelsGlobal, model.predict(testDataGlobal)) for name, model in models}