# Banana Leaf Disease Detection and Classification

# 1. Import necessary libraries

In [None]:
import os
import cv2 
import keras
import random
import sklearn
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt 
from keras.models import Sequential
from keras.utils import to_categorical 
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from keras.preprocessing.image import load_img
from sklearn.metrics import classification_report
from keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Report the library versions in use so that results can be reproduced later.
for lib_name, lib_version in (("Tensorflow", tf.__version__), ("Keras", keras.__version__)):
    print(f"{lib_name} version: ", lib_version)
sklearn.show_versions()

# 2. Data Preprocessing

In [None]:
def Img_preprocessing(n, img_size=200):
    """Turn a colour image into a fixed-size Sobel edge map.

    Pipeline: grayscale -> resize -> histogram equalisation -> median blur
    -> Sobel gradients along x and y, blended 50/50.

    Parameters
    ----------
    n : numpy.ndarray
        Colour image in BGR channel order (the order ``cv2.imread`` returns).
    img_size : int, optional
        Side length in pixels of the square output. Defaults to 200, the
        value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Single-channel edge image of shape ``(img_size, img_size)``.

    Raises
    ------
    ValueError
        If ``n`` is ``None``, which is what ``cv2.imread`` returns for a
        file it cannot decode.
    """
    if n is None:
        raise ValueError("Img_preprocessing received None instead of an image "
                         "(cv2.imread returns None for unreadable files)")

    # step-1: convert the BGR image to grayscale
    gray_scale = cv2.cvtColor(n, cv2.COLOR_BGR2GRAY)

    # step-2: resize so that every image has the same size
    resized_img = cv2.resize(gray_scale, (img_size, img_size))

    # step-3: histogram equalisation to increase contrast
    equ = cv2.equalizeHist(resized_img)

    # step-4: smooth out noise with a 5x5 median filter
    blur = cv2.medianBlur(equ, 5)

    # step-5: edge detection with first-order Sobel derivatives; CV_16S keeps
    # the negative gradients that an 8-bit result would clip
    grad_x = cv2.Sobel(blur, cv2.CV_16S, 1, 0, ksize=3, scale=1, delta=0, borderType=cv2.BORDER_DEFAULT)
    grad_y = cv2.Sobel(blur, cv2.CV_16S, 0, 1, ksize=3, scale=1, delta=0, borderType=cv2.BORDER_DEFAULT)
    abs_grad_x = cv2.convertScaleAbs(grad_x)
    abs_grad_y = cv2.convertScaleAbs(grad_y)

    # equal-weight blend of the horizontal and vertical edge responses
    return cv2.addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0)




# Root folder of the dataset. A raw string is required: in a normal literal
# "\U" starts a Unicode escape, so the original path was a SyntaxError.
dir1=r"C:\Users\Muhammad Talha\Desktop\Project"
catg=['Banana Bacterial Wilt','Black sigatoka disease','Healthy']

img_cube=[]      # list of [preprocessed_image, label] pairs
image_size=800   # not read anywhere in this cell

# Each folder name is a class; its position in catg is the integer label.
for label,category in enumerate(catg):
    path=os.path.join(dir1,category)
    for file_name in os.listdir(path):
        file_path=os.path.join(path,file_name)
        img_arr=cv2.imread(file_path)  # BGR array, or None when the file is not a readable image
        if img_arr is None:
            print("Skipping unreadable file: ",file_path)
            continue
        final_data=Img_preprocessing(img_arr)
        img_cube.append([final_data,label])
print("Length of your dataset: ",len(img_cube))
if len(img_cube)>10:
    print("Whole image data in array format:\n",img_cube[10])

In [None]:
from IPython.display import Image
# Raw string so the backslashes are path separators, not escape sequences
# ("\U" in a normal literal is a SyntaxError).
Image(filename=r'C:\Users\Muhammad Talha\Desktop\Project\img_seg.png')

In [None]:
# Preview entry 3 of img_cube in grayscale; the cell output is its integer label.
plt.imshow(img_cube[3][0],cmap='gray')
img_cube[3][1]

In [None]:
# Same preview for entry 350.
plt.imshow(img_cube[350][0],cmap='gray')
img_cube[350][1]

In [None]:
# Shape of one stored image array.
img_cube[350][0].shape

In [None]:
# Same preview for entry 500.
plt.imshow(img_cube[500][0],cmap='gray')
img_cube[500][1]

In [None]:
# Randomise the sample order in place.
random.shuffle(img_cube)

# Print the labels of the first ten samples after shuffling: one integer per
# sample, taken from the three class folders of the dataset.
for sample in img_cube[:10]:
    print(sample[1])

# 3. Classifier implementation

In [None]:
from sklearn.svm import SVC

# Every image becomes one 1-D feature vector; the label is the second item of each pair.
feature=[sample[0].flatten() for sample in img_cube]
target=[sample[1] for sample in img_cube]

len(feature),len(target)

# Hold out 25% of the samples for testing, with a fixed seed for the split.
x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.25,random_state=42)

# Support-vector classifier with scikit-learn's default settings.
clf1=SVC()
clf1.fit(x_train,y_train)
y_pred=clf1.predict(x_test)

banner="*-*"*100
print(banner)
print(accuracy_score(y_test,y_pred)*100)
print(banner)

feature[10]

# Evaluation metrics

In [None]:
from sklearn.metrics import classification_report

# Short display names keyed by the integer class labels.
labels={0:'BBW',1:'BSD',2:'Healthy'}
class_names=[labels[key] for key in sorted(labels)]

# Compute the matrix once and reuse it for the plot and the printout.
cm=confusion_matrix(y_test,y_pred)

plt.title('Confusion matrix of the classifier')
# annot/fmt write the integer counts into the cells.
sns.heatmap(cm,cmap="YlGnBu_r",annot=True,fmt='d',xticklabels=class_names,yticklabels=class_names)
plt.xlabel('Predicted label')  # confusion_matrix puts predictions on the columns
plt.ylabel('True label')       # and true classes on the rows
plt.show()

# A bare expression in the middle of a cell is not displayed, so print it.
print(cm)
k=classification_report(y_test,y_pred,target_names=class_names)
print(k)

# Comparing Accuracy without doing preprocessing

In [None]:
from sklearn.svm import SVC

In [None]:
# Baseline experiment: grayscale + resize only, no equalisation/blur/edge step.
# Raw string: "\U" in a normal literal is a SyntaxError.
dir2=r"C:\Users\Muhammad Talha\Desktop\Project"
catg=['Banana Bacterial Wilt','Black sigatoka disease','Healthy']

img_cube2=[]     # list of [image, label] pairs
image_size=500   # not read anywhere in this cell

# Each folder name is a class; its position in catg is the integer label.
for label,category in enumerate(catg):
    path=os.path.join(dir2,category)
    for file_name in os.listdir(path):
        img_arr=cv2.imread(os.path.join(path,file_name))  # None when the file is not a readable image
        if img_arr is None:
            continue
        gray_scale=cv2.cvtColor(img_arr, cv2.COLOR_BGR2GRAY )
        resized_img=cv2.resize(gray_scale,(200,200))
        img_cube2.append([resized_img,label])

random.shuffle(img_cube2)

# One flattened feature vector per image, with its label alongside.
feature2=[sample[0].flatten() for sample in img_cube2]
target2=[sample[1] for sample in img_cube2]

x_train2,x_test2,y_train2,y_test2=train_test_split(feature2,target2,test_size=0.25,random_state=42)

# NOTE: this rebinds clf1, so the classifier trained on preprocessed images is no longer reachable.
clf1=SVC().fit(x_train2,y_train2)
y_pred2=clf1.predict(x_test2)

print("*-*"*100)
print(accuracy_score(y_test2,y_pred2)*100)
print("*-*"*100)

# Show a vector from THIS experiment (the original displayed feature[10] from the other one).
feature2[10]

# Comparing Accuracy with colored image

In [None]:
# Colour experiment: resized BGR images, scaled to [0, 1], no other preprocessing.
# Raw string: "\U" in a normal literal is a SyntaxError.
dir2=r"C:\Users\Muhammad Talha\Desktop\Project"
catg=['Banana Bacterial Wilt','Black sigatoka disease','Healthy']

img_cube2=[]     # list of [image, label] pairs
image_size=500   # not read anywhere in this cell

# Each folder name is a class; its position in catg is the integer label.
for label,category in enumerate(catg):
    path=os.path.join(dir2,category)
    for file_name in os.listdir(path):
        img_arr=cv2.imread(os.path.join(path,file_name))  # None when the file is not a readable image
        if img_arr is None:
            continue
        resized_img=cv2.resize(img_arr,(200,200))
        img_cube2.append([resized_img,label])

random.shuffle(img_cube2)

# One flattened feature vector per image, with its label alongside.
feature2=[sample[0].flatten() for sample in img_cube2]
target2=[sample[1] for sample in img_cube2]

# Scale 8-bit pixel values into [0, 1].
feature2 = np.array(feature2)/ 255.0

x_train2,x_test2,y_train2,y_test2=train_test_split(feature2,target2,test_size=0.25,random_state=42)

clf1=SVC().fit(x_train2,y_train2)
y_pred2=clf1.predict(x_test2)

print("*-*"*100)
print(accuracy_score(y_test2,y_pred2)*100)
print("*-*"*100)


In [None]:
# x_train comes from train_test_split on a Python list, which returns a list;
# lists have no .shape, so convert before inspecting.
np.asarray(x_train).shape

# Data augmentation

In [None]:
# Random-transform generator shared by the augmentation cells below.
data_aug=ImageDataGenerator(  rotation_range=40,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              channel_shift_range=0.2,
                              zoom_range=0.2,
                              fill_mode="nearest",
                              horizontal_flip=True,
                              vertical_flip=True,
                             )

# flow() needs a rank-4 array (samples, height, width, channels). When x_train
# holds flattened square grayscale vectors, restore the image shape first.
x_aug=np.asarray(x_train)
if x_aug.ndim==2:
    side=int(round(np.sqrt(x_aug.shape[1])))
    x_aug=x_aug.reshape(-1,side,side,1)

s=data_aug.flow(x_aug,y_train, batch_size = 32)
s[0][0][0]  # first augmented image of the first batch

In [None]:
#for BBW class
# Writes augmented PNG copies of the 'Banana Bacterial Wilt' images into that class folder.
# NOTE: save_to_dir is the same folder that is being read, so re-running this
# cell also augments the previously generated 'aug*' files.
# Raw strings: "\U" is a SyntaxError in a normal literal, and the original
# trailing backslash escaped the closing quote.
i=0
for batch in data_aug.flow_from_directory(directory=r"C:\Users\Muhammad Talha\Desktop\Project",
                                          classes=['Banana Bacterial Wilt'],
                                          batch_size=32,
                                          target_size=(500,500),
                                          color_mode='rgb',
                                          save_to_dir=r'C:\Users\Muhammad Talha\Desktop\Project\Banana Bacterial Wilt',
                                          save_prefix='aug',
                                          save_format='png'):
    i += 1
    if i>19: # stop after 20 batches (each batch holds up to 32 images)
        break

In [None]:
#for BSD class
# Writes augmented PNG copies of the 'Black sigatoka disease' images into that class folder.
# NOTE: save_to_dir is the same folder that is being read, so re-running this
# cell also augments the previously generated 'aug*' files.
# Raw strings: "\U" is a SyntaxError in a normal literal, and the original
# trailing backslash escaped the closing quote.
i=0
for batch in data_aug.flow_from_directory(directory=r"C:\Users\Muhammad Talha\Desktop\Project",
                                          classes=['Black sigatoka disease'],
                                          batch_size=32,
                                          target_size=(500,500),
                                          color_mode='rgb',
                                          save_to_dir=r'C:\Users\Muhammad Talha\Desktop\Project\Black sigatoka disease',
                                          save_prefix='aug',
                                          save_format='png'):
    i += 1
    if i>19: # stop after 20 batches (each batch holds up to 32 images)
        break

In [None]:
#for healthy class
# Writes augmented PNG copies of the 'Healthy' images into that class folder.
# NOTE: save_to_dir is the same folder that is being read, so re-running this
# cell also augments the previously generated 'aug*' files.
# Raw strings: "\U" is a SyntaxError in a normal literal, and the original
# trailing backslash escaped the closing quote.
i=0
for batch in data_aug.flow_from_directory(directory=r"C:\Users\Muhammad Talha\Desktop\Project",
                                          classes=['Healthy'],
                                          batch_size=32,
                                          target_size=(500,500),
                                          color_mode='rgb',
                                          save_to_dir=r'C:\Users\Muhammad Talha\Desktop\Project\Healthy',
                                          save_prefix='aug',
                                          save_format='png'):
    i += 1
    if i>19: # stop after 20 batches (each batch holds up to 32 images)
        break

# Extract the Data

In [None]:
# Colour dataset for the CNN: every readable image resized to 100x100, kept in BGR.
# Raw string: "\U" in a normal literal is a SyntaxError.
dir1=r"C:\Users\Muhammad Talha\Desktop\Project"
catg=['Banana Bacterial Wilt','Black sigatoka disease','Healthy']

img_cube3=[]   # list of [image, label] pairs

# Each folder name is a class; its position in catg is the integer label.
for label,category in enumerate(catg):
    path=os.path.join(dir1,category)
    for file_name in os.listdir(path):
        img_arr=cv2.imread(os.path.join(path,file_name))  # None when the file is not a readable image
        if img_arr is None:
            continue
        final_data=cv2.resize(img_arr,(100,100))
        img_cube3.append([final_data,label])

img_cube3[10]

In [None]:
# Separate the [image, label] pairs into an image stack and a label vector.
feat3=[pair[0] for pair in img_cube3]
tar3=[pair[1] for pair in img_cube3]
x=np.array(feat3)
x=x/255.0 # scale 8-bit pixel values into [0, 1]
y=np.array(tar3)
# cv2.imread returns BGR while matplotlib expects RGB, so reverse the channel
# axis for display only (x itself is left untouched).
plt.imshow(x[500][...,::-1])

In [None]:
# Split the colour dataset: 30% held out for testing, fixed seed 0.
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=0,test_size=0.30)
x_train.shape

In [None]:
# One-hot encoding is left disabled, so the labels stay as integer class indices.
#y_train=to_categorical(y_train, dtype ="float64")
#y_test=to_categorical(y_test,  dtype ="float64")

In [None]:
# Label-array shapes of the test and training partitions.
y_test.shape,y_train.shape

In [None]:
# Element types of the training images and labels.
x_train.dtype,y_train.dtype

In [None]:
#x_train2=x_train2.reshape(len(x_train2),200,200,1) #these two lines are not applicable for colored image we can directly fit those values
#x_test2=x_test2.reshape(len(x_test2),200,200,1)

# Data augmentation for a single image

In [None]:
# Show nine random augmentations of one image, using the data_aug generator.
from numpy import expand_dims

# load the image (raw string so the backslashes are not read as escapes)
img = load_img(r"C:\Users\Muhammad Talha\Desktop\Project\Healthy\9.png")
# convert to numpy array
data = img_to_array(img)
# add a leading batch axis: flow() expects (samples, height, width, channels)
samples = expand_dims(data, 0)

# prepare iterator; with a single sample every batch holds one augmented image
it = data_aug.flow(samples, batch_size=9)
# generate samples and plot them on a 3x3 grid
plt.figure(figsize=(10,10))
for i in range(9):
    # define subplot (the file imports matplotlib.pyplot as plt, not pyplot)
    plt.subplot(3, 3, i + 1)
    # next() works on every Keras version; the .next() method does not
    batch = next(it)
    # convert to unsigned integers for viewing
    image = batch[0].astype('uint8')
    # plot raw pixel data
    plt.imshow(image)
# show the figure
plt.show()

# Train Neural network model

In [None]:
# Four Conv2D+MaxPooling2D stages followed by two dense layers and a 3-way softmax.
cnn2=Sequential()

#convolutional layer-1
# The input shape belongs on the FIRST layer; on the mid-network Flatten
# layer it has no effect.
cnn2.add(Conv2D(32, kernel_size=(3, 3), activation='relu',  padding='same',
                input_shape=x_train.shape[1:]))
cnn2.add(MaxPooling2D(2,2))

#convolutional layer-2
cnn2.add(Conv2D(64, kernel_size=(3, 3), activation='relu',  padding='same'))
cnn2.add(MaxPooling2D(2,2))

#convolutional layer-3
cnn2.add(Conv2D(128, kernel_size=(3, 3), activation='relu',  padding='same'))
cnn2.add(MaxPooling2D(2,2))

#convolutional layer-4
cnn2.add(Conv2D(256, kernel_size=(3, 3), activation='relu',  padding='same'))
cnn2.add(MaxPooling2D(2,2))

#flatten layer
cnn2.add(Flatten())

#densely connected layers
cnn2.add(Dense(256,activation='relu'))
cnn2.add(Dense(128,activation='relu'))

#output layer: one softmax probability per class
cnn2.add(Dense(3,activation='softmax'))

# Sparse categorical cross-entropy takes integer labels. from_logits=False
# because the output layer already applies softmax; use True only when the
# last layer has no activation.
cnn2.compile(optimizer='Adagrad',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
#with RMSPROP got 74-72%
#SGD found 57%

# NOTE: the test split doubles as validation data, so the validation curves
# are not an independent estimate.
history2=cnn2.fit(  x_train,
          y_train,
          verbose=1,
          epochs=200,  #20
          batch_size=32,  #15
          validation_data=(x_test,y_test) )

# Observation

In [None]:
# Score the trained network on the held-out split.
# evaluate() returns the loss first, then the metrics in compile order.
score = cnn2.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# One batched forward pass instead of one predict() call per image.
probabilities=cnn2.predict(x_test)
# Keep the layout later cells rely on: a list with one (1, n_classes) array per test sample.
y_pred=[row.reshape(1,-1) for row in probabilities]

len(y_test),len(y_pred)

In [None]:
y_pred[:10]  # first ten model outputs; the predicted class of each is the index of its largest value (argmax)

In [None]:
# For every test sample keep the index of the largest model output, i.e. the predicted class.
result=[np.argmax(y_pred[idx],axis=1) for idx in range(len(x_test))]
result[:10]

In [None]:
cnn2.summary()


# Predicted class indices from the previous cell, converted to an array once.
y_pred2=result
predicted=np.array(y_pred2)
separator="*-*"*40

print(separator)
print("Accuracy Found: ",accuracy_score(y_test,predicted))
#using adam accuracy was 83%
#using adadelta accuracy was 89%
print(separator)
print("Confusion Matrix:\n",confusion_matrix(y_test,predicted))
print(separator)
print("Classification Report:\n",classification_report(y_test,predicted))
print(separator)

# Training vs. validation curves: one figure for the loss, one for the accuracy.
curves=(
    ('Loss Comparison','loss','val_loss',['Training Loss','Validation Loss']),
    ('Accuracy comparison ','accuracy','val_accuracy',['Training Acc','Validation Acc']),
)
for title,train_key,val_key,legend_labels in curves:
    plt.title(title,fontsize=20)
    plt.plot(history2.history[train_key])
    plt.plot(history2.history[val_key])
    plt.legend(legend_labels)
    plt.show()

In [None]:
from keras.models import save_model

# Persist the trained network under a relative file name.
# (Alternative kept for reference: save_model(cnn2, filepath).)
model_file="BananaLeaf_classifier.h5"
cnn2.save(model_file)

In [None]:
# Save a second copy for deployment, under the same project folder as every
# other path in this notebook (the previous target was hard-coded to another
# user's profile). The folder is created first so save() cannot fail on a
# missing directory.
deploy_dir=r'C:\Users\Muhammad Talha\Desktop\Project\Deploy'
os.makedirs(deploy_dir,exist_ok=True)
cnn2.save(os.path.join(deploy_dir,"BananaLeaf_classifier.h5"))

# Model Testing

In [None]:
# Load the model
# Reload the network from the file name used by cnn2.save("BananaLeaf_classifier.h5").
from keras.models import load_model

#filepath = 'C:\Users\Muhammad Talha\Desktop\Project'
mymodel = load_model("BananaLeaf_classifier.h5")

In [None]:
# Print the layer summary of the reloaded model.
mymodel.summary()

In [None]:
def prediction(result,model):
    """Classify one leaf image with ``model`` and print the diagnosis.

    Parameters
    ----------
    result : numpy.ndarray
        Three-channel image array. It is resized to 100x100 when it does not
        already have that size.
        NOTE(review): the training images were divided by 255 before fitting;
        pass pixel values scaled the same way.
    model : object
        Trained classifier exposing ``predict``. The function previously
        ignored this argument and always used the global ``mymodel``.

    Returns
    -------
    None
        The diagnosis is printed; nothing is printed for an index outside 0-2.
    """
    arr=result
    if arr.shape[:2]!=(100,100):
        arr=cv2.resize(arr,(100,100))
    y_prediction=model.predict(arr.reshape(1,100,100,3))
    # index of the largest output = predicted class
    class_index=int(np.argmax(y_prediction,axis=1)[0])
    # class 0 is 'Banana Bacterial Wilt' in the category list, not "Black ..."
    messages={
        0:'It has a disease called Banana Bacterial Wilt',
        1:'It has a disease called Black Sigatoka Disease',
        2:'Wohh!!! It is a healthy Leaf',
    }
    if class_index in messages:
        print(messages[class_index])
        
# Classify test sample 309 with the reloaded model.
prediction(x_test[309],mymodel)

In [None]:
# True label of the same sample, for comparison with the printed diagnosis.
y_test[309]

# Model Deployment

In [None]:
# will be done in phase 2