# **Tomato Disease Detection**

## **Introduction**:

> #### *Problem Statement*:
> > Farmers face many problems in the field, and AI technology can help address some of them. Even after a crop has been selected and grown, some
crop diseases go unidentified by the farmer, which reduces the crop yield. We address this by developing a model that recognizes the disease from an input image of the affected plant. By
tackling this, we can increase the crop yield and sustain crop production.

> #### *Overview of the project*:
> > This project involves building and loading the data, **Plant Village Dataset**, Exploratory Data Analysis, Model Building and Training.  
> #### *Goal of this project*:
> > The goal of this project is to build a model for the tomato crop and to observe the performance metrics of the model.
> #### *Result*:
> > The resulting model can then be used in a web application for tomato crop management or in a tomato plant disease detection system.

In [None]:
%%bash
# Stop at the first failing step so a broken install or download is not
# silently followed by the remaining commands.
set -euo pipefail

# Install the Kaggle CLI.
pip install -q kaggle

# Create the Kaggle config folder and write kaggle.json into it.
# SECURITY: an API key must never be hard-coded in a notebook. Any key that
# was ever committed here must be considered leaked and be revoked/regenerated
# in the Kaggle account settings. Export KAGGLE_USERNAME and KAGGLE_KEY in the
# environment before running this cell.
: "${KAGGLE_USERNAME:?Set KAGGLE_USERNAME before running this cell}"
: "${KAGGLE_KEY:?Set KAGGLE_KEY before running this cell}"
mkdir -p ~/.kaggle
printf '{"username":"%s","key":"%s"}\n' "$KAGGLE_USERNAME" "$KAGGLE_KEY" > ~/.kaggle/kaggle.json

# Restrict the credentials file to the current user.
chmod 600 ~/.kaggle/kaggle.json

# Download the dataset archive.
kaggle datasets download -d shylesh101/tomato-leaf-disease

# Unzip the dataset; -o overwrites without prompting so the cell can be rerun.
unzip -o tomato-leaf-disease.zip

# Install the TensorFlow library.
pip install tensorflow


### Importing all the required libraries

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
from tensorflow.python.client import device_lib
device_lib.list_local_devices()
import os
from tensorflow.keras.utils import image_dataset_from_directory
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
import math
from tensorflow import keras
# import tensorflow_addons as tfa
from tensorflow.keras import layers
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from PIL import Image, ImageEnhance
import random
import cv2
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm
from tensorflow.keras.utils import plot_model
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard
import tensorflow_hub as hub
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
import warnings
warnings.filterwarnings('ignore')

## 1. Exploratory Data Analysis

### Defining the dataset folder paths and a function that returns the folder for a given split

In [None]:
# Locations of the training, test and validation image folders.
train_data_dir, test_data_dir, val_data_dir = (
    '/content/tomato_dataset/' + split for split in ('train', 'test', 'valid')
)

In [None]:
def get_path(plant_dir: str, dir_test: str) -> str:
    """Return the dataset directory for the requested split.

    Args:
        plant_dir: Name of the dataset entry. Accepted for call compatibility;
            it does not influence the result.
        dir_test: Split selector, one of ``'Train'``, ``'Test'`` or
            ``'Valid'`` (matched case-insensitively, surrounding whitespace
            ignored).

    Returns:
        The module-level ``train_data_dir``, ``test_data_dir`` or
        ``val_data_dir`` path.

    Raises:
        ValueError: If ``dir_test`` does not name a known split. Failing here
            avoids handing ``None`` to the dataset loader.
    """
    split = str(dir_test).strip().lower()
    if split == 'test':
        return test_data_dir
    if split == 'train':
        return train_data_dir
    if split == 'valid':
        return val_data_dir
    raise ValueError(
        f"Unknown dataset split {dir_test!r}; expected 'Train', 'Test' or 'Valid'"
    )
# Root folder of the dataset and the names of the entries found directly inside it.
plant_path = '/content/tomato_dataset'
plant_dirs = os.listdir(plant_path)
plant_dirs

### Printing the information about the Training set, the directories and the images

In [None]:
# Image geometry and batching used when loading the datasets.
img_dim = (256, 256)
batch_size = 32
num_channels = 3
input_size = (batch_size, *img_dim, num_channels)

# One tf.data dataset per entry of plant_dirs. Every entry is loaded from the
# training folder because get_path is always asked for the "Train" split.
train_dataset = {}

print("-_-_-_-_-_-_-_-_-_-_Images & Classes for Training-_-_-_-_-_-_-_-_-_-_")
for plant in plant_dirs:
    print(f'>>> No of Images & Classes in "{plant}" directory')
    source_dir = get_path(plant, "Train")
    train_dataset[plant] = image_dataset_from_directory(
        source_dir,
        labels='inferred',
        label_mode='int',
        image_size=img_dim,
        batch_size=batch_size,
        shuffle=True,
    )

### Printing the disease names of each plant

In [None]:
# Class names per dataset entry, printed as a numbered list (numbering starts at 1).
classes = {}
for plant in plant_dirs:
    print(f'>>> Classes in "{plant}" dataset :-')
    classes[plant] = list(train_dataset[plant].class_names)
    for position, class_name in enumerate(classes[plant], start=1):
        print(position, class_name)
    print("\n")

### Plotting few random samples from each plant directory

In [None]:
# For every dataset entry, draw the first ten images of one batch in a 2x5 grid,
# each titled with its class name.
for plant in plant_dirs:
    print(f'>>>> Sample Images of "{plant}" dataset')
    plt.figure(figsize=(14, 5))
    label_names = classes[plant]
    one_batch = train_dataset[plant].take(1)
    for image_batch, image_label in one_batch:
        for i in range(10):
            pixels = image_batch[i].numpy().astype('uint8')
            caption = label_names[image_label[i]]
            plt.subplot(2, 5, i + 1)
            plt.imshow(pixels)
            plt.title(caption)
            plt.axis('off')
        plt.show()
    print("\n\n")

In [None]:
# Display the loaded datasets held in train_dataset (one per entry of plant_dirs).
train_dataset.values()

### Plotting the standardized images for random plant leaf images

In [None]:
# Creating a function for standardizing images
def std_img(img):
    """Standardize every channel of an image to zero mean and unit variance.

    The pixels are flattened to one row per pixel and one column per channel,
    scaled column-wise with ``StandardScaler``, and reshaped back to the shape
    of the input, so images of any height and width are supported.

    Args:
        img: NumPy array whose last axis holds the channels, e.g.
            ``(height, width, 3)``.

    Returns:
        Floating-point array with the same shape as ``img``.
    """
    original_shape = img.shape
    img_flat = img.reshape(-1, original_shape[-1])
    img_std = StandardScaler().fit_transform(img_flat)
    return img_std.reshape(original_shape)

# getting the standardized images with labels
# Images and labels are read from the same explicitly named entry, so the
# result does not depend on whatever value an earlier loop left in `plant`.
sample_key = "train"
tomato_img = []
tomato_label = []
for img, label in train_dataset[sample_key].take(1):
    for i in range(5):
        tomato_img.append(img[i])
        tomato_label.append(classes[sample_key][label[i]])

for i in range(5):
    # Cast to uint8 so the original image is displayed with 0-255 pixel values.
    raw_img = np.array(tomato_img[i]).astype('uint8')
    img_std = std_img(raw_img)

    # Left panel: original image; right panel: standardized image.
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(raw_img)
    plt.title("Tomato " + tomato_label[i])
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(img_std)
    plt.title('Standardized Image')
    plt.axis('off')
plt.show()

### Plotting the 32x32 patch layout of an image

In [None]:
# Geometry for the patch visualisation: the image is resized to
# image_size x image_size and cut into patch_size x patch_size tiles.
image_size, patch_size = 224, 32
img_height, img_width = 512, 512
num_patches = (image_size // patch_size) ** 2
class Patches(layers.Layer):
    """Keras layer that cuts a batch of images into non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the flattened patches of ``images``.

        The window size equals the stride, so patches do not overlap, and
        ``VALID`` padding drops border pixels that do not fill a whole patch.
        The result is reshaped to ``(batch, number_of_patches, patch_dims)``.
        """
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Include ``patch_size`` so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config


import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Show one randomly chosen image from a single class folder ...
plt.figure(figsize=(4, 4))

target = "/content/tomato_dataset/train/Tomato___Bacterial_spot"
random_num = random.choice(os.listdir(target))  # a file name, despite the variable name
# Build the file path from `target` so the folder is spelled out in one place only.
image = mpimg.imread(os.path.join(target, random_num))
plt.imshow(image.astype("uint8"))
plt.axis("off")

# ... then resize it and split it into patches.
resized_image = tf.image.resize(
    tf.convert_to_tensor([image]), size=(image_size, image_size)
)
patches = Patches(patch_size)(resized_image)
print(f"Image size: {image_size} X {image_size}")
print(f"Patch size: {patch_size} X {patch_size}")
print(f"Patches per image: {patches.shape[1]}")
print(f"Elements per patch: {patches.shape[-1]}")

# Draw the patches of the (single) image on an n x n grid.
n = int(np.sqrt(patches.shape[1]))
plt.figure(figsize=(4, 4))
for i, patch in enumerate(patches[0]):
    ax = plt.subplot(n, n, i + 1)
    patch_img = tf.reshape(patch, (patch_size, patch_size, 3))
    plt.imshow(patch_img.numpy().astype("uint8"))
    plt.axis("off")


# **Model Development**

### Declaring the paths for the training, testing, validation path, labels for the given plant diseases and the output length

In [None]:
train_path = f"{plant_path}/train"
val_path = f"{plant_path}/valid"
test_path = f"{plant_path}/test"

# Class labels = names of the class sub-folders of the training directory.
# os.listdir returns entries in arbitrary order, while Keras'
# flow_from_directory numbers the class sub-folders in sorted (alphanumeric)
# order. Sorting, and ignoring plain files, keeps out_labels[i] aligned with
# class index i when the labels are used for plots and reports.
out_labels = sorted(
    entry
    for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)
out_len = len(out_labels)

In [None]:
# Display the number of output classes (the length of out_labels).
out_len

### Declaring the batch size of 64 and the image size of 224X224 pixels


In [None]:
# Batch size and image height used for the model-development section.
batch_size, img_height = 64, 224

### Declare the ImageDataGenerators train_datagen, val_datagen and test_datagen. For train_datagen the images are augmented, and for all three data generators the pixel values are rescaled

In [None]:
# Training generator: rescales pixel values by 1/255 and augments with
# rotation, shear and horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1. / 255.,
    rotation_range=20,
    shear_range=0.15,
    horizontal_flip=True,
)
# Validation and test generators: rescaling only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)

### Declaring the train, test and valid sets for the input to the model, making every image to be of the size 224X224 pixels


In [None]:
# Directory iterators feeding the model: 224x224 images, batches of 64,
# one-hot ("categorical") labels. Only the training iterator is shuffled;
# validation and test keep directory order so predictions line up with
# `.classes`.
train_set = train_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=64,
    shuffle=True,
    class_mode='categorical',
)
val_set = val_datagen.flow_from_directory(
    val_path,
    target_size=(224, 224),
    batch_size=64,
    shuffle=False,
    class_mode='categorical',
)
# The test iterator is built from test_datagen (not val_datagen), so the
# generator declared for the test split is the one actually used.
test_set = test_datagen.flow_from_directory(
    test_path,
    target_size=(224, 224),
    batch_size=64,
    shuffle=False,
    class_mode='categorical',
)

### Loading the pretrained ViT model with a ResNet50 backbone and a patch size of 32, pretrained on the ImageNet-21k dataset. Building a classifier head on top of the pretrained ViT model, with out_len as the output size, which equals the number of tomato disease classes

In [None]:
# Frozen (non-trainable) feature extractor loaded from TensorFlow Hub.
# NOTE(review): images reach this layer rescaled to [0, 1] by the
# ImageDataGenerators; confirm that this is the value range the hub model expects.
fe_L2 = hub.KerasLayer(
    "https://tfhub.dev/sayakpaul/vit_r50_l32_fe/1",
    input_shape=(224, 224, 3),
    trainable=False,
    name="Pre_Trained_",
)

# Classification head: Dense(128, relu) -> Dropout(0.5) -> softmax over out_len classes.
head_layers = [
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(out_len, activation="softmax", name="output_layer"),
]
VIT = tf.keras.Sequential([fe_L2, *head_layers])

VIT.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

### Training the model for 10 epochs

In [None]:
# Train for a fixed number of epochs, one full pass over each iterator per
# epoch; the returned History object is kept in `r` for the plots further down.
epochs = 10
r = VIT.fit(
    train_set,
    validation_data=val_set,
    epochs=epochs,
    steps_per_epoch=len(train_set),
    validation_steps=len(val_set),
)

### Saving the model

In [None]:
# models_name = "plant_disease_detection"+"_model_keras.keras"
# The file name must be a string; the bare name plant_disease_model.h5 is an
# undefined variable and raises NameError.
VIT.save("plant_disease_model.h5")

In [None]:
# Reload the saved model. The path must be a string, and hub.KerasLayer has
# to be passed as a custom object so Keras can rebuild the TF-Hub layer.
loaded_model = tf.keras.models.load_model(
    "plant_disease_model.h5",
    custom_objects={"KerasLayer": hub.KerasLayer},
)

### To make the model file available for download, run the following code

In [None]:
# Pack the given path into model.zip.
# NOTE(review): no cell visible in this notebook creates
# /content/plant_disease_detection_model2.zip; confirm the source path.
!zip -r model.zip /content/plant_disease_detection_model2.zip

In [None]:
# Copy the .keras model file into a Google Drive folder.
# NOTE(review): assumes Drive is mounted at /content/drive and that the
# .keras file exists; confirm which cell writes it before running.
!cp /content/plant_disease_detection_model_keras.keras /content/drive/MyDrive/cnnModel/

In [None]:
# Duplicate model.zip as model1.zip.
!cp /content/model.zip /content/model1.zip


In [None]:
# Rename model1.zip to model_h5.h5.
# NOTE(review): renaming does not convert the file; it is still a zip archive
# despite the .h5 extension. Confirm this is intended.
!mv /content/model1.zip /content/model_h5.h5

In [1]:
# %%shell
# print('hello')
# pip install  

hello


In [6]:
# import tensorflow as tf

# # Load the model using custom_objects
# model_path = 'C:\pythonProject1\plant_disease_detection_model_keras.keras'
# model_with_dropout = tf.keras.models.load_model(model_path)
import tensorflow as tf
import tensorflow_hub as hub

# Location of the saved model, relative to the working directory.
model_path = './plant_disease_detection_model_keras.keras'

# hub.KerasLayer is supplied as a custom object so the saved TF-Hub layer can
# be deserialized. The recorded output of this cell shows the load failing
# with a urlopen error while rebuilding that layer from its tfhub.dev handle,
# i.e. the environment needs to be able to reach that URL.
model = tf.keras.models.load_model(
    model_path, custom_objects={'KerasLayer': hub.KerasLayer}
)

TypeError: <class 'keras.src.models.sequential.Sequential'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'keras', 'class_name': 'Sequential', 'config': {'name': 'sequential', 'layers': [{'module': 'keras.layers', 'class_name': 'InputLayer', 'config': {'batch_input_shape': [None, 224, 224, 3], 'dtype': 'float32', 'sparse': False, 'ragged': False, 'name': 'Pre_Trained__input'}, 'registered_name': None}, {'module': 'tensorflow_hub.keras_layer', 'class_name': 'KerasLayer', 'config': {'name': 'Pre_Trained_', 'trainable': False, 'dtype': 'float32', 'batch_input_shape': [None, 224, 224, 3], 'handle': 'https://tfhub.dev/sayakpaul/vit_r50_l32_fe/1'}, 'registered_name': 'KerasLayer', 'build_config': {'input_shape': [None, 224, 224, 3]}}, {'module': 'keras.layers', 'class_name': 'Dense', 'config': {'name': 'dense', 'trainable': True, 'dtype': 'float32', 'units': 128, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 1024]}}, {'module': 'keras.layers', 'class_name': 'Dropout', 'config': {'name': 'dropout', 'trainable': True, 'dtype': 'float32', 'rate': 0.5, 'noise_shape': None, 'seed': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 128]}}, {'module': 'keras.layers', 'class_name': 'Dense', 'config': {'name': 'output_layer', 'trainable': True, 'dtype': 'float32', 'units': 10, 'activation': 'softmax', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 
'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 128]}}]}, 'registered_name': None, 'build_config': {'input_shape': [None, 224, 224, 3]}, 'compile_config': {'optimizer': {'module': 'keras.optimizers', 'class_name': 'Adam', 'config': {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': 9.999999747378752e-05, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}, 'registered_name': None}, 'loss': 'categorical_crossentropy', 'metrics': ['accuracy'], 'loss_weights': None, 'weighted_metrics': None, 'run_eagerly': None, 'steps_per_execution': None, 'jit_compile': None}}.

Exception encountered: <class 'tensorflow_hub.keras_layer.KerasLayer'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'tensorflow_hub.keras_layer', 'class_name': 'KerasLayer', 'config': {'name': 'Pre_Trained_', 'trainable': False, 'dtype': 'float32', 'batch_input_shape': [None, 224, 224, 3], 'handle': 'https://tfhub.dev/sayakpaul/vit_r50_l32_fe/1'}, 'registered_name': 'KerasLayer', 'build_config': {'input_shape': [None, 224, 224, 3]}}.

Exception encountered: Error when deserializing class 'KerasLayer' using config={'name': 'Pre_Trained_', 'trainable': False, 'dtype': 'float32', 'batch_input_shape': [None, 224, 224, 3], 'handle': 'https://tfhub.dev/sayakpaul/vit_r50_l32_fe/1'}.

Exception encountered: <urlopen error [Errno -2] Name or service not known>

In [None]:
# Display the names of the metrics recorded in the History object returned by VIT.fit.
r.history.keys()

### Predicting the test_set for the model

In [None]:
# Model outputs for every batch of the test iterator.
Y_pred = VIT.predict(test_set, steps=len(test_set))
# Index of the highest-scoring class for each prediction row.
y_pred = Y_pred.argmax(axis=1)

In [None]:
# Display the predicted class indices (row-wise argmax of Y_pred).
y_pred

### Plotting the confusion matrix for given y_pred and y_true

In [None]:
# Class names ordered by the index the test iterator assigned to them, so
# tick label i always describes class index i in the matrix.
cm_labels = sorted(test_set.class_indices, key=test_set.class_indices.get)

cf = confusion_matrix(test_set.classes, y_pred)
test_accuracy_pct = accuracy_score(test_set.classes, y_pred) * 100

plt.figure(figsize=(8, 6))
sns.heatmap(
    cf,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=cm_labels,
    yticklabels=cm_labels,
)
# The crop name is spelled out here instead of reusing the loop variable left
# over from the exploratory cells, which holds a dataset folder name.
plt.title(f'Confusion Matrix of Tomato with Accuracy : {test_accuracy_pct:.2f}%')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt_name = "Tomato_CF.png"
plt.savefig(plt_name)
plt.show()

### Printing the classification report

In [None]:
print('-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_>>>>Classification Report<<<<-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_')
# target_names must follow the class-index order used by the test iterator;
# take the names from its class_indices mapping, sorted by index.
report_labels = sorted(test_set.class_indices, key=test_set.class_indices.get)
print(classification_report(test_set.classes, y_pred, target_names=report_labels))

### Plotting the model Accuracy and Loss Graphs

In [None]:
# Metric curves recorded by VIT.fit.
train_acc = r.history['accuracy']
train_loss = r.history['loss']
val_acc = r.history['val_accuracy']
val_loss = r.history['val_loss']

# The x-axis is derived from the recorded history instead of a fixed 10, so
# the plot stays valid when the number of training epochs changes.
epochs = list(range(len(train_acc)))

fig, ax = plt.subplots(1, 2)
fig.set_size_inches(16, 9)

# Left panel: accuracy. Right panel: loss. Training in green, validation in red.
panels = [
    (ax[0], 'Accuracy', train_acc, val_acc, 'go-', 'ro-'),
    (ax[1], 'Loss', train_loss, val_loss, 'g-o', 'r-o'),
]
for axis, metric, train_values, val_values, train_fmt, val_fmt in panels:
    axis.plot(epochs, train_values, train_fmt, label=f'Training {metric}')
    axis.plot(epochs, val_values, val_fmt, label=f'Validation {metric}')
    axis.set_title(f'Training & Validation {metric}')
    axis.legend()
    axis.set_xlabel("Epochs")
    axis.set_ylabel(metric)

plt.show()

In [None]:
# Loss and accuracy of the trained model on the test iterator; the model was
# compiled with a single metric, so evaluate returns exactly these two values.
test_loss, test_accuracy = VIT.evaluate(x=test_set)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

In [None]:
# test_set is a Keras directory iterator, not a tf.data.Dataset: it has no
# take() method and yields NumPy arrays. Indexing it returns the first batch
# without advancing the iterator's internal position.
image_batch, image_label = test_set[0]

# Class names ordered by class index; the labels are one-hot
# (class_mode='categorical'), so argmax recovers the index.
label_names = sorted(test_set.class_indices, key=test_set.class_indices.get)

plt.figure(figsize=(20, 20))
for i in range(min(20, len(image_batch))):
    plt.subplot(5, 4, i + 1)
    # Pixels are already rescaled to [0, 1] floats by the generator, which
    # imshow displays directly; casting to uint8 would turn them black.
    plt.imshow(image_batch[i])
    plt.title(label_names[int(np.argmax(image_label[i]))])
    plt.axis("off")

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.resnet50 import preprocess_input
# Use the preprocessing the model was trained and evaluated with: pixel values
# rescaled to [0, 1]. Applying ResNet's preprocess_input here, or normalising
# a second time inside preprocess_image, would feed VIT inputs it never saw.
val_datagen = ImageDataGenerator(rescale=1. / 255)

# NOTE: despite its name, train_gen iterates the *test* directory; the name is
# kept so existing references keep working.
train_gen = val_datagen.flow_from_directory(test_data_dir, target_size=(224, 224), batch_size=32, class_mode='categorical')

# Class names ordered by the class index assigned by the generator.
class_names = sorted(train_gen.class_indices, key=train_gen.class_indices.get)


def preprocess_image(img):
    """Turn one already-rescaled image into a single-image batch.

    Args:
        img: Image array with pixel values already scaled to [0, 1], as
            produced by ``train_gen``.

    Returns:
        Tensor of shape ``(1, 224, 224, channels)``.
    """
    img = tf.image.resize(img, (224, 224))  # Resize to the model's input size
    return tf.expand_dims(img, 0)  # Add the batch dimension


def predict(img, model=None):
    """Predict the class of one image.

    Args:
        img: Image array with pixel values scaled to [0, 1].
        model: Keras model to use; defaults to the trained ``VIT`` model.

    Returns:
        Tuple ``(predicted_class, confidence)``, where ``confidence`` is the
        highest class probability as a percentage rounded to two decimals.
    """
    model = VIT if model is None else model
    img_array = preprocess_image(img)
    predictions = model.predict(img_array, verbose=0)
    best_index = int(np.argmax(predictions[0]))
    confidence = round(100 * float(predictions[0][best_index]), 2)
    return class_names[best_index], confidence


# Plotting the images with predictions (first batch only, at most nine images)
plt.figure(figsize=(15, 15))

images, labels = next(train_gen)
for i in range(min(9, len(images))):
    ax = plt.subplot(3, 3, i + 1)
    image = images[i]
    # [0, 1] floats are displayed directly; an integer cast would zero them.
    plt.imshow(image)

    predicted_class, confidence = predict(image)
    index = labels[i].argmax(axis=0)  # labels are one-hot
    actual_class = class_names[index]

    plt.title(f"Actual: {actual_class}\nPredicted: {predicted_class}\nConfidence: {confidence}%")
    plt.axis("off")

plt.show()


In [None]:
# Display the class names stored under the 'train' key of classes (None if the key is absent).
classes.get('train')