# 🍅 Plant Disease Detection with Deep Learning

This project focuses on detecting plant diseases using deep learning and image classification.

- **Dataset**: [PlantVillage Dataset](https://data.mendeley.com/datasets/tywbtsjrjv/1), includes all 39 classes.
- **Objective**: Build a convolutional neural network (CNN) model using **transfer learning** to classify plant leaves into their respective disease categories or as healthy.
- **Techniques Used**:
  - Data preprocessing and augmentation
  - Transfer learning with pretrained CNNs ( VGG16)
  - Model evaluation and accuracy analysis

This project helps demonstrate how deep learning can be applied in agriculture to aid early disease detection and improve crop health monitoring.


In [None]:
# import necessary libraries
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, MobileNetV2, InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
# from tensorflow.keras.callbacks import EarlyStopping 
from tensorflow.keras.optimizers import Adam

In [2]:
# build data generator for training

# normalize image and carryout data generation
train_datagen = ImageDataGenerator(rescale=1./255,          # normalize pixel values        
                               rotation_range=20,       # randomly rotate images by up to 20 degrees
                               zoom_range=0.2,          # randomly zoom images
                               horizontal_flip=True     # randomly flip images horizontally
                               )

# define data generator for validation and test sets
datagen = ImageDataGenerator(rescale=1./255)

# load training, validation and testing images from directory
train_data = train_datagen.flow_from_directory("plant_leave_disease/train",    # file path
                                              target_size=(224, 224),               # resize images to fit transfer model input
                                              batch_size=64,                         # process 64 images at a time, for faster training
                                              class_mode="categorical"              # one hot encode labels
                                              )

val_data = datagen.flow_from_directory("plant_leave_disease/val",
                                        target_size=(224, 224),
                                        batch_size=64,
                                        class_mode="categorical"
                                        )

test_data = datagen.flow_from_directory("plant_leave_disease/test",
                                        target_size=(225, 225),
                                        batch_size=64,
                                        class_mode="categorical",
                                        shuffle=False                   # do not shuffle testing data
                                        )

Found 38797 images belonging to 39 classes.
Found 11077 images belonging to 39 classes.
Found 5574 images belonging to 39 classes.


## VGG16 Models

In [8]:
# define model architecture
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.3)(x)
predictions = Dense(39, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)

# freeze base model
for layer in base_model.layers:
    layer.trainable = False

In [9]:
# check model summary
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [10]:
# compile model
model.compile(optimizer=Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# define early stoppage parameters
#early_stopping = EarlyStopping(monitor="val_loss", patience=8)

# fit model on training data. set epoch to 5
model.fit(train_data, validation_data=val_data, epochs=5)

# uncomment for more epochs and early stopping
#model.fit(train_data, validation_data=val_data, epochs=100, callbacks=[early_stopping])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x127fa82e950>

In [11]:
loss, accuracy = model.evaluate(test_data)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 78.24%


## VGG16 using last few layers

In [3]:
# define model architecture
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.3)(x)
predictions = Dense(39, activation="softmax")(x)

vgg_model2 = Model(inputs=base_model.input, outputs=predictions)

# freeze base model
for layer in base_model.layers:
    layer.trainable = False

for layer in base_model.layers[-2:]:
    layer.trainable = True

In [4]:
vgg_model2.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [5]:
# compile model
vgg_model2.compile(optimizer=Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# define early stoppage parameters
#early_stopping = EarlyStopping(monitor="val_loss", patience=8)

# fit model on training data. set epoch to 5
vgg_model2.fit(train_data, validation_data=val_data, epochs=5)

# uncomment for more epochs and early stopping
#model.fit(train_data, validation_data=val_data, epochs=100, callbacks=[early_stopping])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x203d252d600>

In [7]:
loss, accuracy = vgg_model2.evaluate(test_data)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 93.51%
