In [None]:
# Report : Feb/18/2026

# This notebook focuses on training a deep convolutional neural network to identify plant diseases using the PlantVillage dataset.
# I employed Transfer Learning with the ResNet50 architecture.


# Environment Setup & Data Loading:
# Mounting Google Drive and extracting the pre-partitioned dataset (Train, Val, Test).
# Loading each split with tf.keras.utils.image_dataset_from_directory at a target size of 224x224.

# Feature Extraction:

# Phase 1 : Loading ResNet50 pre-trained on ImageNet.
# Freezing the base model weights and training only the custom top layers (Dense and Dropout).
# Initial performance reached ~97.8% validation accuracy after 5 epochs.

# Phase 2 : Fine-Tuning by Unfreezing the top 30 layers of the ResNet50 base.
# Re-training with a lower learning rate (1e-4 instead of 1e-3) to adjust the unfrozen weights.
# Final validation accuracy stabilized at ~99.1%.

# Evaluation: The final model is evaluated on the held-out test data.
# Current Test Accuracy: 99.25%


In [1]:
from google.colab import drive

# Mount Google Drive so the dataset archive and the saved models are reachable
# under /content/gdrive/.
drive_mount_point = '/content/gdrive/'
drive.mount(drive_mount_point)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [2]:
# Copy the zipped, pre-split (train/val/test) image archive from the mounted
# Drive folder to the Colab VM's local disk at /content/.
!cp /content/gdrive/MyDrive/leaf_diagnosis_project/data/plantVillage_split_processed.zip /content/

In [3]:
# Unzipping the data in Colab disk
!unzip -q /content/plantVillage_split_processed.zip -d /content/data


replace /content/data/plantVillage_split_processed/test/Apple___Apple_scab/029424b0-0ef5-491b-9ef5-069190d24d8f___FREC_Scab 3504.JPG? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [4]:
# Sanity check that the data exists on the Colab disk: the extracted folder
# should list one sub-directory per split (test, train, val).
!ls /content/data/plantVillage_split_processed

test  train  val


In [5]:
# Root of the extracted dataset; each split lives in its own sub-directory.
base_dir = "/content/data/plantVillage_split_processed"
train_dir, val_dir, test_dir = [
    f"{base_dir}/{split_name}" for split_name in ("train", "val", "test")
]

In [6]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras import layers, models

#import tensorflow as tf
#from tensorflow.keras.preprocessing.image import ImageDataGenerator
#from tensorflow.keras.applications import ResNet50
#from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
#from tensorflow.keras.models import Model
#from tensorflow.keras.optimizers import Adam

In [7]:
#import tensorflow as tf

# Single source of truth for the input pipeline. These constants were
# previously defined but unused while the same literals were repeated in
# every call below.
BATCH_SIZE = 32
IMG_SIZE = (224, 224)  # must match the spatial input size the model is built with

# Training split: loaded with the default shuffling.
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

# Validation split: monitored after every epoch during training.
val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

# Test split: held out for the final evaluation only.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,  # IMPORTANT for evaluation: keep a fixed, reproducible order
)

Found 37997 files belonging to 38 classes.
Found 8129 files belonging to 38 classes.
Found 8179 files belonging to 38 classes.


In [8]:
# Label names in index order, taken from the training split; the length fixes
# the size of the softmax output layer.
class_names = train_ds.class_names
num_classes = len(class_names)

# Every split must map class names to the same integer labels. If a class
# folder were missing from val/ or test/, the inferred indices would shift and
# the reported metrics would be silently wrong, so fail loudly here instead.
assert val_ds.class_names == class_names, "val split classes differ from train split"
assert test_ds.class_names == class_names, "test split classes differ from train split"

In [9]:
# Optimize the data pipeline: wrap every split in a prefetch stage and let
# tf.data choose the buffer size.
AUTOTUNE = tf.data.AUTOTUNE
train_ds, val_ds, test_ds = [
    split_ds.prefetch(buffer_size=AUTOTUNE)
    for split_ds in (train_ds, val_ds, test_ds)
]

In [10]:
# Re-list the dataset root; repeats the earlier sanity check of the split folders.
!ls /content/data/plantVillage_split_processed/

test  train  val


In [11]:
# Save class indices: the class names are written as a JSON list, so an entry's
# position in the list is its integer label.
import json

class_index_file = "/content/leaf_class_indices.json"
with open(class_index_file, "w") as out_file:
    out_file.write(json.dumps(class_names))

print("Class indices saved!")

Class indices saved!


In [12]:
# ResNet50 backbone with ImageNet weights and without its original
# classification head (include_top=False); a custom head is attached later.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Phase 1 (feature extraction): freeze every backbone weight.
base_model.trainable = False

In [13]:
# Build the full model:
#   input -> ResNet preprocessing -> ResNet50 base -> global average pooling
#         -> Dense(512, relu) -> Dropout(0.3) -> softmax over the classes

inputs = layers.Input(shape=(224, 224, 3))

# The base is called with training=False, as in the original cell.
backbone_features = base_model(preprocess_input(inputs), training=False)
pooled_features = layers.GlobalAveragePooling2D()(backbone_features)

# Custom classification head.
head = layers.Dense(512, activation='relu')(pooled_features)
head = layers.Dropout(0.3)(head)
outputs = layers.Dense(num_classes, activation='softmax')(head)

model = tf.keras.Model(inputs, outputs)

# IMPORTANT: the sparse loss expects integer class ids, which is what
# image_dataset_from_directory yields with its default label mode.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

model.summary()

In [14]:

# Phase 1 training (feature extraction): the base is frozen, so only the custom
# head is updated. The returned History is kept in history_fe.
EPOCHS_FE = 5
history_fe = model.fit(train_ds, epochs=EPOCHS_FE, validation_data=val_ds)


Epoch 1/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 114ms/step - accuracy: 0.8361 - loss: 0.5974 - val_accuracy: 0.9683 - val_loss: 0.0957
Epoch 2/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 109ms/step - accuracy: 0.9571 - loss: 0.1282 - val_accuracy: 0.9782 - val_loss: 0.0635
Epoch 3/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 109ms/step - accuracy: 0.9672 - loss: 0.0985 - val_accuracy: 0.9769 - val_loss: 0.0695
Epoch 4/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 109ms/step - accuracy: 0.9724 - loss: 0.0803 - val_accuracy: 0.9754 - val_loss: 0.0844
Epoch 5/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 108ms/step - accuracy: 0.9729 - loss: 0.0769 - val_accuracy: 0.9786 - val_loss: 0.0719


In [18]:
# Save the feature-extraction model to Drive, creating the models folder on
# first use.
import os

feature_model_path = "/content/gdrive/MyDrive/leaf_diagnosis_project/models/resnet_leaf_feature_extractor.keras"
os.makedirs(os.path.dirname(feature_model_path), exist_ok=True)
model.save(feature_model_path)
print("Feature-extraction model saved!")

Feature-extraction model saved!


In [19]:
# Phase 2 setup (fine-tuning): mark the whole base trainable, then re-freeze
# everything except its last 30 layers, so only those top layers (plus the
# custom head) are updated.
base_model.trainable = True
layers_kept_frozen = base_model.layers[:-30]
for frozen_layer in layers_kept_frozen:
    frozen_layer.trainable = False

In [21]:
# The trainable flags changed, so the model is compiled again before training,
# this time with a 10x smaller learning rate than the feature-extraction phase.
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

# Phase 2 training (fine-tuning); the returned History is kept in history_ft.
EPOCHS_FT = 5
history_ft = model.fit(train_ds, epochs=EPOCHS_FT, validation_data=val_ds)

Epoch 1/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 158ms/step - accuracy: 0.9655 - loss: 0.1373 - val_accuracy: 0.9900 - val_loss: 0.0337
Epoch 2/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 146ms/step - accuracy: 0.9914 - loss: 0.0318 - val_accuracy: 0.9921 - val_loss: 0.0238
Epoch 3/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 146ms/step - accuracy: 0.9950 - loss: 0.0191 - val_accuracy: 0.9860 - val_loss: 0.0472
Epoch 4/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 146ms/step - accuracy: 0.9942 - loss: 0.0219 - val_accuracy: 0.9925 - val_loss: 0.0195
Epoch 5/5
[1m1188/1188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 161ms/step - accuracy: 0.9971 - loss: 0.0097 - val_accuracy: 0.9910 - val_loss: 0.0238


In [22]:
# Save the fine-tuned model to Drive as its own .keras file, separate from the
# feature-extraction checkpoint.
final_model_path = "/content/gdrive/MyDrive/leaf_diagnosis_project/models/resnet_leaf_finetuned.keras"
model.save(filepath=final_model_path)
print("Fine-tuned model saved!")

Fine-tuned model saved!


In [23]:
# Final evaluation on the held-out test split. evaluate() returns the loss
# followed by the compiled metrics, here just accuracy.
test_metrics = model.evaluate(test_ds)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 108ms/step - accuracy: 0.9947 - loss: 0.0212
Test Accuracy: 99.25%
