In [None]:
# Dependency imports

import os
import zipfile
import requests
import random
import shutil
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing import image
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict

In [None]:
# Download the PlantVillage dataset zip file

url = "https://data.mendeley.com/public-files/datasets/tywbtsjrjv/files/d5652a28-c1d8-4b76-97f3-72fb80f94efc/file_downloaded"
headers = {"User-Agent": "Mozilla/5.0"}
download_path = "plantvillage.zip"

if os.path.exists(download_path):
    # Keep "Restart & Run All" cheap: do not fetch the archive again.
    print("Archive already present, skipping download.")
else:
    # Stream into a temporary file so the archive is never held in memory as a
    # whole and an interrupted transfer cannot leave a truncated zip under the
    # final name.
    partial_path = download_path + ".part"
    with requests.get(url, headers=headers, stream=True, timeout=60) as response:
        if response.status_code == 200:
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
            os.replace(partial_path, download_path)
            print("Download complete!")
        else:
            print(f"Download failed! HTTP {response.status_code}")

In [None]:
# Extract the dataset zip file

zip_path = "plantvillage.zip"
extract_dir = "./plantvillage"

# Unpack the archive only when the target folder is not there yet
if os.path.exists(extract_dir):
    print("Folder already exists, skipping extraction.")
else:
    print("Extracting dataset...")
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall(extract_dir)
    print("Extraction complete!")

# Path to the actual dataset folder (update if needed)
dataset_root = './plantvillage/Plant_leave_diseases_dataset_without_augmentation'

# Print one line per class sub-folder, in sorted order
print("\nPlant Disease Categories:")
if not os.path.exists(dataset_root):
    print("Dataset path not found! Check extraction folder structure.")
else:
    category_dirs = [
        entry for entry in sorted(os.listdir(dataset_root))
        if os.path.isdir(os.path.join(dataset_root, entry))
    ]
    for entry in category_dirs:
        print(f"- {entry}")


In [None]:
# Create balanced dataset directory

original_dir = './plantvillage/Plant_leave_diseases_dataset_without_augmentation'
balanced_dir = './plantvillage_balanced'
os.makedirs(balanced_dir, exist_ok=True)

# File extensions that count as images, both when sizing classes and when copying
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def list_image_files(folder):
    """Return the sorted names of the image files located directly in `folder`."""
    return sorted(f for f in os.listdir(folder) if f.lower().endswith(IMAGE_EXTENSIONS))


# Image augmentation settings
datagen = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Index the image files of every class folder once
class_images = {
    cls: list_image_files(os.path.join(original_dir, cls))
    for cls in sorted(os.listdir(original_dir))
    if os.path.isdir(os.path.join(original_dir, cls))
}

# Count images per class. Only image files are counted so that the target size
# matches what is actually copied below.
class_counts = {cls: len(files) for cls, files in class_images.items()}
if not class_counts:
    raise ValueError(f"No class folders found in {original_dir}")

max_count = max(class_counts.values())
print(f"Maximum images per class = {max_count}")

# Balance all classes
for cls, imgs in class_images.items():
    if not imgs:
        # Nothing to copy and nothing to augment from.
        print(f"Skipping class '{cls}': no image files found.")
        continue

    cls_path = os.path.join(original_dir, cls)
    cls_bal_dir = os.path.join(balanced_dir, cls)
    os.makedirs(cls_bal_dir, exist_ok=True)

    # Copy original images
    for img_name in imgs:
        shutil.copy(os.path.join(cls_path, img_name), os.path.join(cls_bal_dir, img_name))

    # Generate augmented images until the class holds max_count images.
    # The directory is listed once and the count is tracked locally instead of
    # re-listing the folder on every iteration.
    n_present = len(list_image_files(cls_bal_dir))
    i = 0
    while n_present < max_count:
        img_name = random.choice(imgs)
        # Augmented files are named aug_<i>_<source file name>.
        out_path = os.path.join(cls_bal_dir, f"aug_{i}_{img_name}")
        i += 1
        if os.path.exists(out_path):
            # Left over from an earlier run: keep it and try the next index
            # rather than overwriting it without increasing the count.
            continue

        # Close the source file as soon as the pixels have been read
        with Image.open(os.path.join(cls_path, img_name)) as src:
            img = src.convert('RGB').resize((300, 300))
        x = np.expand_dims(np.array(img) / 255.0, axis=0)
        aug_img = next(datagen.flow(x, batch_size=1))[0]
        # Round (not truncate) back to 8-bit so pixel values are not biased downwards
        aug_pixels = np.clip(np.rint(aug_img * 255), 0, 255).astype(np.uint8)
        Image.fromarray(aug_pixels).save(out_path)
        n_present += 1

print("Balanced dataset created at:", balanced_dir)


In [None]:
# Split balanced dataset into train, val, test

# Paths
balanced_dir = './plantvillage_balanced'
split_dir = './plantvillage_split'
os.makedirs(split_dir, exist_ok=True)

# Split ratios (fraction of each class that goes to each split)
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# The split loop below only reads train_ratio and val_ratio (the test split
# receives whatever remains), so verify here that the three documented
# fractions are consistent with each other.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, \
    "train_ratio + val_ratio + test_ratio must equal 1"

# Build the train/val/test folders for one class
def make_dirs(base_dir, cls):
    """Create `<base_dir>/<split>/<cls>` for the train, val and test splits.

    Folders that already exist are left as they are.

    Returns:
        Tuple `(train_path, val_path, test_path)` with the three folder paths.
    """
    split_paths = tuple(
        os.path.join(base_dir, split_name, cls)
        for split_name in ('train', 'val', 'test')
    )
    for path in split_paths:
        os.makedirs(path, exist_ok=True)
    return split_paths

# Loop through each class and split

# Fixed seed + sorted inputs: re-running this cell yields the same assignment.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)


def source_image_name(file_name):
    """Map an augmented file name `aug_<n>_<source>` back to `<source>`.

    Names that do not follow that pattern are returned unchanged.
    """
    parts = file_name.split('_', 2)
    if len(parts) == 3 and parts[0] == 'aug' and parts[1].isdigit():
        return parts[2]
    return file_name


for cls in sorted(os.listdir(balanced_dir)):
    cls_path = os.path.join(balanced_dir, cls)
    if not os.path.isdir(cls_path):
        continue

    images = sorted(
        f for f in os.listdir(cls_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # Group every image with the augmented variants derived from it, so that a
    # source image and its variants always land in the same split. Otherwise
    # near-duplicates of training images would appear in val/test.
    groups = defaultdict(list)
    for img in images:
        groups[source_image_name(img)].append(img)
    group_keys = sorted(groups)
    split_rng.shuffle(group_keys)

    n_total = len(images)
    n_train = int(train_ratio * n_total)
    n_val = int(val_ratio * n_total)
    # Whatever remains after train and val goes to test

    # Drop the output of any earlier run: files assigned differently back then
    # would otherwise stay behind and sit in two splits at once.
    for stale_dir in make_dirs(split_dir, cls):
        shutil.rmtree(stale_dir)
    train_cls, val_cls, test_cls = make_dirs(split_dir, cls)

    # Copy images to respective folders, one whole group at a time. Because
    # groups are kept intact, split sizes can exceed the targets by up to one
    # group.
    n_assigned = 0
    for key in group_keys:
        if n_assigned < n_train:
            dst_dir = train_cls
        elif n_assigned < n_train + n_val:
            dst_dir = val_cls
        else:
            dst_dir = test_cls
        for img in groups[key]:
            shutil.copy(os.path.join(cls_path, img), os.path.join(dst_dir, img))
        n_assigned += len(groups[key])

print("Dataset split into train, val, test at:", split_dir)

In [None]:
# Model Training and Evaluation Script

# ======================
# === Parameters =======
# ======================
IMG_WIDTH = 224
IMG_HEIGHT = 224
BATCH_SIZE = 32
NUM_EPOCHS = 5
NUM_TRAINABLE_LAYERS = 4  # number of trailing base-model layers to fine-tune (0 = no fine-tuning)
LEARNING_RATE = 1e-3

# ======================
# === Dataset Paths ====
# ======================
TRAIN_DATA_DIR = "./plantvillage_split/train"
VALIDATION_DATA_DIR = "./plantvillage_split/val"
TEST_DATA_DIR = "./plantvillage_split/test"

# ======================
# === Dynamic Class Detection ===
# ======================
# Every sub-folder of the training directory is one class; names are kept in sorted order
class_labels = sorted(
    entry for entry in os.listdir(TRAIN_DATA_DIR)
    if os.path.isdir(os.path.join(TRAIN_DATA_DIR, entry))
)
NUM_CLASSES = len(class_labels)
print(f"Detected {NUM_CLASSES} classes: {class_labels}")

# ======================
# === Data Generators ===
# ======================
# Both generators apply only the EfficientNet preprocess_input function.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by all three splits. Keras expects target_size as
# (height, width), so the height constant comes first.
flow_kwargs = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

train_generator = train_datagen.flow_from_directory(TRAIN_DATA_DIR, **flow_kwargs)

validation_generator = val_test_datagen.flow_from_directory(VALIDATION_DATA_DIR, **flow_kwargs)

# shuffle=False so that predictions can be compared against test_generator.classes
test_generator = val_test_datagen.flow_from_directory(
    TEST_DATA_DIR,
    shuffle=False,
    **flow_kwargs
)

# ======================
# === Build Model ======
# ======================
# Keras image inputs are laid out as (height, width, channels)
base_model = EfficientNetB0(weights='imagenet', include_top=False,
                            input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
base_model.summary()

# Freeze all layers
for layer in base_model.layers:
    layer.trainable = False

# Unfreeze last N layers for fine-tuning. BatchNormalization layers stay
# frozen: the Keras fine-tuning guidance keeps them frozen when fine-tuning a
# pretrained network.
if NUM_TRAINABLE_LAYERS > 0:
    for layer in base_model.layers[-NUM_TRAINABLE_LAYERS:]:
        if not isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = True

# Build the full model: pretrained backbone + pooled classification head
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(NUM_CLASSES, activation='softmax')
])

# ======================
# === Compile Model ====
# ======================
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

# ======================
# === Callbacks ========
# ======================
# Halve the learning rate when val_loss has not improved for 3 epochs
lr_schedule_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, verbose=1
)

# Stop after 5 epochs without val_loss improvement and restore the best weights
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True, verbose=1
)

# Keep on disk only the model with the best val_accuracy seen so far
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "efficientnet_plantvillage_best.keras",
    monitor='val_accuracy', save_best_only=True, verbose=1
)

callbacks = [lr_schedule_cb, early_stop_cb, checkpoint_cb]

# ======================
# === Train Model ======
# ======================
history = model.fit(
    train_generator,
    epochs=NUM_EPOCHS,
    validation_data=validation_generator,
    callbacks=callbacks,
)

# ======================
# === Plot Training History ===
# ======================
def plot_training_history(history):
    """Draw train/validation accuracy and loss curves side by side.

    Args:
        history: Object whose `.history` dict holds the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series (as returned by
            `model.fit` above).
    """
    # (history key, display name) for the left and right panel
    panels = (
        ('accuracy', 'Accuracy'),
        ('loss', 'Loss'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (metric, pretty) in zip(axes, panels):
        ax.plot(history.history[metric], label=f'Train {pretty}')
        ax.plot(history.history[f'val_{metric}'], label=f'Validation {pretty}')
        ax.set_title(f'{pretty} over Epochs')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(pretty)
        ax.legend()
        ax.grid(True)

    fig.tight_layout()
    plt.show()

plot_training_history(history)

# ======================
# === Evaluate Model ===
# ======================
loss, accuracy = model.evaluate(test_generator)
print(f"\nTest Accuracy: {accuracy:.4f}")

# ======================
# === Confusion Matrix ===
# ======================
# Ground-truth class indices from the generator, predicted index = most probable class
y_true = test_generator.classes
y_pred_probs = model.predict(test_generator)
y_pred = y_pred_probs.argmax(axis=1)

cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(16, 14))
sns.heatmap(cm, annot=False, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()

# ======================
# === Classification Report ===
# ======================
report = classification_report(y_true, y_pred, target_names=class_labels)
print("Classification Report:\n")
print(report)

# ======================
# === Per-Class Accuracy ===
# ======================
# Tally, per class name, how many test samples exist and how many were predicted correctly
class_correct = defaultdict(int)
class_total = defaultdict(int)

for true, pred in zip(y_true, y_pred):
    class_total[class_labels[true]] += 1
    if true == pred:
        class_correct[class_labels[true]] += 1

# Accuracy is undefined for a class without test samples; report such classes
# instead of failing with a ZeroDivisionError.
classes_without_samples = [cls for cls in class_labels if class_total[cls] == 0]
if classes_without_samples:
    print(f"No test samples for: {classes_without_samples} (left out of per-class accuracy)")

per_class_accuracy = {
    cls: class_correct[cls] / class_total[cls]
    for cls in class_labels
    if class_total[cls] > 0
}

# Plot per-class accuracy
plt.figure(figsize=(16,6))
plt.bar(list(per_class_accuracy.keys()), list(per_class_accuracy.values()))
plt.xticks(rotation=90)
plt.ylabel("Accuracy")
plt.title("Per-Class Accuracy")
plt.ylim(0, 1)
plt.grid(axis='y')
plt.show()

# Optional: print per-class accuracy sorted (worst class first)
print("Per-Class Accuracy:\n")
for cls, acc in sorted(per_class_accuracy.items(), key=lambda x: x[1]):
    print(f"{cls}: {acc:.4f}")

# ======================
# === Save Model =======
# ======================
# Writes the in-memory model, as it stands after training and evaluation, to
# its own file. This is a different file from
# "efficientnet_plantvillage_best.keras", which the ModelCheckpoint callback
# writes during training. The prediction cell further down loads this "final"
# file.
model.save("efficientnet_plantvillage_final.keras")


In [None]:
# Prediction helper for testing a trained model on a single image

def predict_image_with_percentages(img_path, model, class_names, target_size=(224, 224)):
    """Classify one image file and print the probability of every class.

    Args:
        img_path: Path of the image file to classify.
        model: Object whose `predict(batch)` returns one row of per-class
            scores for the single-image batch.
        class_names: Class labels, ordered like the model's outputs.
        target_size: `(height, width)` the image is resized to on loading.
            Defaults to (224, 224).

    Returns:
        Tuple `(predicted_class, preds)`: the label with the highest score and
        the full score vector for the image.

    Raises:
        ValueError: If `class_names` and the model output differ in length,
            which would otherwise silently attach scores to the wrong labels.
    """
    # Load and preprocess
    img = image.load_img(img_path, target_size=target_size)
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)

    # Predict (first and only row of the batch)
    preds = model.predict(batch)[0]

    if len(class_names) != len(preds):
        raise ValueError(
            f"class_names has {len(class_names)} entries but the model "
            f"returned {len(preds)} scores"
        )

    # Print results
    print("\n========= Prediction Result =========")
    for cls, prob in zip(class_names, preds):
        print(f"{cls:25s} : {prob*100:.2f}%")

    best_index = int(np.argmax(preds))
    predicted_class = class_names[best_index]
    confidence = preds[best_index] * 100

    print("\nMost Likely Class:", predicted_class)
    print(f"Confidence: {confidence:.2f}%")

    return predicted_class, preds

In [None]:
model = tf.keras.models.load_model("efficientnet_plantvillage_final.keras")

print("Model loaded successfully!")


# Load class names: one per class sub-folder, in sorted order. A dedicated
# name is used so the TEST_DATA_DIR constant of the training cell is not
# rebound to a different folder, and plain files are ignored so that stray
# entries cannot shift the label order.
CLASS_NAMES_DIR = "./plantvillage/Plant_leave_diseases_dataset_without_augmentation"
class_names = sorted(
    entry for entry in os.listdir(CLASS_NAMES_DIR)
    if os.path.isdir(os.path.join(CLASS_NAMES_DIR, entry))
)

# The label list must line up with the model's output units
num_outputs = model.output_shape[-1]
if len(class_names) != num_outputs:
    raise ValueError(
        f"Found {len(class_names)} class folders in {CLASS_NAMES_DIR} "
        f"but the model has {num_outputs} outputs"
    )

# Predict
predict_image_with_percentages("real_time_testing/images2.jpg", model, class_names)

