<a href="https://colab.research.google.com/github/Yonad91/-Plant-Disease-Detection/blob/main/PlantDiseaseDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
# `-q` keeps pip's output quiet. Only tensorflow, gradio and kaggle are
# installed here; matplotlib and scikit-learn are imported below but not
# listed -- presumably they are preinstalled on the Colab runtime (TODO confirm
# when running elsewhere).
!pip install tensorflow gradio kaggle -q

import os
import zipfile
import shutil
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import gradio as gr
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# -------------------------
# Constants / Paths
# -------------------------
# Input resolution fed to the network and the mini-batch size used by every
# dataset built in this notebook.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Kaggle "<owner>/<dataset>" identifier passed to the Kaggle CLI.
KAGGLE_DATASET = 'vipoooool/new-plant-diseases-dataset'  # Change dataset if desired

# Everything is downloaded to and extracted under WORK_DIR.
WORK_DIR = '/content'
DATASET_ROOT = os.path.join(WORK_DIR, 'New_Plant_Diseases_Dataset')

# Per-split directories below the dataset root; the 'test' split may or may
# not exist on disk.
TRAIN_DIR, VALID_DIR, TEST_DIR = (
    os.path.join(DATASET_ROOT, split_name)
    for split_name in ('train', 'valid', 'test')
)


In [None]:
# -------------------------
# Step 1: Upload kaggle.json and download dataset
# -------------------------
# Flow: upload the API token -> install it under ~/.kaggle -> download the
# dataset ZIP with the Kaggle CLI -> extract it -> locate the folder that
# contains 'train' and point DATASET_ROOT / TRAIN_DIR / VALID_DIR / TEST_DIR
# at it. Any failure is re-raised as RuntimeError with the cause chained.
import subprocess

print("Step 1: Please browse and upload your 'kaggle.json' API token when prompted.")


def find_folder_with_subdir(root, target_subdir='train'):
    """Find the first directory under `root` that contains `target_subdir`.

    Args:
        root: Directory to walk (top-down, via os.walk).
        target_subdir: Name of the child directory to look for.

    Returns:
        A tuple (path_to_target_subdir, parent_directory), or (None, None)
        when no directory under `root` has a child named `target_subdir`.
    """
    for dirpath, dirnames, _filenames in os.walk(root):
        if target_subdir in dirnames:
            return os.path.join(dirpath, target_subdir), dirpath
    return None, None


try:
    # Imported inside the try so that running outside Colab is reported
    # through the same RuntimeError as every other failure of this step.
    from google.colab import files

    uploaded = files.upload()
    if not uploaded:
        raise FileNotFoundError("kaggle.json not uploaded.")
    kaggle_json_filename = list(uploaded.keys())[0]
    kaggle_json_path = os.path.join(WORK_DIR, kaggle_json_filename)

    # Install the token where the Kaggle CLI looks for it; 0o600 keeps the
    # credentials readable by the owner only.
    kaggle_token_path = os.path.expanduser('~/.kaggle/kaggle.json')
    os.makedirs(os.path.dirname(kaggle_token_path), exist_ok=True)
    shutil.copy(kaggle_json_path, kaggle_token_path)
    os.chmod(kaggle_token_path, 0o600)

    print("Step 2: Kaggle API key successfully configured.")
    print("Step 3: Starting download of dataset...")
    # check=True turns a failed download into an exception instead of letting
    # the code below pick up a stale or unrelated ZIP. The argument list
    # avoids building a shell command string.
    subprocess.run(
        ['kaggle', 'datasets', 'download', '-d', KAGGLE_DATASET, '-p', WORK_DIR, '--force'],
        check=True,
    )

    zip_files = sorted(f for f in os.listdir(WORK_DIR) if f.endswith('.zip'))
    if not zip_files:
        raise FileNotFoundError("Downloaded dataset ZIP file not found.")

    # The Kaggle CLI is expected to name the archive after the dataset slug;
    # prefer that file and only fall back to the first ZIP (sorted, so the
    # choice is deterministic) when it is absent.
    expected_zip = KAGGLE_DATASET.split('/')[-1] + '.zip'
    zip_name = expected_zip if expected_zip in zip_files else zip_files[0]
    zip_path = os.path.join(WORK_DIR, zip_name)
    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(WORK_DIR)

    # Locate train folder in extracted dataset
    train_found, parent = find_folder_with_subdir(WORK_DIR, 'train')
    if not train_found:
        raise FileNotFoundError("Could not find 'train' folder in extracted dataset.")

    extracted_root = parent
    if os.path.abspath(extracted_root) != os.path.abspath(DATASET_ROOT):
        if os.path.exists(DATASET_ROOT):
            # shutil.move would nest the folder *inside* an existing
            # destination, leaving TRAIN_DIR pointing at the wrong place;
            # use the extracted location as-is instead.
            DATASET_ROOT = extracted_root
        else:
            try:
                shutil.move(extracted_root, DATASET_ROOT)
            except OSError:
                # Covers shutil.Error too (e.g. moving a directory into
                # itself); fall back to using the data where it was found.
                DATASET_ROOT = extracted_root
    TRAIN_DIR = os.path.join(DATASET_ROOT, 'train')
    VALID_DIR = os.path.join(DATASET_ROOT, 'valid')
    TEST_DIR = os.path.join(DATASET_ROOT, 'test')

    # Best-effort cleanup: a leftover archive only costs disk space.
    try:
        os.remove(zip_path)
    except OSError:
        pass

    print("Dataset ready at:", DATASET_ROOT)

except Exception as e:
    # Chain the original exception so the real traceback stays visible.
    raise RuntimeError(f"Dataset download/extraction failed: {e}") from e


In [None]:
# -------------------------
# Step 2: Create tf.data datasets from directories
# -------------------------
# Fail fast with a clear message if the training directory is missing, rather
# than letting the dataset loader fail later.
if not os.path.exists(TRAIN_DIR):
    raise FileNotFoundError(f"Training directory not found: {TRAIN_DIR}")

# Passed to Dataset.prefetch() in make_datasets below.
AUTOTUNE = tf.data.AUTOTUNE

def make_datasets(train_dir, valid_dir=None, test_dir=None, img_size=(224,224), batch_size=32):
    """Build rescaled, prefetched train/validation/test datasets from image folders.

    Args:
        train_dir: Directory with one sub-folder per class (labels are inferred).
        valid_dir: Optional validation directory. When it is missing or does
            not exist, 15% of `train_dir` is held out instead (seed 123).
        test_dir: Optional test directory. When it is missing or does not
            exist, the validation data is reused as the test set.
        img_size: (height, width) that images are resized to.
        batch_size: Number of images per batch.

    Returns:
        Tuple (train_ds, val_ds, test_ds); each yields (images, int_labels)
        with pixel values multiplied by 1/255.
    """
    # Arguments shared by every image_dataset_from_directory call below.
    common = dict(labels='inferred', label_mode='int',
                  batch_size=batch_size, image_size=img_size)

    has_valid_dir = bool(valid_dir) and os.path.exists(valid_dir)
    if has_valid_dir:
        train_ds = image_dataset_from_directory(train_dir, shuffle=True, **common)
        val_ds = image_dataset_from_directory(valid_dir, shuffle=False, **common)
    else:
        # No separate validation folder: carve both subsets out of train_dir
        # using the same split fraction and seed.
        split = dict(validation_split=0.15, seed=123)
        train_ds = image_dataset_from_directory(train_dir, shuffle=True,
                                                subset='training', **common, **split)
        val_ds = image_dataset_from_directory(train_dir, shuffle=False,
                                              subset='validation', **common, **split)

    has_test_dir = bool(test_dir) and os.path.exists(test_dir)
    if has_test_dir:
        test_ds = image_dataset_from_directory(test_dir, shuffle=False, **common)
    else:
        # Captured before rescaling is applied to val_ds, so the test
        # pipeline is rescaled exactly once below.
        test_ds = val_ds

    normalization_layer = layers.Rescaling(1./255)

    def _rescale(batch_images, batch_labels):
        return normalization_layer(batch_images), batch_labels

    train_ds = train_ds.map(_rescale).prefetch(AUTOTUNE)
    val_ds = val_ds.map(_rescale).prefetch(AUTOTUNE)
    test_ds = test_ds.map(_rescale).prefetch(AUTOTUNE)
    return train_ds, val_ds, test_ds

train_ds, val_ds, test_ds = make_datasets(TRAIN_DIR, VALID_DIR, TEST_DIR, IMAGE_SIZE, BATCH_SIZE)

# make_datasets returns datasets that went through .map()/.prefetch(); such
# datasets typically no longer expose `class_names`, so the directory listing
# below is the usual source. Only AttributeError is caught so that any other
# failure is not silently hidden.
try:
    CLASS_NAMES = train_ds.class_names
except AttributeError:
    # Sorted sub-directory names of TRAIN_DIR, one per class.
    CLASS_NAMES = sorted(
        d for d in os.listdir(TRAIN_DIR)
        if os.path.isdir(os.path.join(TRAIN_DIR, d))
    )
NUM_CLASSES = len(CLASS_NAMES)
print("Detected classes:", NUM_CLASSES, CLASS_NAMES)


In [None]:
# -------------------------
# Step 3: Build VGG16 + custom head
# -------------------------
# Convolutional backbone with ImageNet weights and no classifier head; it is
# frozen so only the layers added below are trained.
input_shape = (IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
base_model.trainable = False

# Classification head: pooled features -> 256-unit hidden layer -> dropout ->
# softmax over the detected classes.
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(NUM_CLASSES, activation='softmax'))

# Labels are integer class ids, hence the sparse categorical loss.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# -------------------------
# Step 4: Train model
# -------------------------
EPOCHS = 10
# Fixed number of training batches per epoch (an epoch here is not one full
# pass over the data).
STEPS_PER_EPOCH = 1000

# Number of validation batches. The cardinality is already an integer, so no
# rounding is needed; negative values are sentinels for "unknown"/"infinite",
# in which case None lets Keras iterate the validation set until exhausted.
_val_batches = int(tf.data.experimental.cardinality(val_ds).numpy())
VALIDATION_STEPS = _val_batches if _val_batches > 0 else None

# Keep the best weights (by validation accuracy), halve the learning rate when
# validation loss stalls for 3 epochs, and log per-epoch metrics to CSV.
checkpoint_cb = ModelCheckpoint('best_model_plant.h5', save_best_only=True, monitor='val_accuracy', verbose=1)
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-8, verbose=1)
csv_logger_cb = CSVLogger('training_log_plant.csv', append=False)

# With an explicit steps_per_epoch, Keras draws EPOCHS * STEPS_PER_EPOCH
# batches from one continuous stream. A finite dataset with fewer batches than
# that runs out of data and training is interrupted early, so the training
# dataset is repeated indefinitely; steps_per_epoch bounds each epoch.
history = model.fit(
    train_ds.repeat(),
    validation_data=val_ds,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
    callbacks=[checkpoint_cb, reduce_lr_cb, csv_logger_cb],
    verbose=1
)


In [None]:
# -------------------------
# Step 5: Plot and evaluate
# -------------------------
# Training curves; .get() with a default keeps the plots working even if a
# metric is absent from the history.
acc = history.history.get('accuracy', [])
val_acc = history.history.get('val_accuracy', [])
loss = history.history.get('loss', [])
val_loss = history.history.get('val_loss', [])

plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(acc, label='Train Acc')
plt.plot(val_acc, label='Val Acc')
plt.legend()
plt.title('Accuracy')
plt.subplot(1,2,2)
plt.plot(loss, label='Train Loss')
plt.plot(val_loss, label='Val Loss')
plt.legend()
plt.title('Loss')
plt.show()

# Collect ground-truth and predicted class ids batch by batch.
# predict_on_batch runs a single forward pass per batch; calling
# model.predict() inside the loop would set up a new prediction loop (and
# print a progress bar) for every batch.
y_true, y_pred = [], []
for imgs, labels in test_ds:
    preds = model.predict_on_batch(imgs)
    y_true.extend(labels.numpy().tolist())
    y_pred.extend(np.argmax(preds, axis=1).tolist())

# Pass the full list of class ids explicitly: without it scikit-learn infers
# the labels from the data, and target_names no longer lines up (and the
# report raises) whenever some class is absent from the evaluation set.
all_label_ids = list(range(len(CLASS_NAMES)))

print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=all_label_ids,
                            target_names=CLASS_NAMES, zero_division=0))

print("Confusion Matrix:")
cm = confusion_matrix(y_true, y_pred, labels=all_label_ids)
print(cm)


In [None]:
# -------------------------
# Step 6: Save model and Gradio interface
# -------------------------
# Persist the model in its current in-memory state under a relative path
# (resolved against the current working directory).
# NOTE(review): this is separate from 'best_model_plant.h5' written by the
# checkpoint callback during training -- presumably the last-epoch weights
# rather than the best ones; confirm which file should be deployed.
MODEL_FILENAME = "plant_disease_vgg16_final.h5"
model.save(MODEL_FILENAME)
print(f"\nModel saved as {MODEL_FILENAME}")

def predict_image_gradio(image: np.ndarray):
    """Classify one uploaded image and format the result for the Gradio UI.

    Args:
        image: Image array supplied by the Gradio image input, or None when
            nothing was uploaded.

    Returns:
        Tuple of three strings: the predicted class label, the confidence as
        a percentage with two decimals, and a one-sentence summary. When
        `image` is None the tuple is ("No image", "", "").
    """
    if image is None:
        return "No image", "", ""

    # Same preparation as the training pipeline: resize to the model's input
    # size, add a batch dimension and scale pixel values by 1/255.
    resized = tf.image.resize(image, IMAGE_SIZE)
    batch = tf.expand_dims(resized, 0) / 255.0
    probabilities = model.predict(batch)

    idx = int(np.argmax(probabilities))
    confidence = float(np.max(probabilities))
    label = CLASS_NAMES[idx]

    confidence_text = f"{confidence*100:.2f}%"
    description = f"Predicted class: {label}. Confidence: {confidence_text}."
    return label, confidence_text, description

# One image input and three text outputs, in the same order as the tuple
# returned by predict_image_gradio (label, confidence, notes).
image_input = gr.Image(type="numpy", label="Upload plant image")
text_outputs = [
    gr.Textbox(label="Prediction"),
    gr.Textbox(label="Confidence"),
    gr.Textbox(label="Notes"),
]

demo = gr.Interface(
    fn=predict_image_gradio,
    inputs=image_input,
    outputs=text_outputs,
    title="Plant Disease Detection (VGG16 Transfer Learning)",
    description="Upload an image for inference.",
)

# Start the web UI.
demo.launch()
