<a href="https://colab.research.google.com/github/Yonad91/-Plant-Disease-Detection/blob/main/SelectedTopics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import os
import zipfile
import shutil
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input
import matplotlib.pyplot as plt

# --- 1. Configure Kaggle API using Colab Uploader (REVISED) ---
print("1. Configuring Kaggle API...")
try:
    from google.colab import files

    # This will open a file selection dialog. Please choose your 'kaggle.json' file.
    uploaded = files.upload()

    if uploaded:
        # Get the actual filename Colab used for the uploaded file
        uploaded_filename = list(uploaded.keys())[0]
        uploaded_path = os.path.join('/content/', uploaded_filename)

        # Ensure the destination directory exists
        !mkdir -p ~/.kaggle

        # Copy the uploaded file to the required Kaggle path, naming it 'kaggle.json'
        # This fixes issues where Colab might rename the file during upload.
        !cp {uploaded_path} ~/.kaggle/kaggle.json
        !chmod 600 ~/.kaggle/kaggle.json

        print(f"Kaggle configuration successful. Uploaded file: {uploaded_filename}")
    else:
        print("ERROR: No file uploaded. Cannot download dataset.")
        raise FileNotFoundError("Kaggle API key is missing. Cannot proceed.")

except ImportError:
    print("WARNING: Not running in a Colab environment. Skipping file upload.")

# --- 2. Download and Extract Dataset ---
KAGGLE_DATASET = 'vipoooool/new-plant-diseases-dataset'
DATASET_PATH = '/content/PlantDiseases'
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

if not os.path.exists(DATASET_PATH):
    print("2. Downloading and unzipping dataset...")
    # This command now works because the API is configured
    !kaggle datasets download -d {KAGGLE_DATASET} -p /content/

    zip_files = [f for f in os.listdir('/content') if f.endswith('.zip')]
    if zip_files:
        # Extract the main zip file
        with zipfile.ZipFile(os.path.join('/content', zip_files[0]), 'r') as zip_ref:
            zip_ref.extractall('/content/')

        # Organize files into a cleaner structure
        source_dir = '/content/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)'
        os.makedirs(DATASET_PATH, exist_ok=True)
        shutil.move(os.path.join(source_dir,'train'), os.path.join(DATASET_PATH, 'train'))
        shutil.move(os.path.join(source_dir,'valid'), os.path.join(DATASET_PATH, 'valid'))
        shutil.rmtree('/content/New Plant Diseases Dataset(Augmented)', ignore_errors=True)
        print("Dataset successfully extracted and organized.")

# --- 3. Define Hyperparameters and Data Generators ---
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
VALID_DIR = os.path.join(DATASET_PATH, 'valid')
# Calculate the number of classes automatically
OUTPUT_SHAPE = len(os.listdir(TRAIN_DIR))

# Data Augmentation for Training
train_datagen = ImageDataGenerator(
    rescale=1./255, rotation_range=20, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.1, zoom_range=0.1,
    horizontal_flip=True, fill_mode='nearest'
)
# Rescaling only for Validation
valid_datagen = ImageDataGenerator(rescale=1./255)

# Create Data Generators
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    class_mode='categorical', shuffle=True
)
valid_generator = valid_datagen.flow_from_directory(
    VALID_DIR, target_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    class_mode='categorical', shuffle=False
)

CLASS_NAMES = list(train_generator.class_indices.keys())
print(f"\nConfiguration complete. Classes detected: {OUTPUT_SHAPE}.")
print("-" * 70)

1. Configuring Kaggle API...


Saving kaggle (1).json to kaggle (1) (2).json
/bin/bash: -c: line 1: syntax error near unexpected token `('
/bin/bash: -c: line 1: `cp /content/kaggle (1) (2).json ~/.kaggle/kaggle.json'
Kaggle configuration successful. Uploaded file: kaggle (1) (2).json
Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.

Configuration complete. Classes detected: 38.
----------------------------------------------------------------------


In [None]:

# ==============================================================================
# COLAB CELL 2: MODEL DEFINITION AND TRAINING
# (This is the longest-running cell)
# ==============================================================================

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# --- 1. Load and Freeze VGG16 Base Model ---
print("1. Loading VGG16 base model and freezing layers...")
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=IMAGE_SIZE + (3,))
)
for layer in base_model.layers:
    layer.trainable = False

# --- 2. Build the Custom Classification Head ---
model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(OUTPUT_SHAPE, activation='softmax')
])

# --- 3. Compile the Model ---
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("Model Summary:")
model.summary()

# --- 4. Define Callbacks and Train ---
EPOCHS = 10
# Saving to the temporary /content/ folder
model_save_path = '/content/plant_disease_vgg16_best_model.keras'

callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint(model_save_path, monitor='val_accuracy', save_best_only=True, mode='max'),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
]

print(f"\n4. Starting training for up to {EPOCHS} epochs...")

history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=valid_generator,
    validation_steps=valid_generator.samples // BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

print(f"\nTraining finished. Best model saved to: {model_save_path}")
print("-" * 70)



1. Loading VGG16 base model and freezing layers...
Model Summary:



4. Starting training for up to 10 epochs...


  self._warn_if_super_not_called()


Epoch 1/10


Expected: ['keras_tensor_25']
Received: inputs=Tensor(shape=(None, 224, 224, 3))


[1m2196/2196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 427ms/step - accuracy: 0.5368 - loss: 1.6892

In [None]:
# ==============================================================================
# COLAB CELL 3: EVALUATION AND VISUALIZATION
# ==============================================================================

from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# --- 1. Load the Best Saved Model ---
try:
    best_model = tf.keras.models.load_model(model_save_path)
    print("Best model loaded successfully for evaluation.")
except Exception as e:
    print(f"Error loading model: {e}. Falling back to current model state.")
    best_model = model

# --- 2. Generate Predictions and Classification Report ---
valid_generator.reset()
Y_pred = best_model.predict(valid_generator, steps=valid_generator.samples // BATCH_SIZE + 1)
y_pred = np.argmax(Y_pred, axis=1)

print("\n--- Model Evaluation: Classification Report (Required F1-score) ---")
print(classification_report(valid_generator.classes, y_pred, target_names=CLASS_NAMES))

# --- 3. Plot Training History (Accuracy and Loss) ---
print("\nPlotting Training History...")
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(len(acc))

plt.figure(figsize=(15, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# --- 4. Plot Confusion Matrix ---
print("\nPlotting Confusion Matrix (Large Plot)...")
cm = confusion_matrix(valid_generator.classes, y_pred)
plt.figure(figsize=(18, 15))
sns.heatmap(cm, annot=False, fmt='d', cmap='Blues', xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()
print("-" * 70)

Error loading model: name 'tf' is not defined. Falling back to current model state.


NameError: name 'model' is not defined

In [None]:
# ==============================================================================
# COLAB CELL 4: INDEPENDENT PREDICTION DEMONSTRATION
# ==============================================================================

from tensorflow.keras.preprocessing import image
import random

def get_random_validation_image(valid_dir, class_names):
    """Utility to pick a random image from the validation set."""
    random_class = random.choice(class_names)
    class_path = os.path.join(valid_dir, random_class)
    random_image_name = random.choice(os.listdir(class_path))
    return os.path.join(class_path, random_image_name)

def predict_disease(model, img_path, class_names):
    """Loads, preprocesses, and predicts the class of a single image."""
    img = image.load_img(img_path, target_size=IMAGE_SIZE)
    # Convert to array, normalize, and add batch dimension
    img_array = np.expand_dims(image.img_to_array(img) / 255.0, axis=0)

    predictions = model.predict(img_array)
    predicted_class_index = np.argmax(predictions[0])
    predicted_class_name = class_names[predicted_class_index]
    confidence = np.max(predictions[0]) * 100

    # Display the result
    plt.imshow(img)
    plt.title(f"Predicted: {predicted_class_name}\nConfidence: {confidence:.2f}%")
    plt.axis('off')
    plt.show()

# Execute Prediction
try:
    sample_img_path = get_random_validation_image(VALID_DIR, CLASS_NAMES)
    print(f"Testing a random image: {sample_img_path}")
    best_model = tf.keras.models.load_model(model_save_path)
    predict_disease(best_model, sample_img_path, CLASS_NAMES)
except Exception as e:
    print(f"Could not perform independent prediction. Error: {e}")