<a href="https://colab.research.google.com/github/ezinneanne/Farmeye/blob/new_branch/plantdisease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')
#drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [3]:
# 🌐 os: For interacting with the operating system, like working with file paths and directories
import os

# 📁 shutil: For copying, moving, or deleting files and directories
import shutil

# 🔢 numpy: For handling numerical operations and arrays (used here for data manipulation)
import numpy as np

# 📊 matplotlib.pyplot: For plotting graphs like accuracy/loss curves to visualize model performance
import matplotlib.pyplot as plt

# 🔄 train_test_split: For splitting datasets into training and validation sets randomly
from sklearn.model_selection import train_test_split

# 🤖 tensorflow: The main machine learning library we're using to build and train deep learning models
import tensorflow as tf

# 🧰 ImageDataGenerator: For loading images and applying real-time data augmentation (like flipping or rotating)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 🏗️ layers: To build the layers of the neural network (like Dense, Dropout, etc.)
# 🏗️ models: To define and compile the overall neural network model
from tensorflow.keras import layers, models

In [5]:
# Set parameters

# Path to the dataset (CATEGORY A inside Google Drive)
DATA_DIR = '/content/drive/MyDrive/image_dataset/CATEGORY A'

# Path to the output directory where the split (train/val) data will be stored in Colab's local storage
OUTPUT_DIR = '/content/split_data'

# Path to save the trained model back to Google Drive
MODEL_SAVE_PATH = '/content/drive/MyDrive/crop_pest_disease_classifier.keras'

# Resize all images to 224x224 pixels (standard size for many pretrained models)
IMG_SIZE = (224, 224)

# Number of images processed together in one batch during training
BATCH_SIZE = 32

# Create the output directory if it doesn't already exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [6]:
# Debugging cell
os.listdir('/content/drive/MyDrive')

['Getting started.pdf', 'TOM2024.zip', 'image_dataset']

In [7]:
import glob

In [8]:
def split_data(data_dir, output_dir, val_size=0.2):
    """
    Split the dataset into training and validation sets while preserving folder structure.
    """
    train_dir = os.path.join(output_dir, 'train')
    val_dir = os.path.join(output_dir, 'val')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    print("📁 Starting data split...")

    # Loop through each language folder (CATA-English, CATA-French)
    for language_folder in os.listdir(data_dir):
        language_path = os.path.join(data_dir, language_folder)

        if not os.path.isdir(language_path):
            continue  # Skip if it's not a folder

        print(f"🔍 Processing language folder: {language_folder}")

        # Loop through each class (onion_diseases, maize_pests, etc.)
        for class_name in os.listdir(language_path):
            class_folder = os.path.join(language_path, class_name)

            if not os.path.isdir(class_folder):
                continue  # Skip if it's not a folder

            print(f"  ➡️ Class: {class_name}")

            # ✅ Recursively get all image paths from subfolders
            image_paths = glob.glob(os.path.join(class_folder, '**', '*.*'), recursive=True)
            image_paths = [path for path in image_paths if path.lower().endswith(('.png', '.jpg', '.jpeg'))]

            print(f"    Found {len(image_paths)} images.")

            if len(image_paths) == 0:
                print(f"    ⚠️ No images found for {class_name}. Skipping...")
                continue  # Skip this class if no images are found

            # Split into training and validation sets
            train_paths, val_paths = train_test_split(image_paths, test_size=val_size, random_state=42)

            # Create corresponding folders in train and val directories
            os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
            os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)

            # Copy training images
            for path in train_paths:
                shutil.copy(path, os.path.join(train_dir, class_name, os.path.basename(path)))

            # Copy validation images
            for path in val_paths:
                shutil.copy(path, os.path.join(val_dir, class_name, os.path.basename(path)))

            print(f"    ✅ {len(train_paths)} training and {len(val_paths)} validation images copied for class '{class_name}'.")

    print("✅ Data split completed!")

split_data(DATA_DIR, OUTPUT_DIR)

📁 Starting data split...
🔍 Processing language folder: CATA-French
  ➡️ Class: onion_diseases
    Found 2165 images.
    ✅ 1732 training and 433 validation images copied for class 'onion_diseases'.
  ➡️ Class: tomato_pests
    Found 317 images.
    ✅ 253 training and 64 validation images copied for class 'tomato_pests'.
  ➡️ Class: tomato_diseases
    Found 4162 images.
    ✅ 3329 training and 833 validation images copied for class 'tomato_diseases'.
  ➡️ Class: onion_pests
    Found 879 images.
    ✅ 703 training and 176 validation images copied for class 'onion_pests'.
  ➡️ Class: maize_pests_activities
    Found 616 images.
    ✅ 492 training and 124 validation images copied for class 'maize_pests_activities'.
  ➡️ Class: maize_diseases
    Found 3517 images.
    ✅ 2813 training and 704 validation images copied for class 'maize_diseases'.
  ➡️ Class: maize_pests
    Found 604 images.
    ✅ 483 training and 121 validation images copied for class 'maize_pests'.
🔍 Processing language f

In [9]:
# Step 2: Create ImageDataGenerators for data augmentation and preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,               # Normalize pixel values to [0,1]
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

validation_datagen = ImageDataGenerator(rescale=1./255)

In [10]:
# Step 3: Create data generators for training and validation sets
train_generator = train_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, 'train'),  # Updated with the split data path
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'  # 'categorical' because we have multiple classes
)

validation_generator = validation_datagen.flow_from_directory(
    os.path.join(OUTPUT_DIR, 'val'),  # Updated with the split data path
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

Found 11760 images belonging to 7 classes.
Found 4405 images belonging to 7 classes.


In [11]:
# Step 4: Build the Model using EfficientNetB0 for transfer learning
base_model = tf.keras.applications.EfficientNetB0(
    weights='imagenet', include_top=False, input_shape=(224, 224, 3)
)
base_model.trainable = False  # Freeze the base model layers

# Build the model on top of EfficientNetB0
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(train_generator.num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
# Step 5: Train model with progress
print("🚀 Starting training...")
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size
)
print("🎉 Training completed!")

🚀 Starting training...


  self._warn_if_super_not_called()


Epoch 1/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1448s[0m 4s/step - accuracy: 0.3131 - loss: 1.6592 - val_accuracy: 0.3367 - val_loss: 1.6294
Epoch 2/10
[1m  1/367[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:40[0m 2s/step - accuracy: 0.2812 - loss: 1.5695



[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m377s[0m 1s/step - accuracy: 0.2812 - loss: 1.5695 - val_accuracy: 0.3367 - val_loss: 1.6225
Epoch 3/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1412s[0m 4s/step - accuracy: 0.3346 - loss: 1.6222 - val_accuracy: 0.3365 - val_loss: 1.6196
Epoch 4/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m335s[0m 908ms/step - accuracy: 0.4062 - loss: 1.6604 - val_accuracy: 0.3365 - val_loss: 1.6213
Epoch 5/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1398s[0m 4s/step - accuracy: 0.3342 - loss: 1.6235 - val_accuracy: 0.3371 - val_loss: 1.6241
Epoch 6/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m335s[0m 906ms/step - accuracy: 0.3438 - loss: 1.4451 - val_accuracy: 0.3360 - val_loss: 1.6248
Epoch 7/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1405s[0m 4s/step - accuracy: 0.3343 - loss: 1.6155 - val_accuracy: 0.3369 - val_loss: 1.6124
Epoch 8/10
[1m367/367[

In [1]:
# Step 6: Plot accuracy and loss
def plot_history(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(acc) + 1)

    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, 'b', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, 'b', label='Training Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.legend()

    plt.show()

plot_history(history)

NameError: name 'history' is not defined

In [None]:
# Step 7: Save model to Google Drive
print(f"💾 Saving model to {MODEL_SAVE_PATH}")
model.save(MODEL_SAVE_PATH)
print("✅ Model saved!")