In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Path to your main dataset folder (update this as needed)
dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"


# Set the desired image size
IMG_SIZE = 224

# Lists to store images and labels
images = []
labels = []

# Loop through each subfolder and read images
for class_name in tqdm(os.listdir(dataset_path), desc="Loading Images"):
    class_path = os.path.join(dataset_path, class_name)
    
    if os.path.isdir(class_path):
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            
            try:
                img = cv2.imread(img_path)
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                
                images.append(img)
                labels.append(class_name)
            except Exception as e:
                print(f"Error loading {img_path}: {e}")

Loading Images: 100%|██████████| 16/16 [00:46<00:00,  2.88s/it]


In [3]:
images = np.array(images) / 255.0  # Normalize image pixel values to [0,1]
labels = np.array(labels)

print(f"Total images loaded: {len(images)}")
print(f"Number of unique classes: {len(np.unique(labels))}")

Total images loaded: 7907
Number of unique classes: 16


In [6]:
import os

# Change this to your actual dataset path
dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"

# List of image file extensions (you can modify if needed)
image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']

# Counter
total_images = 0

# Loop through each folder and count image files
for root, dirs, files in os.walk(dataset_path):
    for file in files:
        if os.path.splitext(file)[1].lower() in image_extensions:
            total_images += 1

print(f"Total number of images in dataset: {total_images}")


Total number of images in dataset: 7907


Preprocessing

In [8]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 🔹 Dataset path
dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"
batch_size = 32
img_size = (224, 224)  # Change if necessary

# 🔹 Data Augmentation & Preprocessing
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2,
    brightness_range=[0.8, 1.2]
)

train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

print(f"Preprocessed {train_generator.samples} training and {val_generator.samples} validation images")


Found 6331 images belonging to 16 classes.
Found 1576 images belonging to 16 classes.
Preprocessed 6331 training and 1576 validation images


DenseNet121

In [11]:
# ✅ Imports
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
import numpy as np
import os

# ✅ Set seed for reproducibility
tf.random.set_seed(42)

# ✅ Define dataset path
dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"

# ✅ Data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    validation_split=0.2,
    fill_mode='nearest'
)

# ✅ Generators
train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=24,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=42
)

val_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=24,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# ✅ Load DenseNet121 base
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze base

# ✅ Custom classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu', kernel_regularizer=l2(0.0005))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)
x = Dense(512, activation='relu', kernel_regularizer=l2(0.0005))(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
outputs = Dense(train_generator.num_classes, activation='softmax')(x)

# ✅ Build model
model = Model(inputs=base_model.input, outputs=outputs)

# ✅ Compile model
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# ✅ Callbacks
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1),
    EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True, verbose=1),
    ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, verbose=1)
]

# ✅ PHASE 1: Train with frozen base
print("🔹 Phase 1: Training with frozen DenseNet base...")
initial_epochs = 15
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=initial_epochs,
    callbacks=callbacks
)

# ✅ PHASE 2: Fine-tuning
print("🔹 Phase 2: Fine-tuning the DenseNet...")

# Unfreeze last 80 layers
for layer in base_model.layers[:-80]:
    layer.trainable = False
for layer in base_model.layers[-80:]:
    layer.trainable = True

# Recompile with lower learning rate
model.compile(
    optimizer=Adam(learning_rate=5e-6),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Optional: Cosine learning rate schedule
def cosine_decay_with_warmup(epoch):
    warmup_epochs = 5
    total_epochs = 40
    warmup_lr = 1e-5
    base_lr = 1e-3
    min_lr = 1e-6

    if epoch < warmup_epochs:
        return warmup_lr + (base_lr - warmup_lr) * (epoch / warmup_epochs)
    
    progress = (epoch - warmup_epochs) / (total_epochs - warmup_epochs)
    return min_lr + 0.5 * (base_lr - min_lr) * (1 + np.cos(np.pi * progress))

lr_scheduler = LearningRateScheduler(cosine_decay_with_warmup)
fine_tuning_callbacks = callbacks + [lr_scheduler]

# Fine-tune model
total_epochs = 40
history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=initial_epochs,
    callbacks=fine_tuning_callbacks
)

# ✅ Save final model
model.save("crop_disease_model_final.h5")
print("Final model saved as 'crop_disease_model_final.h5'")


Found 6331 images belonging to 16 classes.
Found 1576 images belonging to 16 classes.
🔹 Phase 1: Training with frozen DenseNet base...
Epoch 1/15


2025-04-09 09:38:13.603976: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2025-04-09 09:38:16.924209: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f15001480f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-04-09 09:38:16.924232: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA H100 PCIe MIG 3g.40gb, Compute Capability 9.0
2025-04-09 09:38:16.951295: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-04-09 09:38:17.078539: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or direc



2025-04-09 09:39:27.696637: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_accuracy improved from -inf to 0.84835, saving model to best_model.h5
Epoch 2/15
Epoch 2: val_accuracy improved from 0.84835 to 0.87754, saving model to best_model.h5
Epoch 3/15
Epoch 3: val_accuracy did not improve from 0.87754
Epoch 4/15
Epoch 4: val_accuracy did not improve from 0.87754
Epoch 5/15
Epoch 5: val_accuracy did not improve from 0.87754
Epoch 6/15
Epoch 6: val_accuracy did not improve from 0.87754
Epoch 7/15
Epoch 7: val_accuracy improved from 0.87754 to 0.89213, saving model to best_model.h5
Epoch 8/15
Epoch 8: val_accuracy did not improve from 0.89213
Epoch 9/15
Epoch 9: val_accuracy did not improve from 0.89213
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.89213
Epoch 11/15
Epoch 11: val_accuracy did not improve from 0.89213
Epoch 12/15
Epoch 12: val_accuracy did not improve from 0.89213
Epoch 13/15
Epoch 13: val_accuracy did not improve from 0.89213
Epoch 14/15
Epoch 14: val_accuracy improved from 0.89213 to 0.89467, saving model to best_mode

2025-04-09 10:01:09.764395: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-09 10:02:34.364584: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 16: val_accuracy improved from 0.89467 to 0.89530, saving model to best_model.h5
Epoch 17/40
Epoch 17: val_accuracy improved from 0.89530 to 0.91371, saving model to best_model.h5
Epoch 18/40
Epoch 18: val_accuracy did not improve from 0.91371
Epoch 19/40
Epoch 19: val_accuracy improved from 0.91371 to 0.92576, saving model to best_model.h5
Epoch 20/40
Epoch 20: val_accuracy improved from 0.92576 to 0.92640, saving model to best_model.h5
Epoch 21/40
Epoch 21: val_accuracy improved from 0.92640 to 0.93274, saving model to best_model.h5
Epoch 22/40
Epoch 22: val_accuracy did not improve from 0.93274
Epoch 23/40
Epoch 23: val_accuracy did not improve from 0.93274
Epoch 24/40
Epoch 24: val_accuracy did not improve from 0.93274
Epoch 25/40
Epoch 25: val_accuracy did not improve from 0.93274
Epoch 26/40
Epoch 26: ReduceLROnPlateau reducing learning rate to 7.787015638314188e-05.

Epoch 26: val_accuracy did not improve from 0.93274
Epoch 27/40
Epoch 27: val_accuracy did not improve fro

EfficientNetB0

In [18]:
# ✅ EfficientNetB0 Training Script with Improvements for Accuracy

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
# Use AdamW for potentially better weight decay handling
from tensorflow.keras.optimizers.experimental import AdamW # Or tf.keras.optimizers if using older TF
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
import numpy as np
import os
import shutil # For splitting data

# 🔹 Set seed for reproducibility
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# 🔹 Dataset path (update this if needed)
base_dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"

train_dir = os.path.join(base_dataset_path, "train")
val_dir = os.path.join(base_dataset_path, "validation")
VAL_SPLIT = 0.2 # Use 20% of data for validation

# 🔹 Helper Function to Split Data (Run this once if needed)
def split_data(base_path, train_path, val_path, split_ratio=0.2):
    if os.path.exists(train_path) and os.path.exists(val_path):
        print("Train/Validation directories already exist. Skipping split.")
        return

    print(f"Creating train/validation split ({1-split_ratio:.0%}/{split_ratio:.0%}) in {base_path}...")
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(val_path, exist_ok=True)

    for class_name in os.listdir(base_path):
        class_dir = os.path.join(base_path, class_name)
        if os.path.isdir(class_dir): # Make sure it's a directory
            # Create corresponding directories in train/val
            train_class_dir = os.path.join(train_path, class_name)
            val_class_dir = os.path.join(val_path, class_name)
            os.makedirs(train_class_dir, exist_ok=True)
            os.makedirs(val_class_dir, exist_ok=True)

            # List images and shuffle
            images = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
            np.random.shuffle(images) # Shuffle in place

            # Split files
            split_point = int(len(images) * (1 - split_ratio))
            train_files = images[:split_point]
            val_files = images[split_point:]

            # Copy files
            for f in train_files:
                shutil.copy(os.path.join(class_dir, f), os.path.join(train_class_dir, f))
            for f in val_files:
                shutil.copy(os.path.join(class_dir, f), os.path.join(val_class_dir, f))
    print("Data splitting complete.")

# --- Run the data splitting function ---
# Important: Make sure your original 'Capstone Dataset' folder only contains class subfolders.
# If it already has 'train' and 'validation' subfolders, comment out or skip this call.
# Adjust base_dataset_path if 'Capstone Dataset' itself contains the class folders.
original_data_path = base_dataset_path # Assumes class folders are directly inside this path
split_data(original_data_path, train_dir, val_dir, split_ratio=VAL_SPLIT)
# ---------------------------------------


# 🔹 Data Augmentation & Generators
IMG_SIZE = (224, 224)
BATCH_SIZE = 32 # Slightly increased batch size, adjust based on GPU memory

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
    # Removed validation_split as we now have separate directories
)

# Validation generator ONLY rescales
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir, # Use the new train directory
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=SEED
)

val_generator = val_datagen.flow_from_directory(
    val_dir, # Use the new validation directory
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE, # Can use larger batch size for validation if memory allows
    class_mode='categorical',
    shuffle=False # No need to shuffle validation data
)

# Check number of classes
num_classes = train_generator.num_classes
print(f"Found {train_generator.samples} train images belonging to {num_classes} classes.")
print(f"Found {val_generator.samples} validation images belonging to {num_classes} classes.")

# 🔹 Load EfficientNetB0
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False  # Freeze base initially

# 🔹 Add classification head (slightly adjusted)
x = base_model.output
x = GlobalAveragePooling2D()(x)
# Consider reducing Dense units if overfitting is suspected, or increasing if underfitting
x = Dense(512, activation='relu', kernel_regularizer=l2(0.001))(x) # Slightly increased L2
x = BatchNormalization()(x)
x = Dropout(0.4)(x) # Slightly increased Dropout
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=outputs)

# 🔹 Compile model for Phase 1
# Initial LR can be critical. 1e-3 is common, but 5e-4 might be safer.
initial_lr = 1e-3
optimizer_phase1 = AdamW(learning_rate=initial_lr, weight_decay=1e-4) # Using AdamW
model.compile(
    optimizer=optimizer_phase1,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# 🔹 Callbacks
# Using the same ReduceLROnPlateau for both phases initially
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
# Increased patience slightly for EarlyStopping
early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1)
# ModelCheckpoint remains crucial
model_checkpoint = ModelCheckpoint('efficientnetb0_best_weights.h5', monitor='val_accuracy', save_best_only=True,
                                   save_weights_only=True, verbose=1)

callbacks_phase1 = [reduce_lr, early_stopping, model_checkpoint]

# 🔹 PHASE 1: Training with frozen base
print("\n🔹 Phase 1: EfficientNetB0 - Frozen base")
initial_epochs = 20 # Increased epochs for initial training
history_phase1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=initial_epochs,
    callbacks=callbacks_phase1,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_steps=val_generator.samples // BATCH_SIZE
)

# Find the epoch with the best validation accuracy in phase 1
best_epoch_phase1 = np.argmax(history_phase1.history['val_accuracy'])
best_val_acc_phase1 = np.max(history_phase1.history['val_accuracy'])
print(f"\nPhase 1 Best Validation Accuracy: {best_val_acc_phase1:.4f} at epoch {best_epoch_phase1 + 1}")

# Load the best weights found during Phase 1 before fine-tuning
print("Loading best weights from Phase 1...")
model.load_weights('efficientnetb0_best_weights.h5')

# 🔹 PHASE 2: Fine-tuning
print("\n🔹 Phase 2: EfficientNetB0 - Fine-tuning")

# Unfreeze some layers - Start with fewer layers
# EfficientNetB0 has ~230 layers. Let's unfreeze the last block (approx last 30-40 layers)
# Fine-tune this number based on results. More layers = more risk of overfitting.
num_layers_to_unfreeze = 40
print(f"Unfreezing the top {num_layers_to_unfreeze} layers of the base model.")
base_model.trainable = True
for layer in base_model.layers[:-num_layers_to_unfreeze]:
     # Keep batch norm layers frozen if using them, helps prevent unstable gradients
    if not isinstance(layer, BatchNormalization):
        layer.trainable = False

# Check trainable status
#for i, layer in enumerate(base_model.layers):
#    print(i, layer.name, layer.trainable)


# Recompile with a lower LR for fine-tuning
fine_tune_lr = 1e-5 # Start with a significantly lower LR for fine-tuning
optimizer_phase2 = AdamW(learning_rate=fine_tune_lr, weight_decay=1e-5) # Lower weight decay too
model.compile(
    optimizer=optimizer_phase2,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary() # Show summary with updated trainable params

# Use the same callbacks, but potentially adjust patience if needed
# The EarlyStopping callback will continue from where it left off (if not triggered)
# ReduceLROnPlateau will also adapt based on the new optimizer and LR
callbacks_phase2 = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=4, min_lr=1e-7, verbose=1), # More patience for fine-tuning LR reduction
    EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True, verbose=1), # More patience for fine-tuning stopping
    ModelCheckpoint('efficientnetb0_best_weights.h5', monitor='val_accuracy', save_best_only=True,
                    save_weights_only=True, verbose=1) # Continue saving best weights
]

# Train model further with fine-tuning
total_epochs = 60 # Increase total epochs
fine_tune_epochs = total_epochs - initial_epochs

print(f"Starting fine-tuning for {fine_tune_epochs} epochs...")
history_phase2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=initial_epochs, # Start counting epochs from here
    callbacks=callbacks_phase2,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_steps=val_generator.samples // BATCH_SIZE
)

# 🔹 Evaluation after Fine-Tuning
print("\nLoading best weights achieved during entire training...")
model.load_weights('efficientnetb0_best_weights.h5')

print("\nEvaluating model with best weights on validation set:")
loss, accuracy = model.evaluate(val_generator, steps=val_generator.samples // BATCH_SIZE)
print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy:.4f}")

if accuracy >= 0.91:
    print(f"\n🎉 Target accuracy reached! Final Validation Accuracy: {accuracy:.4f}")
else:
    print(f"\nTarget accuracy of 0.91 not reached. Final Best Validation Accuracy: {accuracy:.4f}")
    print("Consider further experimentation (more epochs, different LR, more/less unfrozen layers, stronger augmentation, different model size like B1/B2).")


# 🔹 Save final model (architecture + best weights)
model.save("efficientnetb0_final_best.h5")
print("\n✅ Best model saved as 'efficientnetb0_final_best.h5'")

Train/Validation directories already exist. Skipping split.
Found 6320 images belonging to 18 classes.
Found 1587 images belonging to 18 classes.
Found 6320 train images belonging to 18 classes.
Found 1587 validation images belonging to 18 classes.
Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_8 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling_6 (Rescaling)        (None, 224, 224, 3)  0           ['input_8[0][0]']                
                                                                                                  
 normalization_3 (Normalization  (None, 2

2025-04-09 13:29:30.855120: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-09 13:30:43.483371: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_accuracy improved from -inf to 0.13967, saving model to efficientnetb0_best_weights.h5
Epoch 2/20
Epoch 2: val_accuracy improved from 0.13967 to 0.16837, saving model to efficientnetb0_best_weights.h5
Epoch 3/20
Epoch 3: val_accuracy did not improve from 0.16837
Epoch 4/20
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.

Epoch 4: val_accuracy improved from 0.16837 to 0.24936, saving model to efficientnetb0_best_weights.h5
Epoch 5/20
Epoch 5: val_accuracy did not improve from 0.24936
Epoch 6/20
Epoch 6: val_accuracy improved from 0.24936 to 0.37054, saving model to efficientnetb0_best_weights.h5
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.37054
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.37054
Epoch 9/20
Epoch 9: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.

Epoch 9: val_accuracy did not improve from 0.37054
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.37054
Epoch 11/20
Epoch 11: val_a

2025-04-09 13:55:35.860880: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2025-04-09 13:55:40.810560: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:1014] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_6/block2b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer




2025-04-09 13:57:05.883541: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 21: val_accuracy improved from -inf to 0.08801, saving model to efficientnetb0_best_weights.h5
Epoch 22/60
Epoch 22: val_accuracy improved from 0.08801 to 0.16263, saving model to efficientnetb0_best_weights.h5
Epoch 23/60
Epoch 23: val_accuracy improved from 0.16263 to 0.52934, saving model to efficientnetb0_best_weights.h5
Epoch 24/60
Epoch 24: val_accuracy improved from 0.52934 to 0.80612, saving model to efficientnetb0_best_weights.h5
Epoch 25/60
Epoch 25: val_accuracy improved from 0.80612 to 0.86224, saving model to efficientnetb0_best_weights.h5
Epoch 26/60
Epoch 26: val_accuracy improved from 0.86224 to 0.86798, saving model to efficientnetb0_best_weights.h5
Epoch 27/60
Epoch 27: val_accuracy did not improve from 0.86798
Epoch 28/60
Epoch 28: val_accuracy improved from 0.86798 to 0.88138, saving model to efficientnetb0_best_weights.h5
Epoch 29/60
Epoch 29: val_accuracy improved from 0.88138 to 0.88712, saving model to efficientnetb0_best_weights.h5
Epoch 30/60
Epoch 30: 

2025-04-09 14:52:03.648453: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Validation Loss: 0.3073
Validation Accuracy: 0.9305

🎉 Target accuracy reached! Final Validation Accuracy: 0.9305


TypeError: Unable to serialize [2.0896919 2.1128857 2.1081853] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.

ResNet50

In [17]:
# ✅ ResNet50 Training Script with Improvements for Accuracy

import tensorflow as tf
from tensorflow.keras.applications import ResNet50
# Import the specific preprocessing function for ResNet
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers.experimental import AdamW # Or tf.keras.optimizers if using older TF
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
import numpy as np
import os
import shutil # For splitting data

# 🔹 Set seed for reproducibility
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# 🔹 Dataset path (update this if needed)
base_dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"

train_dir = os.path.join(base_dataset_path, "train")
val_dir = os.path.join(base_dataset_path, "validation")
VAL_SPLIT = 0.2 # Use 20% of data for validation

# 🔹 Helper Function to Split Data (Run this once if needed)
def split_data(base_path, train_path, val_path, split_ratio=0.2):
    # --- (This function is identical to the previous one) ---
    if os.path.exists(train_path) and os.path.exists(val_path):
        print("Train/Validation directories already exist. Skipping split.")
        return

    print(f"Creating train/validation split ({1-split_ratio:.0%}/{split_ratio:.0%}) in {base_path}...")
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(val_path, exist_ok=True)

    original_class_folders = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    if not original_class_folders:
         print(f"Error: No class subdirectories found directly inside '{base_path}'.")
         print("Please ensure class folders (e.g., 'ClassA', 'ClassB') are directly within the base path.")
         return # Stop if no class folders found

    for class_name in original_class_folders:
        class_dir = os.path.join(base_path, class_name)
        # Create corresponding directories in train/val
        train_class_dir = os.path.join(train_path, class_name)
        val_class_dir = os.path.join(val_path, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)

        # List images and shuffle
        images = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
        if not images:
            print(f"Warning: No images found in {class_dir}")
            continue
        np.random.shuffle(images) # Shuffle in place

        # Split files
        split_point = int(len(images) * (1 - split_ratio))
        train_files = images[:split_point]
        val_files = images[split_point:]

        # Copy files
        print(f"  Splitting {class_name}: {len(train_files)} train, {len(val_files)} val")
        for f in train_files:
             try:
                shutil.copy2(os.path.join(class_dir, f), os.path.join(train_class_dir, f)) # copy2 preserves metadata
             except Exception as e:
                 print(f"Error copying {f} to train: {e}")
        for f in val_files:
            try:
                shutil.copy2(os.path.join(class_dir, f), os.path.join(val_class_dir, f))
            except Exception as e:
                 print(f"Error copying {f} to val: {e}")
    print("Data splitting complete.")
    print(f"Train data in: {train_path}")
    print(f"Validation data in: {val_path}")


# --- Run the data splitting function ---
# Important: Make sure your base_dataset_path points to the directory containing class subfolders.
original_data_path = base_dataset_path # Assumes class folders are directly inside this path
split_data(original_data_path, train_dir, val_dir, split_ratio=VAL_SPLIT)
# ---------------------------------------


# 🔹 Data Augmentation & Generators
IMG_SIZE = (224, 224) # Standard ResNet50 input size
BATCH_SIZE = 32 # Adjust based on GPU memory

train_datagen = ImageDataGenerator(
    # No rescale=1./255, use ResNet's preprocess_input instead
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2], # Still okay to use brightness adjustment
    fill_mode='nearest'
)

# Validation generator ONLY uses preprocess_input
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=SEED
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# Check number of classes
try:
    num_classes = train_generator.num_classes
    print(f"Found {train_generator.samples} train images belonging to {num_classes} classes.")
    print(f"Found {val_generator.samples} validation images belonging to {num_classes} classes.")
    if num_classes is None or num_classes == 0:
        raise ValueError("No classes found. Check data directories.")
except Exception as e:
    print(f"Error initializing generators or finding classes: {e}")
    print("Please ensure 'train_dir' and 'val_dir' contain subdirectories for each class, and these subdirectories contain images.")
    exit() # Stop script if data isn't loaded correctly


# 🔹 Load ResNet50
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False  # Freeze base initially

# 🔹 Add classification head
x = base_model.output
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dense(512, activation='relu', kernel_regularizer=l2(0.001), name='dense_head')(x)
x = BatchNormalization(name='bn_head')(x)
x = Dropout(0.4, name='dropout_head')(x) # Adjusted dropout
outputs = Dense(num_classes, activation='softmax', name='predictions')(x)

model = Model(inputs=base_model.input, outputs=outputs)

# 🔹 Compile model for Phase 1
initial_lr = 1e-3
optimizer_phase1 = AdamW(learning_rate=initial_lr, weight_decay=1e-4)
model.compile(
    optimizer=optimizer_phase1,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary (Phase 1: Frozen Base) ---")
model.summary()

# 🔹 Callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1) # Increased patience
model_checkpoint = ModelCheckpoint('resnet50_best_weights.h5', # Changed filename
                                   monitor='val_accuracy', save_best_only=True,
                                   save_weights_only=True, verbose=1)

callbacks_phase1 = [reduce_lr, early_stopping, model_checkpoint]

# 🔹 PHASE 1: Training with frozen base
print("\n🔹 Phase 1: ResNet50 - Frozen base")
initial_epochs = 20 # Can adjust this
history_phase1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=initial_epochs,
    callbacks=callbacks_phase1,
    steps_per_epoch=max(1, train_generator.samples // BATCH_SIZE), # Ensure steps >= 1
    validation_steps=max(1, val_generator.samples // BATCH_SIZE)   # Ensure steps >= 1
)

# Find the epoch with the best validation accuracy in phase 1
if history_phase1 and 'val_accuracy' in history_phase1.history and history_phase1.history['val_accuracy']:
    best_epoch_phase1 = np.argmax(history_phase1.history['val_accuracy'])
    best_val_acc_phase1 = np.max(history_phase1.history['val_accuracy'])
    print(f"\nPhase 1 Best Validation Accuracy: {best_val_acc_phase1:.4f} at epoch {best_epoch_phase1 + 1}")
else:
    print("\nWarning: No validation accuracy history found for Phase 1. Cannot determine best epoch.")
    best_val_acc_phase1 = 0 # Assign a default

# Load the best weights found during Phase 1 before fine-tuning
# Check if the weights file exists before loading
best_weights_path = 'resnet50_best_weights.h5'
if os.path.exists(best_weights_path) and best_val_acc_phase1 > 0:
     print(f"Loading best weights from Phase 1 ({best_weights_path})...")
     try:
        model.load_weights(best_weights_path)
     except Exception as e:
         print(f"Error loading weights: {e}. Continuing without loading.")
else:
    print("Best weights file not found or no improvement in Phase 1. Proceeding with current weights for fine-tuning.")


# 🔹 PHASE 2: Fine-tuning
print("\n🔹 Phase 2: ResNet50 - Fine-tuning")

# Unfreeze layers from the beginning of the last stage (conv5_block1)
base_model.trainable = True
fine_tune_from_layer = 'conv5_block1_1_conv' # Start fine-tuning from here
unfreeze_from_index = None
for i, layer in enumerate(base_model.layers):
    if layer.name == fine_tune_from_layer:
        unfreeze_from_index = i
        break

if unfreeze_from_index is not None:
    print(f"Unfreezing layers from index {unfreeze_from_index} ('{fine_tune_from_layer}') onwards.")
    for layer in base_model.layers[:unfreeze_from_index]:
        layer.trainable = False
    # Keep Batch Normalization layers frozen during early fine-tuning
    for layer in base_model.layers[unfreeze_from_index:]:
         if isinstance(layer, BatchNormalization):
             print(f"  Keeping BN layer frozen: {layer.name}")
             layer.trainable = False # Keep BN frozen
         else:
             layer.trainable = True # Ensure others are trainable
else:
    print(f"Warning: Layer '{fine_tune_from_layer}' not found. Unfreezing all base model layers.")
    # Fallback: keep all BN frozen if specific layer not found
    for layer in base_model.layers:
         if isinstance(layer, BatchNormalization):
             layer.trainable = False


# Recompile with a very low LR for fine-tuning
fine_tune_lr = 1e-5
optimizer_phase2 = AdamW(learning_rate=fine_tune_lr, weight_decay=1e-5)
model.compile(
    optimizer=optimizer_phase2,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary (Phase 2: Fine-tuning) ---")
model.summary() # Show summary with updated trainable params

# Adjust callbacks for fine-tuning phase
reduce_lr_ft = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=4, min_lr=1e-7, verbose=1) # More patience
early_stopping_ft = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True, verbose=1) # More patience
model_checkpoint_ft = ModelCheckpoint('resnet50_best_weights.h5', # Continue saving best weights to the same file
                                      monitor='val_accuracy', save_best_only=True,
                                      save_weights_only=True, verbose=1)

callbacks_phase2 = [reduce_lr_ft, early_stopping_ft, model_checkpoint_ft]


# Train model further with fine-tuning
total_epochs = 60 # Adjust total epochs as needed
fine_tune_epochs = total_epochs - initial_epochs

print(f"Starting fine-tuning for up to {fine_tune_epochs} epochs (total epochs: {total_epochs})...")
history_phase2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=initial_epochs, # Resume epoch counting
    callbacks=callbacks_phase2,
    steps_per_epoch=max(1, train_generator.samples // BATCH_SIZE),
    validation_steps=max(1, val_generator.samples // BATCH_SIZE)
)

# 🔹 Evaluation after Fine-Tuning
print("\nLoading best weights achieved during entire training...")
# Load the absolute best weights saved during the whole process
if os.path.exists(best_weights_path):
    print(f"Loading best weights from {best_weights_path} for final evaluation...")
    try:
        model.load_weights(best_weights_path)
    except Exception as e:
        print(f"Error loading final best weights: {e}. Evaluating with current weights.")
else:
    print("Best weights file not found. Evaluating with final weights from training.")


print("\nEvaluating model with best weights on validation set:")
# Ensure validation steps is at least 1 for evaluation
eval_validation_steps = max(1, val_generator.samples // BATCH_SIZE)
loss, accuracy = model.evaluate(val_generator, steps=eval_validation_steps)
print(f"Final Validation Loss: {loss:.4f}")
print(f"Final Validation Accuracy: {accuracy:.4f}")

if accuracy >= 0.91:
    print(f"\n🎉 Target accuracy reached! Final Validation Accuracy: {accuracy:.4f}")
else:
    print(f"\nTarget accuracy of 0.91 not reached. Final Best Validation Accuracy: {accuracy:.4f}")
    print("Consider further experimentation (more epochs, different LR, adjust unfrozen layers, augmentation, etc.).")


# 🔹 Save final model (architecture + best weights)
final_model_path = "resnet50_final_best.h5"
model.save(final_model_path)
print(f"\n✅ Best model saved as '{final_model_path}'")

Creating train/validation split (80%/20%) in /data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset...
  Splitting Potato___Late_Blight: 800 train, 200 val
  Splitting Potato___Early_Blight: 200 train, 50 val
  Splitting Cotton___Thrip: 139 train, 35 val
  Splitting Rice___Brown_Spot: 326 train, 82 val
  Splitting Corn___Gray_Leaf_Spot: 410 train, 103 val
  Splitting Corn___Common_Rust: 397 train, 100 val
  Splitting Corn___Healthy: 788 train, 197 val
  Splitting Wheat___Yellow_Rust: 107 train, 27 val
  Splitting Rice___Neck_Blast: 615 train, 154 val
  Splitting Rice___Leaf_Blast: 569 train, 143 val
  Splitting Rice___Healthy: 372 train, 93 val
  Splitting Potato___Healthy: 121 train, 31 val
  Splitting Corn___Northern_Leaf_Blight: 496 train, 124 val
  Splitting Wheat___Healthy: 892 train, 224 val
  Splitting Wheat___Brown_Rust: 80 train, 21 val
  Splitting Sugarcane__Red Rot: 8 train, 3 val
Data splitting complete.
Train data in: /data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Cap

2025-04-09 12:09:20.309972: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 1/20

2025-04-09 12:10:54.928191: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_accuracy improved from -inf to 0.92602, saving model to resnet50_best_weights.h5
Epoch 2/20
Epoch 2: val_accuracy improved from 0.92602 to 0.93941, saving model to resnet50_best_weights.h5
Epoch 3/20
Epoch 3: val_accuracy did not improve from 0.93941
Epoch 4/20
Epoch 4: val_accuracy did not improve from 0.93941
Epoch 5/20
Epoch 5: val_accuracy did not improve from 0.93941
Epoch 6/20
Epoch 6: val_accuracy improved from 0.93941 to 0.94643, saving model to resnet50_best_weights.h5
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.94643
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.94643
Epoch 9/20
Epoch 9: val_accuracy did not improve from 0.94643
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.94643
Epoch 11/20
Epoch 11: val_accuracy did not improve from 0.94643
Epoch 12/20
Epoch 12: val_accuracy did not improve from 0.94643
Epoch 13/20
Epoch 13: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.

Epoch 13: val_accuracy did not imp

2025-04-09 12:35:56.621633: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-09 12:37:10.409810: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 21: val_accuracy improved from -inf to 0.94579, saving model to resnet50_best_weights.h5
Epoch 22/60
Epoch 22: val_accuracy improved from 0.94579 to 0.95472, saving model to resnet50_best_weights.h5
Epoch 23/60
Epoch 23: val_accuracy improved from 0.95472 to 0.95536, saving model to resnet50_best_weights.h5
Epoch 24/60
Epoch 24: val_accuracy improved from 0.95536 to 0.95599, saving model to resnet50_best_weights.h5
Epoch 25/60
Epoch 25: val_accuracy improved from 0.95599 to 0.95918, saving model to resnet50_best_weights.h5
Epoch 26/60
Epoch 26: val_accuracy did not improve from 0.95918
Epoch 27/60
Epoch 27: val_accuracy improved from 0.95918 to 0.96110, saving model to resnet50_best_weights.h5
Epoch 28/60
Epoch 28: val_accuracy did not improve from 0.96110
Epoch 29/60
Epoch 29: val_accuracy improved from 0.96110 to 0.96301, saving model to resnet50_best_weights.h5
Epoch 30/60
Epoch 30: ReduceLROnPlateau reducing learning rate to 1.9999999494757505e-06.

Epoch 30: val_accuracy di

2025-04-09 13:29:19.129448: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Final Validation Loss: 0.1784
Final Validation Accuracy: 0.9688

🎉 Target accuracy reached! Final Validation Accuracy: 0.9688

✅ Best model saved as 'resnet50_final_best.h5'


MobileNetV2

In [22]:
# ✅ MobileNetV2 Training Script with Improvements for Accuracy

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
# Import the specific preprocessing function for MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers.experimental import AdamW # Or tf.keras.optimizers if using older TF
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
import numpy as np
import os
import shutil # For splitting data
import sys # To exit script gracefully

# 🔹 Set seed for reproducibility
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# 🔹 Dataset path (<<<<< VERIFY THIS PATH AND STRUCTURE >>>>>)
base_dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"
train_dir = os.path.join(base_dataset_path, "train")
val_dir = os.path.join(base_dataset_path, "validation")
VAL_SPLIT = 0.2 # Use 20% of data for validation

# 🔹 Helper Function to Split Data (Identical to previous scripts)
def split_data(base_path, train_path, val_path, split_ratio=0.2):
    # --- (Assume the improved split_data function from the previous answer is here) ---
    if os.path.exists(train_path) and os.path.exists(val_path):
        print(f"Train/Validation directories ('{os.path.basename(train_path)}', '{os.path.basename(val_path)}') already exist in '{base_path}'. Skipping split.")
        if not any(os.scandir(train_path)) or not any(os.scandir(val_path)):
             print("WARNING: Existing train/validation directories appear empty. Consider deleting them and re-running if data is missing.")
        return True
    print(f"Attempting to create train/validation split ({1-split_ratio:.0%}/{split_ratio:.0%}) from '{base_path}'...")
    try:
        potential_class_dirs = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    except FileNotFoundError:
        print(f"ERROR: The specified base_dataset_path '{base_path}' was not found.")
        return False
    except Exception as e:
        print(f"ERROR: Could not list directories in '{base_path}': {e}")
        return False
    class_folders = [d for d in potential_class_dirs if d.lower() not in ['train', 'validation']]
    if not class_folders:
         print(f"ERROR: No class subdirectories found directly inside '{base_path}'.")
         print(f"       Found items: {os.listdir(base_path)}")
         return False
    print(f"Found class folders: {class_folders}")
    os.makedirs(train_path, exist_ok=True); os.makedirs(val_path, exist_ok=True)
    print(f"Created directories: '{train_path}' and '{val_path}'")
    total_copied_train = 0; total_copied_val = 0
    for class_name in class_folders:
        class_dir = os.path.join(base_path, class_name)
        train_class_dir = os.path.join(train_path, class_name); val_class_dir = os.path.join(val_path, class_name)
        os.makedirs(train_class_dir, exist_ok=True); os.makedirs(val_class_dir, exist_ok=True)
        try:
            images = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f)) and f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))]
        except Exception as e: print(f"Warning: Could not read files in {class_dir}: {e}"); continue
        if not images: print(f"Warning: No image files found in source directory: {class_dir}"); continue
        np.random.shuffle(images)
        split_point = int(len(images) * (1 - split_ratio)); train_files = images[:split_point]; val_files = images[split_point:]
        print(f"  Splitting '{class_name}': {len(images)} images found -> {len(train_files)} train, {len(val_files)} val")
        copied_train_count = 0
        for f in train_files:
            src_file = os.path.join(class_dir, f); dst_file = os.path.join(train_class_dir, f)
            try: shutil.copy2(src_file, dst_file); copied_train_count += 1
            except Exception as e: print(f"    ERROR copying {src_file} to {dst_file}: {e}")
        total_copied_train += copied_train_count
        copied_val_count = 0
        for f in val_files:
            src_file = os.path.join(class_dir, f); dst_file = os.path.join(val_class_dir, f)
            try: shutil.copy2(src_file, dst_file); copied_val_count += 1
            except Exception as e: print(f"    ERROR copying {src_file} to {dst_file}: {e}")
        total_copied_val += copied_val_count
        if copied_train_count != len(train_files) or copied_val_count != len(val_files): print(f"  Warning: Mismatch in expected vs copied files for class '{class_name}'")
    print(f"Data splitting complete. Copied {total_copied_train} train images, {total_copied_val} validation images.")
    if total_copied_train == 0 or total_copied_val == 0: print("ERROR: Failed to copy any images."); return False
    return True

# --- Run the data splitting function ---
if not split_data(base_dataset_path, train_dir, val_dir, split_ratio=VAL_SPLIT):
    print("\nExiting script due to data splitting errors.")
    sys.exit(1) # Exit if splitting failed critically
# ---------------------------------------


# 🔹 Data Augmentation & Generators
IMG_SIZE = (224, 224) # Standard MobileNetV2 input size
BATCH_SIZE = 32 # MobileNetV2 is usually less memory intensive, 32 or 64 might work

train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input, # Use MobileNetV2 preprocessing
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation generator ONLY uses MobileNetV2 preprocess_input
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

print(f"\nCreating Train Generator from: {train_dir}")
try:
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=True,
        seed=SEED
    )
except Exception as e: print(f"ERROR: Failed to create train_generator: {e}"); sys.exit(1)

print(f"Creating Validation Generator from: {val_dir}")
try:
    val_generator = val_datagen.flow_from_directory(
        val_dir,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=False
    )
except Exception as e: print(f"ERROR: Failed to create val_generator: {e}"); sys.exit(1)

# Check number of classes AND samples
num_classes = train_generator.num_classes
train_samples = train_generator.samples
val_samples = val_generator.samples

print(f"\nFound {train_samples} train images belonging to {num_classes} classes.")
print(f"Found {val_samples} validation images belonging to {num_classes} classes.")

if train_samples == 0 or val_samples == 0 or num_classes <= 1: # Need at least 2 classes
    print("\nERROR: Generators reported 0 images or <= 1 class. Check data directories and splitting.")
    sys.exit(1)

# 🔹 Load MobileNetV2
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3), alpha=1.0) # alpha controls width
base_model.trainable = False  # Freeze base initially

# 🔹 Add classification head (similar structure often works well)
x = base_model.output
x = GlobalAveragePooling2D(name='global_avg_pool')(x)
x = Dense(512, activation='relu', kernel_regularizer=l2(0.001), name='dense_head')(x) # Adjust units/regularization if needed
x = BatchNormalization(name='bn_head')(x)
x = Dropout(0.3, name='dropout_head')(x) # Adjust dropout rate if needed (maybe slightly less than VGG/EfficientNet)
outputs = Dense(num_classes, activation='softmax', name='predictions')(x)

model = Model(inputs=base_model.input, outputs=outputs)

# 🔹 Compile model for Phase 1
initial_lr = 1e-3
optimizer_phase1 = AdamW(learning_rate=initial_lr, weight_decay=1e-4)
model.compile(
    optimizer=optimizer_phase1,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary (Phase 1 - MobileNetV2 Frozen Base) ---")
model.summary()

# 🔹 Callbacks
model_weights_file = 'mobilenetv2_best_weights.h5'
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1) # Moderate patience
model_checkpoint = ModelCheckpoint(model_weights_file, monitor='val_accuracy', save_best_only=True,
                                   save_weights_only=True, verbose=1)

callbacks_phase1 = [reduce_lr, early_stopping, model_checkpoint]

# Calculate steps, ensuring they are at least 1
steps_per_epoch = max(1, train_samples // BATCH_SIZE)
validation_steps = max(1, val_samples // BATCH_SIZE)

# 🔹 PHASE 1: Training with frozen base
print("\n🔹 Phase 1: MobileNetV2 - Frozen base")
initial_epochs = 20 # MobileNet head might converge reasonably fast
history_phase1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=initial_epochs,
    callbacks=callbacks_phase1,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

# Find the epoch with the best validation accuracy in phase 1
best_val_acc_phase1 = 0
if history_phase1 and 'val_accuracy' in history_phase1.history and history_phase1.history['val_accuracy']:
    best_epoch_phase1 = np.argmax(history_phase1.history['val_accuracy'])
    best_val_acc_phase1 = np.max(history_phase1.history['val_accuracy'])
    print(f"\nPhase 1 Best Validation Accuracy: {best_val_acc_phase1:.4f} at epoch {best_epoch_phase1 + 1}")
else:
    print("\nWarning: No validation accuracy history found for Phase 1.")

# Load the best weights found during Phase 1 before fine-tuning
if os.path.exists(model_weights_file) and best_val_acc_phase1 > 0:
     print(f"Loading best weights from Phase 1 ({model_weights_file})...")
     try: model.load_weights(model_weights_file)
     except Exception as e: print(f"Error loading weights: {e}. Continuing without loading.")
else:
    print("Best weights file not found or no improvement in Phase 1. Proceeding with current weights.")


# 🔹 PHASE 2: Fine-tuning
print("\n🔹 Phase 2: MobileNetV2 - Fine-tuning")

# Unfreeze layers from around block 13 or 14 onwards (MobileNetV2 has 16 blocks total)
base_model.trainable = True
fine_tune_from_layer_name = 'block_13_expand' # Experiment with 'block_14_expand' or earlier/later blocks
unfreeze_from_index = None
for i, layer in enumerate(base_model.layers):
    if layer.name == fine_tune_from_layer_name:
        unfreeze_from_index = i
        break

if unfreeze_from_index is not None:
    print(f"Unfreezing layers from index {unfreeze_from_index} ('{fine_tune_from_layer_name}') onwards.")
    for layer in base_model.layers[:unfreeze_from_index]:
        layer.trainable = False
    # Keep Batch Normalization layers frozen in the unfrozen part
    for layer in base_model.layers[unfreeze_from_index:]:
         if isinstance(layer, BatchNormalization):
             print(f"  Keeping BN layer frozen: {layer.name}")
             layer.trainable = False
         else:
            # Ensure non-BN layers in the unfrozen part are trainable
             layer.trainable = True
else:
    print(f"Warning: Layer '{fine_tune_from_layer_name}' not found. Unfreezing all base layers.")
    # Apply BN freezing logic to all base layers if specific layer not found
    for layer in base_model.layers:
         if isinstance(layer, BatchNormalization):
             layer.trainable = False


# Recompile with a very low LR for fine-tuning
fine_tune_lr = 2e-5 # Start slightly higher than VGG/EfficientNet maybe, e.g., 1e-5 or 2e-5
optimizer_phase2 = AdamW(learning_rate=fine_tune_lr, weight_decay=1e-5)
model.compile(
    optimizer=optimizer_phase2,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary (Phase 2: Fine-tuning MobileNetV2) ---")
model.summary()

# Callbacks for fine-tuning
reduce_lr_ft = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=4, min_lr=1e-7, verbose=1)
early_stopping_ft = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True, verbose=1)
model_checkpoint_ft = ModelCheckpoint(model_weights_file, monitor='val_accuracy', save_best_only=True,
                                      save_weights_only=True, verbose=1) # Continue saving best

callbacks_phase2 = [reduce_lr_ft, early_stopping_ft, model_checkpoint_ft]

# Train model further with fine-tuning
total_epochs = 60 # Adjust total epochs
fine_tune_epochs = total_epochs - initial_epochs

print(f"Starting fine-tuning for up to {fine_tune_epochs} epochs (total epochs: {total_epochs})...")
history_phase2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=initial_epochs, # Resume epoch counting
    callbacks=callbacks_phase2,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

# 🔹 Evaluation after Fine-Tuning
print("\nLoading best weights achieved during entire training...")
if os.path.exists(model_weights_file):
    print(f"Loading best weights from {model_weights_file} for final evaluation...")
    try: model.load_weights(model_weights_file)
    except Exception as e: print(f"Error loading final best weights: {e}. Evaluating with current weights.")
else:
    print("Best weights file not found. Evaluating with final weights from training.")

print("\nEvaluating model with best weights on validation set:")
eval_validation_steps = max(1, val_samples // BATCH_SIZE)
loss, accuracy = model.evaluate(val_generator, steps=eval_validation_steps)
print(f"Final Validation Loss: {loss:.4f}")
print(f"Final Validation Accuracy: {accuracy:.4f}")

if accuracy >= 0.91:
    print(f"\n🎉 Target accuracy reached! Final Validation Accuracy: {accuracy:.4f}")
else:
    print(f"\nTarget accuracy of 0.91 not reached. Final Best Validation Accuracy: {accuracy:.4f}")
    print("Consider further experimentation (epochs, LR, unfrozen layers, head architecture, augmentation).")


# 🔹 Save final model (architecture + best weights)
final_model_path = "mobilenetv2_final_best.h5"
model.save(final_model_path)
print(f"\n✅ Best model saved as '{final_model_path}'")

Train/Validation directories ('train', 'validation') already exist in '/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset'. Skipping split.

Creating Train Generator from: /data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset/train
Found 6320 images belonging to 18 classes.
Creating Validation Generator from: /data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset/validation
Found 1587 images belonging to 18 classes.

Found 6320 train images belonging to 18 classes.
Found 1587 validation images belonging to 18 classes.

--- Model Summary (Phase 1 - MobileNetV2 Frozen Base) ---
Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_12 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                          

2025-04-10 09:46:08.954873: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-10 09:47:16.236475: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_accuracy improved from -inf to 0.89605, saving model to mobilenetv2_best_weights.h5
Epoch 2/20
Epoch 2: val_accuracy improved from 0.89605 to 0.89796, saving model to mobilenetv2_best_weights.h5
Epoch 3/20
Epoch 3: val_accuracy improved from 0.89796 to 0.91135, saving model to mobilenetv2_best_weights.h5
Epoch 4/20
Epoch 4: val_accuracy did not improve from 0.91135
Epoch 5/20
Epoch 5: val_accuracy improved from 0.91135 to 0.91709, saving model to mobilenetv2_best_weights.h5
Epoch 6/20
Epoch 6: val_accuracy improved from 0.91709 to 0.92347, saving model to mobilenetv2_best_weights.h5
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.92347
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.92347
Epoch 9/20
Epoch 9: val_accuracy did not improve from 0.92347
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.92347
Epoch 11/20
Epoch 11: val_accuracy did not improve from 0.92347
Epoch 12/20
Epoch 12: val_accuracy did not improve from 0.92347
Epoch 13/20
Epoch 

2025-04-10 10:11:14.391134: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-10 10:12:20.714246: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 21: val_accuracy improved from -inf to 0.91390, saving model to mobilenetv2_best_weights.h5
Epoch 22/60
Epoch 22: val_accuracy improved from 0.91390 to 0.92921, saving model to mobilenetv2_best_weights.h5
Epoch 23/60
Epoch 23: val_accuracy did not improve from 0.92921
Epoch 24/60
Epoch 24: val_accuracy improved from 0.92921 to 0.93686, saving model to mobilenetv2_best_weights.h5
Epoch 25/60
Epoch 25: val_accuracy improved from 0.93686 to 0.94069, saving model to mobilenetv2_best_weights.h5
Epoch 26/60
Epoch 26: val_accuracy did not improve from 0.94069
Epoch 27/60
Epoch 27: val_accuracy did not improve from 0.94069
Epoch 28/60
Epoch 28: val_accuracy did not improve from 0.94069
Epoch 29/60
Epoch 29: val_accuracy improved from 0.94069 to 0.94579, saving model to mobilenetv2_best_weights.h5
Epoch 30/60
Epoch 30: val_accuracy did not improve from 0.94579
Epoch 31/60
Epoch 31: val_accuracy did not improve from 0.94579
Epoch 32/60
Epoch 32: val_accuracy did not improve from 0.94579
E

2025-04-10 10:59:38.186605: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Final Validation Loss: 0.2060
Final Validation Accuracy: 0.9617

🎉 Target accuracy reached! Final Validation Accuracy: 0.9617

✅ Best model saved as 'mobilenetv2_final_best.h5'


VGG16

In [21]:
# ✅ VGG16 Training Script with Improvements for Accuracy

import tensorflow as tf
from tensorflow.keras.applications import VGG16
# Import the specific preprocessing function for VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization, Flatten
from tensorflow.keras.optimizers.experimental import AdamW # Or tf.keras.optimizers if using older TF
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
import numpy as np
import os
import shutil # For splitting data
import sys # To exit script gracefully

# 🔹 Set seed for reproducibility
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# 🔹 Dataset path (<<<<< VERIFY THIS PATH AND STRUCTURE >>>>>)
base_dataset_path = r"/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset"
train_dir = os.path.join(base_dataset_path, "train")
val_dir = os.path.join(base_dataset_path, "validation")
VAL_SPLIT = 0.2 # Use 20% of data for validation

# 🔹 Helper Function to Split Data (Identical to previous scripts)
def split_data(base_path, train_path, val_path, split_ratio=0.2):
    # --- (Assume the improved split_data function from the previous answer is here) ---
    if os.path.exists(train_path) and os.path.exists(val_path):
        print(f"Train/Validation directories ('{os.path.basename(train_path)}', '{os.path.basename(val_path)}') already exist in '{base_path}'. Skipping split.")
        if not any(os.scandir(train_path)) or not any(os.scandir(val_path)):
             print("WARNING: Existing train/validation directories appear empty. Consider deleting them and re-running if data is missing.")
        return True
    print(f"Attempting to create train/validation split ({1-split_ratio:.0%}/{split_ratio:.0%}) from '{base_path}'...")
    try:
        potential_class_dirs = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    except FileNotFoundError:
        print(f"ERROR: The specified base_dataset_path '{base_path}' was not found.")
        return False
    except Exception as e:
        print(f"ERROR: Could not list directories in '{base_path}': {e}")
        return False
    class_folders = [d for d in potential_class_dirs if d.lower() not in ['train', 'validation']]
    if not class_folders:
         print(f"ERROR: No class subdirectories found directly inside '{base_path}'.")
         print(f"       Found items: {os.listdir(base_path)}")
         return False
    print(f"Found class folders: {class_folders}")
    os.makedirs(train_path, exist_ok=True); os.makedirs(val_path, exist_ok=True)
    print(f"Created directories: '{train_path}' and '{val_path}'")
    total_copied_train = 0; total_copied_val = 0
    for class_name in class_folders:
        class_dir = os.path.join(base_path, class_name)
        train_class_dir = os.path.join(train_path, class_name); val_class_dir = os.path.join(val_path, class_name)
        os.makedirs(train_class_dir, exist_ok=True); os.makedirs(val_class_dir, exist_ok=True)
        try:
            images = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f)) and f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))]
        except Exception as e: print(f"Warning: Could not read files in {class_dir}: {e}"); continue
        if not images: print(f"Warning: No image files found in source directory: {class_dir}"); continue
        np.random.shuffle(images)
        split_point = int(len(images) * (1 - split_ratio)); train_files = images[:split_point]; val_files = images[split_point:]
        print(f"  Splitting '{class_name}': {len(images)} images found -> {len(train_files)} train, {len(val_files)} val")
        copied_train_count = 0
        for f in train_files:
            src_file = os.path.join(class_dir, f); dst_file = os.path.join(train_class_dir, f)
            try: shutil.copy2(src_file, dst_file); copied_train_count += 1
            except Exception as e: print(f"    ERROR copying {src_file} to {dst_file}: {e}")
        total_copied_train += copied_train_count
        copied_val_count = 0
        for f in val_files:
            src_file = os.path.join(class_dir, f); dst_file = os.path.join(val_class_dir, f)
            try: shutil.copy2(src_file, dst_file); copied_val_count += 1
            except Exception as e: print(f"    ERROR copying {src_file} to {dst_file}: {e}")
        total_copied_val += copied_val_count
        if copied_train_count != len(train_files) or copied_val_count != len(val_files): print(f"  Warning: Mismatch in expected vs copied files for class '{class_name}'")
    print(f"Data splitting complete. Copied {total_copied_train} train images, {total_copied_val} validation images.")
    if total_copied_train == 0 or total_copied_val == 0: print("ERROR: Failed to copy any images."); return False
    return True


# --- Run the data splitting function ---
if not split_data(base_dataset_path, train_dir, val_dir, split_ratio=VAL_SPLIT):
    print("\nExiting script due to data splitting errors.")
    sys.exit(1) # Exit if splitting failed critically
# ---------------------------------------


# 🔹 Data Augmentation & Generators
IMG_SIZE = (224, 224) # Standard VGG16 input size
BATCH_SIZE = 32 # Adjust based on GPU memory (VGG16 can be memory intensive)

train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input, # Use VGG16 preprocessing
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
    # Removed brightness range, less common with VGG preprocess_input, can add back if needed
)

# Validation generator ONLY uses VGG16 preprocess_input
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

print(f"\nCreating Train Generator from: {train_dir}")
try:
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=True,
        seed=SEED
    )
except Exception as e: print(f"ERROR: Failed to create train_generator: {e}"); sys.exit(1)

print(f"Creating Validation Generator from: {val_dir}")
try:
    val_generator = val_datagen.flow_from_directory(
        val_dir,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=False
    )
except Exception as e: print(f"ERROR: Failed to create val_generator: {e}"); sys.exit(1)

# Check number of classes AND samples
num_classes = train_generator.num_classes
train_samples = train_generator.samples
val_samples = val_generator.samples

print(f"\nFound {train_samples} train images belonging to {num_classes} classes.")
print(f"Found {val_samples} validation images belonging to {num_classes} classes.")

if train_samples == 0 or val_samples == 0 or num_classes <= 1: # Need at least 2 classes
    print("\nERROR: Generators reported 0 images or <= 1 class. Check data directories and splitting.")
    sys.exit(1)

# 🔹 Load VGG16
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False  # Freeze base initially

# 🔹 Add classification head
# VGG output is not pooled by default, need Flatten or GAP
x = base_model.output
x = GlobalAveragePooling2D(name='global_avg_pool')(x) # Or Flatten() - GAP often works better against overfitting
x = Dense(512, activation='relu', kernel_regularizer=l2(0.001), name='fc1_head')(x) # Regularization might be important for VGG
x = BatchNormalization(name='bn_head')(x) # Add BN for stability
x = Dropout(0.5, name='dropout_head')(x) # Increase dropout for VGG
outputs = Dense(num_classes, activation='softmax', name='predictions')(x)

model = Model(inputs=base_model.input, outputs=outputs)

# 🔹 Compile model for Phase 1
initial_lr = 1e-3 # May need to be slightly lower for VGG (e.g., 5e-4) if unstable
optimizer_phase1 = AdamW(learning_rate=initial_lr, weight_decay=1e-4)
model.compile(
    optimizer=optimizer_phase1,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary (Phase 1 - VGG16 Frozen Base) ---")
model.summary()

# 🔹 Callbacks
model_weights_file = 'vgg16_best_weights.h5'
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
# Increased patience slightly for VGG as it might take longer to stabilize
early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1)
model_checkpoint = ModelCheckpoint(model_weights_file, monitor='val_accuracy', save_best_only=True,
                                   save_weights_only=True, verbose=1)

callbacks_phase1 = [reduce_lr, early_stopping, model_checkpoint]

# Calculate steps, ensuring they are at least 1
steps_per_epoch = max(1, train_samples // BATCH_SIZE)
validation_steps = max(1, val_samples // BATCH_SIZE)

# 🔹 PHASE 1: Training with frozen base
print("\n🔹 Phase 1: VGG16 - Frozen base")
initial_epochs = 15 # VGG head might train faster or slower, adjust as needed
history_phase1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=initial_epochs,
    callbacks=callbacks_phase1,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

# Find the epoch with the best validation accuracy in phase 1
best_val_acc_phase1 = 0
if history_phase1 and 'val_accuracy' in history_phase1.history and history_phase1.history['val_accuracy']:
    best_epoch_phase1 = np.argmax(history_phase1.history['val_accuracy'])
    best_val_acc_phase1 = np.max(history_phase1.history['val_accuracy'])
    print(f"\nPhase 1 Best Validation Accuracy: {best_val_acc_phase1:.4f} at epoch {best_epoch_phase1 + 1}")
else:
    print("\nWarning: No validation accuracy history found for Phase 1.")

# Load the best weights found during Phase 1 before fine-tuning
if os.path.exists(model_weights_file) and best_val_acc_phase1 > 0:
     print(f"Loading best weights from Phase 1 ({model_weights_file})...")
     try: model.load_weights(model_weights_file)
     except Exception as e: print(f"Error loading weights: {e}. Continuing without loading.")
else:
    print("Best weights file not found or no improvement in Phase 1. Proceeding with current weights.")


# 🔹 PHASE 2: Fine-tuning
print("\n🔹 Phase 2: VGG16 - Fine-tuning")

# Unfreeze the top convolutional block (block5)
base_model.trainable = True
fine_tune_from_layer_name = 'block5_conv1'
unfreeze_from_index = None
for i, layer in enumerate(base_model.layers):
    if layer.name == fine_tune_from_layer_name:
        unfreeze_from_index = i
        break

if unfreeze_from_index is not None:
    print(f"Unfreezing layers from index {unfreeze_from_index} ('{fine_tune_from_layer_name}') onwards.")
    for layer in base_model.layers[:unfreeze_from_index]:
        layer.trainable = False
    # VGG16 base doesn't have Batch Norm layers to worry about freezing
else:
    print(f"Warning: Layer '{fine_tune_from_layer_name}' not found. Unfreezing all base layers.")
    # No specific BN logic needed here for VGG base


# Recompile with a very low LR for fine-tuning
fine_tune_lr = 1e-5 # Crucial to use a low LR
optimizer_phase2 = AdamW(learning_rate=fine_tune_lr, weight_decay=1e-5)
model.compile(
    optimizer=optimizer_phase2,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("\n--- Model Summary (Phase 2: Fine-tuning VGG16) ---")
model.summary()

# Callbacks for fine-tuning (adjust patience)
reduce_lr_ft = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=4, min_lr=1e-7, verbose=1)
early_stopping_ft = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True, verbose=1) # More patience for fine-tuning
model_checkpoint_ft = ModelCheckpoint(model_weights_file, monitor='val_accuracy', save_best_only=True,
                                      save_weights_only=True, verbose=1) # Continue saving best

callbacks_phase2 = [reduce_lr_ft, early_stopping_ft, model_checkpoint_ft]

# Train model further with fine-tuning
total_epochs = 50 # Adjust total epochs as needed for VGG
fine_tune_epochs = total_epochs - initial_epochs

print(f"Starting fine-tuning for up to {fine_tune_epochs} epochs (total epochs: {total_epochs})...")
history_phase2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=total_epochs,
    initial_epoch=initial_epochs, # Resume epoch counting
    callbacks=callbacks_phase2,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

# 🔹 Evaluation after Fine-Tuning
print("\nLoading best weights achieved during entire training...")
if os.path.exists(model_weights_file):
    print(f"Loading best weights from {model_weights_file} for final evaluation...")
    try: model.load_weights(model_weights_file)
    except Exception as e: print(f"Error loading final best weights: {e}. Evaluating with current weights.")
else:
    print("Best weights file not found. Evaluating with final weights from training.")

print("\nEvaluating model with best weights on validation set:")
eval_validation_steps = max(1, val_samples // BATCH_SIZE)
loss, accuracy = model.evaluate(val_generator, steps=eval_validation_steps)
print(f"Final Validation Loss: {loss:.4f}")
print(f"Final Validation Accuracy: {accuracy:.4f}")

if accuracy >= 0.91:
    print(f"\n🎉 Target accuracy reached! Final Validation Accuracy: {accuracy:.4f}")
else:
    print(f"\nTarget accuracy of 0.91 not reached. Final Best Validation Accuracy: {accuracy:.4f}")
    print("Consider further experimentation (epochs, LR, unfrozen layers [e.g., block4_conv1], regularization, augmentation).")


# 🔹 Save final model (architecture + best weights)
final_model_path = "vgg16_final_best.h5"
model.save(final_model_path)
print(f"\n✅ Best model saved as '{final_model_path}'")

Train/Validation directories ('train', 'validation') already exist in '/data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset'. Skipping split.

Creating Train Generator from: /data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset/train
Found 6320 images belonging to 18 classes.
Creating Validation Generator from: /data/mpstme-aruja/Farm-Aid/Farm-Aid Dataset/Capstone Dataset/validation
Found 1587 images belonging to 18 classes.

Found 6320 train images belonging to 18 classes.
Found 1587 validation images belonging to 18 classes.

--- Model Summary (Phase 1 - VGG16 Frozen Base) ---
Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                  

2025-04-10 08:42:39.771900: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-10 08:43:43.850216: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_accuracy improved from -inf to 0.86607, saving model to vgg16_best_weights.h5
Epoch 2/15
Epoch 2: val_accuracy improved from 0.86607 to 0.90242, saving model to vgg16_best_weights.h5
Epoch 3/15
Epoch 3: val_accuracy did not improve from 0.90242
Epoch 4/15
Epoch 4: val_accuracy did not improve from 0.90242
Epoch 5/15
Epoch 5: val_accuracy did not improve from 0.90242
Epoch 6/15
Epoch 6: val_accuracy improved from 0.90242 to 0.91837, saving model to vgg16_best_weights.h5
Epoch 7/15
Epoch 7: val_accuracy did not improve from 0.91837
Epoch 8/15
Epoch 8: val_accuracy improved from 0.91837 to 0.91964, saving model to vgg16_best_weights.h5
Epoch 9/15
Epoch 9: val_accuracy improved from 0.91964 to 0.92283, saving model to vgg16_best_weights.h5
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.92283
Epoch 11/15
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.

Epoch 11: val_accuracy did not improve from 0.92283
Epoch 12/15
Epoch 12: val_accurac

2025-04-10 09:01:26.701441: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2025-04-10 09:02:32.391239: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Epoch 16: val_accuracy improved from -inf to 0.94260, saving model to vgg16_best_weights.h5
Epoch 17/50
Epoch 17: val_accuracy did not improve from 0.94260
Epoch 18/50
Epoch 18: val_accuracy improved from 0.94260 to 0.94834, saving model to vgg16_best_weights.h5
Epoch 19/50
Epoch 19: val_accuracy did not improve from 0.94834
Epoch 20/50
Epoch 20: val_accuracy improved from 0.94834 to 0.95344, saving model to vgg16_best_weights.h5
Epoch 21/50
Epoch 21: val_accuracy improved from 0.95344 to 0.95855, saving model to vgg16_best_weights.h5
Epoch 22/50
Epoch 22: val_accuracy did not improve from 0.95855
Epoch 23/50
Epoch 23: val_accuracy did not improve from 0.95855
Epoch 24/50
Epoch 24: val_accuracy did not improve from 0.95855
Epoch 25/50
Epoch 25: val_accuracy did not improve from 0.95855
Epoch 26/50
Epoch 26: val_accuracy did not improve from 0.95855
Epoch 27/50
Epoch 27: val_accuracy did not improve from 0.95855
Epoch 28/50
Epoch 28: val_accuracy did not improve from 0.95855
Epoch 29/5

2025-04-10 09:45:56.681525: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Final Validation Loss: 0.2671
Final Validation Accuracy: 0.9681

🎉 Target accuracy reached! Final Validation Accuracy: 0.9681

✅ Best model saved as 'vgg16_final_best.h5'
