# 1. Setup and Imports
This block imports all the libraries used in the project: `tensorflow`/`keras` for building and training the model, `numpy` for numerical operations, `cv2` (OpenCV) for reading and resizing images, `matplotlib` and `seaborn` for plotting, and `sklearn` for splitting the data and computing evaluation metrics.

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import cv2
from PIL import Image
from collections import Counter

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Show which TensorFlow build this session is running (handy when comparing
# results across environments).
print(f'TensorFlow Version: {tf.__version__}')

# 2. Data Loading and Preparation
Here, we define the path to our dataset and the class labels. We then load all images, resize them to a uniform size, and convert them to NumPy arrays, assigning each image an integer label based on its class folder. The split into training, validation, and testing sets is done in Section 2.1.

In [None]:
# Root folder of the dataset; it is expected to contain one sub-folder per class.
DATASET_PATH = r'C:\Users\mbuto\Agri-sol\Dataset\Bean_Dataset'

# Class (sub-folder) names. The position of a name in this list is the integer
# label assigned to its images when they are loaded.
CLASSES = [
    'angular_leaf_spot',
    'bean_rust',
    'healthy',
]
NUM_CLASSES = len(CLASSES)

# Every image is resized to this size; 224x224 is the resolution VGG16 was
# trained on.
IMAGE_SIZE = (224, 224)

def load_data(dataset_path, classes, image_size):
    """Load every readable image under ``dataset_path`` into NumPy arrays.

    Each entry of ``classes`` names a sub-directory of ``dataset_path``; the
    entry's index in ``classes`` becomes the integer label of every image
    found in that sub-directory.

    Args:
        dataset_path: Root directory containing one sub-directory per class.
        classes: Sequence of class (sub-directory) names.
        image_size: Target size handed to ``cv2.resize``, which interprets it
            as ``(width, height)``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the resized RGB images
        (converted with ``img_to_array``) and their integer class indices,
        in matching order.
    """
    image_list, label_list = [], []
    for i, class_name in enumerate(classes):
        class_path = os.path.join(dataset_path, class_name)
        print(f'Loading images from: {class_path}')
        skipped = 0
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split derived from it) reproducible.
        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            try:
                image = cv2.imread(image_path)
                if image is None:
                    # imread reports an undecodable file by returning None
                    # rather than raising, so count it instead of losing it
                    # silently.
                    skipped += 1
                    continue
                # OpenCV decodes to BGR; convert so the arrays are RGB.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, image_size)
                image_list.append(img_to_array(image))
                label_list.append(i)
            except Exception as e:
                # Deliberately best-effort: one bad file must not abort the
                # whole load, but it is reported.
                print(f'Error loading image {image_path}: {e}')
        if skipped:
            print(f'Skipped {skipped} unreadable file(s) in: {class_path}')
    return np.array(image_list), np.array(label_list)

# Load the whole dataset into memory, then report its size and how many
# images each integer label received.
X, y = load_data(DATASET_PATH, CLASSES, IMAGE_SIZE)
# The leading newline must be written as an escape: a single-quoted string
# literal cannot span two physical lines.
print(f'\nTotal images loaded: {len(X)}')
print(f'Class distribution: {Counter(y)}')

## 2.1. Split Data and Preprocess
We normalize the pixel values to be between 0 and 1 and perform one-hot encoding on the labels. The data is split into 70% for training, 15% for validation, and 15% for testing.

In [None]:
# Rescale pixel intensities into [0, 1] (assumes 8-bit images, i.e. values 0-255).
X = X.astype('float32') / 255.0

# Expand the integer class indices into one-hot rows.
y_cat = to_categorical(y, NUM_CLASSES)

# Stage 1: keep 70% for training and hold out 30%, stratified by class.
x_train, x_temp, y_train, y_temp = train_test_split(
    X,
    y_cat,
    test_size=0.3,
    random_state=42,
    stratify=y_cat,
)

# Stage 2: cut the hold-out set in half -> 15% validation and 15% test overall.
x_val, x_test, y_val, y_test = train_test_split(
    x_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

# Report the resulting array shapes for a quick sanity check.
for split_name, split_array in (
    ('Training', x_train),
    ('Validation', x_val),
    ('Testing', x_test),
):
    print(f'{split_name} data shape: {split_array.shape}')

# 3. Data Augmentation
To reduce overfitting and make the model more robust, we apply data augmentation to the training images. During training, each batch is generated on the fly with random rotations, shifts, shears, zooms, and horizontal flips; the validation and test images are left unchanged.

In [None]:
# Random geometric transforms used to augment the training images.
augmentation_options = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(**augmentation_options)

# Iterator over the in-memory training arrays that yields augmented batches of 32.
train_generator = train_datagen.flow(x_train, y_train, batch_size=32)

# 4. Model Building (Transfer Learning)
We use VGG16, a model pre-trained on the large ImageNet dataset, as our base. We freeze its layers to leverage its learned features and add our own custom classifier on top. This approach is powerful and efficient for image classification.

In [None]:
# Pre-trained VGG16 convolutional stack (ImageNet weights) without its dense
# classifier, sized for our RGB inputs.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMAGE_SIZE, 3),
)

# Keep the pre-trained weights fixed; only the new head defined below is trained.
base_model.trainable = False

# Classification head: global average pool -> 512-unit ReLU -> dropout -> softmax.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(512, activation='relu')(pooled)
regularized = Dropout(0.5)(hidden)  # dropout acts as regularization
predictions = Dense(NUM_CLASSES, activation='softmax')(regularized)

# Full network: VGG16 input through to the new softmax output.
model = Model(inputs=base_model.input, outputs=predictions)

# One-hot labels -> categorical cross-entropy; track accuracy during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

model.summary()

# 5. Model Training
Now we train the model. We use callbacks to save the best version of the model (`ModelCheckpoint`), stop training early if performance doesn't improve (`EarlyStopping`), and reduce the learning rate on a plateau (`ReduceLROnPlateau`).

In [None]:
EPOCHS = 50
BATCH_SIZE = 32

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.2 after 5 stagnant epochs, never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

# Write the model to disk whenever validation accuracy reaches a new best.
checkpoint = ModelCheckpoint(
    filepath='bean_disease_model_best.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

callbacks = [early_stopping, lr_scheduler, checkpoint]

# Train on augmented batches; validate on the untouched validation arrays.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
)

# 6. Evaluation
After training, we evaluate the model's performance. We plot the accuracy and loss curves to visualize the training process, and then generate a classification report and confusion matrix to see how well the model performs on each class in the test set. Per-class results help reveal whether the model is biased toward particular classes.

In [None]:
def plot_history(history):
    """Draw side-by-side training/validation curves for accuracy and loss.

    Args:
        history: Object whose ``history`` attribute maps the keys
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``
            to per-epoch value sequences (e.g. the value returned by
            ``model.fit``).
    """
    _, axes = plt.subplots(1, 2, figsize=(15, 5))

    # One row per panel: (train key, train legend label, validation key,
    # panel title, y-axis label). Left panel is accuracy, right panel is loss.
    panels = (
        ('accuracy', 'train_accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'train_loss', 'val_loss', 'Model Loss', 'Loss'),
    )

    for axis, (train_key, train_label, val_key, title, y_label) in zip(axes, panels):
        axis.plot(history.history[train_key], label=train_label)
        axis.plot(history.history[val_key], label=val_key)
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.show()

# Visualize how accuracy and loss evolved over the training epochs.
plot_history(history)

# Evaluate on the held-out test data (verbose=0 suppresses the progress bar).
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
# The leading newline must be written as an escape: a single-quoted string
# literal cannot span two physical lines.
print(f'\nTest Accuracy: {accuracy*100:.2f}%')
print(f'Test Loss: {loss:.4f}')

## 6.1. Classification Report and Confusion Matrix
This provides a detailed breakdown of the model's predictions, showing precision, recall, and F1-score for each class. The confusion matrix visualizes where the model is making correct and incorrect predictions.

In [None]:
# Per-class probabilities for each test image, reduced to class indices;
# the one-hot test labels are reduced the same way.
y_pred_probs = model.predict(x_test)
y_pred_classes = y_pred_probs.argmax(axis=1)
y_true_classes = y_test.argmax(axis=1)

# Precision / recall / F1 per class.
print('Classification Report:')
report = classification_report(
    y_true_classes,
    y_pred_classes,
    target_names=CLASSES,
)
print(report)

# Confusion matrix: rows are actual classes, columns are predicted classes.
cm = confusion_matrix(y_true_classes, y_pred_classes)
plt.figure(figsize=(8, 6))
heatmap_axes = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=CLASSES,
    yticklabels=CLASSES,
)
heatmap_axes.set_title('Confusion Matrix')
heatmap_axes.set_ylabel('Actual Class')
heatmap_axes.set_xlabel('Predicted Class')
plt.show()