In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.models import Model

# ---------------------
# Step 1: Load and Preprocess Images
# ---------------------

# Root of the training set: one sub-directory per class, image files inside.
# NOTE: absolute, machine-specific path -- edit before running elsewhere.
base_path = r'D:\FInance_Analyst_Agent\Rice-Disease-Simulation\data\train'  # Change to 'Val/rgb' if needed
img_size = (224, 224)  # must match the input_shape passed to MobileNetV2 below

images = []
labels = []
skipped_files = 0

# Only sub-directories are classes. A stray file in base_path must not get a
# label index: it would shift the ids of the real classes and leave
# `class_names` longer than the set of labels that actually occur, which makes
# classification_report(target_names=class_names) fail.
class_names = sorted(
    entry for entry in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, entry))
)
if not class_names:
    raise FileNotFoundError(f"No class sub-directories found in {base_path!r}")
label_map = {name: idx for idx, name in enumerate(class_names)}

for class_name in class_names:
    class_path = os.path.join(base_path, class_name)
    # os.listdir() order is filesystem-dependent; sorting keeps the sample
    # order (and therefore the seeded split in Step 3) reproducible.
    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        # NOTE(review): cv2.imread decodes to BGR channel order, whereas the
        # ImageNet MobileNetV2 weights are presumably trained on RGB -- confirm.
        # The order is left unchanged here because every loader that feeds
        # `model`/`knn` (e.g. the validation cell) must be switched together.
        img = cv2.imread(img_path)
        if img is None:
            # Not a decodable image (or not a file at all); count it so the
            # loss of samples is visible instead of silent.
            skipped_files += 1
            continue
        img = cv2.resize(img, img_size)
        images.append(img)
        labels.append(label_map[class_name])

if not images:
    raise ValueError(f"No readable images found under {base_path!r}")
if skipped_files:
    print(f"⚠️ Skipped {skipped_files} unreadable entries while loading.")

images = np.array(images)
labels = np.array(labels)
# MobileNetV2-specific input scaling (imported from keras mobilenet_v2).
images = preprocess_input(images)

print(f"✅ Loaded {len(images)} images across {len(class_names)} classes.")

# ---------------------
# Step 2: Feature Extraction using MobileNetV2
# ---------------------

# Headless (include_top=False) ImageNet backbone used as a fixed feature
# extractor; nothing is trained or fine-tuned in this notebook.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

features = model.predict(images, batch_size=32, verbose=1)
# Flatten the per-image feature map into a single vector for scikit-learn.
features = features.reshape(features.shape[0], -1)

print(f"✅ Feature shape: {features.shape}")

# ---------------------
# Step 3: Train/Test Split
# ---------------------

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42, stratify=labels)

# ---------------------
# Step 4: Train KNN Classifier
# ---------------------

knn = KNeighborsClassifier(n_neighbors=5, weights='distance', n_jobs=-1)
knn.fit(X_train, y_train)

# ---------------------
# Step 5: Evaluate
# ---------------------

y_pred = knn.predict(X_test)

# Pin the label ids explicitly so every row/column lines up with `class_names`
# even when a class happens to have no samples in y_test / y_pred.
label_ids = list(range(len(class_names)))

print("\n✅ Classification Report:\n")
print(classification_report(y_test, y_pred, labels=label_ids, target_names=class_names))

print("✅ Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=label_ids))


✅ Loaded 2100 images across 6 classes.
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 208ms/step
✅ Feature shape: (2100, 62720)

✅ Classification Report:

                       precision    recall  f1-score   support

bacterial_leaf_blight       0.97      0.99      0.98        70
           brown_spot       0.78      0.80      0.79        70
              healthy       0.79      0.87      0.83        70
           leaf_blast       0.79      0.66      0.72        70
           leaf_scald       0.97      1.00      0.99        70
    narrow_brown_spot       0.94      0.94      0.94        70

             accuracy                           0.88       420
            macro avg       0.87      0.88      0.87       420
         weighted avg       0.87      0.88      0.87       420

✅ Confusion Matrix:
[[69  0  0  0  0  1]
 [ 1 56  6  5  0  2]
 [ 0  3 61  5  0  1]
 [ 1 12 10 46  1  0]
 [ 0  0  0  0 70  0]
 [ 0  1  0  2  1 66]]


Evaluate the trained KNN classifier on the untouched (held-out) validation set

In [10]:
# ---------------------
# Step 6: Load Validation Images
# ---------------------

# NOTE: absolute, machine-specific path -- edit before running elsewhere.
val_path = r'D:\Rice-Disease-Simulation\data\Validation'  # Validation set path
val_images = []
val_labels = []
val_skipped_files = 0
img_size = (224, 224)

# `class_names` and `label_map` are intentionally NOT rebuilt from val_path.
# `knn` predicts the integer ids assigned in the training cell, so validation
# labels must be encoded with that same mapping; re-deriving it from this
# folder would silently mis-score everything if the folder set or order differs.
val_class_dirs = sorted(
    entry for entry in os.listdir(val_path)
    if os.path.isdir(os.path.join(val_path, entry))
)
unknown_classes = [name for name in val_class_dirs if name not in label_map]
if unknown_classes:
    raise ValueError(
        f"Validation folders not seen during training: {unknown_classes}"
    )
missing_classes = [name for name in class_names if name not in val_class_dirs]
if missing_classes:
    print(f"⚠️ No validation folder for classes: {missing_classes}")

for class_name in val_class_dirs:  # Use the same class_names and label_map
    class_path = os.path.join(val_path, class_name)
    # Sorted for a deterministic sample order across filesystems.
    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        # NOTE(review): loaded exactly like the training images (cv2 default
        # BGR order, same resize); any change to channel order or resizing has
        # to be made in the training cell as well.
        img = cv2.imread(img_path)
        if img is None:
            # Not a decodable image; count it instead of dropping it silently.
            val_skipped_files += 1
            continue
        img = cv2.resize(img, img_size)
        val_images.append(img)
        val_labels.append(label_map[class_name])

if not val_images:
    raise ValueError(f"No readable images found under {val_path!r}")
if val_skipped_files:
    print(f"⚠️ Skipped {val_skipped_files} unreadable entries while loading.")

val_images = np.array(val_images)
val_labels = np.array(val_labels)
val_images = preprocess_input(val_images)

print(f"\n📦 Loaded {len(val_images)} validation images.")

# ---------------------
# Step 7: Feature Extraction on Validation Set
# ---------------------

# Reuses `model` from the training cell so features are comparable.
val_features = model.predict(val_images, batch_size=32, verbose=1)
val_features = val_features.reshape(val_features.shape[0], -1)

print(f"🔍 Validation Feature shape: {val_features.shape}")

# ---------------------
# Step 8: Inference and Evaluation
# ---------------------

val_preds = knn.predict(val_features)

# Pin the label ids so rows/columns always follow the training class order,
# even if a class has no validation samples.
val_label_ids = list(range(len(class_names)))

print("\n📊 Classification Report (Validation Set):\n")
print(classification_report(val_labels, val_preds, labels=val_label_ids, target_names=class_names))

print("🧾 Confusion Matrix (Validation Set):")
print(confusion_matrix(val_labels, val_preds, labels=val_label_ids))


📦 Loaded 528 validation images.
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 196ms/step
🔍 Validation Feature shape: (528, 62720)

📊 Classification Report (Validation Set):

                       precision    recall  f1-score   support

bacterial_leaf_blight       0.99      0.95      0.97        88
           brown_spot       0.80      0.82      0.81        88
              healthy       0.82      0.91      0.86        88
           leaf_blast       0.87      0.69      0.77        88
           leaf_scald       0.99      0.97      0.98        88
    narrow_brown_spot       0.89      1.00      0.94        88

             accuracy                           0.89       528
            macro avg       0.89      0.89      0.89       528
         weighted avg       0.89      0.89      0.89       528

🧾 Confusion Matrix (Validation Set):
[[84  0  0  0  0  4]
 [ 0 72  7  7  0  2]
 [ 1  5 80  2  0  0]
 [ 0 13 11 61  1  2]
 [ 0  0  0  0 85  3]
 [ 0  0  0  0  0 88]]
