1. Feature Extraction

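Color Histogram – captures the color distribution of the image (computed in HSV space)

Haralick Texture Features – capture texture statistics derived from the gray-level co-occurrence matrix (GLCM)

Local Binary Pattern (LBP) – captures fine-grained local texture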

In [1]:
pip install opencv-python mahotas scikit-image

Collecting mahotas
  Downloading mahotas-1.4.18-cp312-cp312-win_amd64.whl.metadata (14 kB)
Collecting scikit-image
  Downloading scikit_image-0.25.2-cp312-cp312-win_amd64.whl.metadata (14 kB)
Collecting imageio!=2.35.0,>=2.33 (from scikit-image)
  Downloading imageio-2.37.0-py3-none-any.whl.metadata (5.2 kB)
Collecting lazy-loader>=0.4 (from scikit-image)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Downloading mahotas-1.4.18-cp312-cp312-win_amd64.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
    --------------------------------------- 0.0/1.7 MB ? eta -:--:--
    --------------------------------------- 0.0/1.7 MB ? eta -:--:--
    --------------------------------------- 0.0/1.7 MB ? eta -:--:--
    --------------------------------------- 0.0/1.7 MB ? eta -:--:--
    --------------------------------------- 0.0/1.7 MB ? eta -:--:--
    --------------------------------------- 0.0/1.7 MB 131.3 kB/s eta 0:00:13
    -------------------

In [1]:
import os
import warnings

import cv2
import numpy as np
from mahotas import features as mht
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import LabelEncoder

def extract_color_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalized 3-D HSV color histogram of ``image``.

    Args:
        image: Image array in OpenCV BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        1-D array with ``bins[0] * bins[1] * bins[2]`` entries.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Joint histogram over all three channels; OpenCV stores hue in [0, 180)
    # for 8-bit images, while saturation and value span [0, 256).
    channel_indices = [0, 1, 2]
    channel_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], channel_indices, None, bins,
                             channel_ranges)

    # In-place normalization using cv2.normalize's default settings.
    cv2.normalize(histogram, histogram)
    return histogram.flatten()

def extract_haralick(image):
    """Return Haralick texture features of ``image`` averaged over axis 0.

    Args:
        image: Image array in OpenCV BGR channel order.

    Returns:
        1-D array: the column-wise mean of ``mahotas.features.haralick``
        computed on the grayscale image (presumably one row per GLCM
        direction -- confirm against the mahotas documentation).
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    per_direction = mht.haralick(grayscale)
    return per_direction.mean(axis=0)

def extract_lbp(image, numPoints=24, radius=8):
    """Return a normalized histogram of uniform Local Binary Pattern codes.

    Args:
        image: Image array in OpenCV BGR channel order.
        numPoints: Number of circularly sampled neighbor points.
        radius: Radius of the sampling circle, in pixels.

    Returns:
        1-D float array with ``numPoints + 2`` bins that sums to
        (approximately) 1.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    codes = local_binary_pattern(grayscale, numPoints, radius, method="uniform")

    # numPoints + 3 integer edges -> numPoints + 2 unit-width bins.
    bin_edges = np.arange(0, numPoints + 3)
    counts, _ = np.histogram(codes.ravel(),
                             bins=bin_edges,
                             range=(0, numPoints + 2))

    # Convert counts to frequencies; the epsilon guards against division by zero.
    frequencies = counts.astype("float")
    frequencies /= (frequencies.sum() + 1e-6)
    return frequencies

def extract_features_from_folder(folder_path):
    """Extract one feature vector per image from a class-per-folder dataset.

    Expected layout: ``folder_path/<class_name>/<image file>``. Every
    sub-directory name is used as the label of the images it contains.

    Each image is resized to 128x128 and described by the concatenation of
    its color histogram, Haralick features and LBP histogram.

    Args:
        folder_path: Root directory holding one sub-directory per class.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays: one row of features per
        successfully read image, and the matching class-folder names.

    Notes:
        * Entries of ``folder_path`` that are not directories are ignored.
        * Files that OpenCV cannot decode are skipped with a warning.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for cls in sorted(os.listdir(folder_path)):
        class_path = os.path.join(folder_path, cls)
        if not os.path.isdir(class_path):
            # Stray files next to the class folders (e.g. Thumbs.db) are not classes.
            continue

        for file in sorted(os.listdir(class_path)):
            # Case-insensitive match so that e.g. "IMG_001.JPG" is not dropped.
            if not file.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(class_path, file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                warnings.warn(f"Skipping unreadable image: {image_path}",
                              stacklevel=2)
                continue
            image = cv2.resize(image, (128, 128))

            color_hist = extract_color_histogram(image)
            haralick = extract_haralick(image)
            lbp = extract_lbp(image)

            features.append(np.hstack([color_hist, haralick, lbp]))
            labels.append(cls)

    return np.array(features), np.array(labels)

In [2]:
# Root of the training split; each sub-folder name is used as a class label
# by extract_features_from_folder.
# NOTE(review): this is an absolute, machine-specific path -- point it at your
# local copy of the dataset before running.
dataset_path = r"D:\Rice-Disease-Simulation\data\train"  # e.g., ./rice_leaf_disease_dataset
X, y = extract_features_from_folder(dataset_path)

# Encode the string class labels as integers. `le` is kept so the same
# label <-> index mapping can be reused on other splits.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

2. Train-Test Split

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training folder for validation. `stratify` keeps the
# class proportions equal in both parts; `random_state` fixes the shuffle.
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

3. Feature Scaling (if needed)

In [4]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training part only, then apply the same fitted
# transform to the validation part so no validation statistics leak in.
# NOTE(review): tree ensembles are generally insensitive to feature scaling;
# this step presumably exists so other estimators can be swapped in -- confirm.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

4. Model Training

In [5]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default hyperparameters; only the seed is fixed
# so that repeated runs build the same trees.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_scaled, y_train)

5. Evaluation

In [6]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the 20% hold-out split. No target_names are passed, so the report
# rows are the encoded label indices; le.classes_[i] gives the name of row i.
y_pred = clf.predict(X_val_scaled)
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.83      0.91        12
           1       0.95      0.95      0.95        63
           2       0.99      0.94      0.96        70
           3       0.92      0.96      0.94        70
           4       0.97      1.00      0.99        70
           5       1.00      1.00      1.00        70

    accuracy                           0.97       355
   macro avg       0.97      0.95      0.96       355
weighted avg       0.97      0.97      0.97       355



6. Evaluate on the untouched validation folder

In [7]:
# Separate validation folder that was not used for fitting the scaler or model.
# NOTE(review): absolute, machine-specific path -- adjust to your local dataset.
val_dataset_path = r"D:\Rice-Disease-Simulation\data\validation"
X_val_real, y_val_real = extract_features_from_folder(val_dataset_path)

# Encode labels using same LabelEncoder (raises if the folder contains a
# class name that was not present in the training data)
y_val_real_encoded = le.transform(y_val_real)

# Scale validation features using the same scaler
X_val_real_scaled = scaler.transform(X_val_real)

y_pred = clf.predict(X_val_real_scaled)

# Pass the full label set explicitly: without `labels`, classification_report
# raises ValueError when a class is absent from both y_true and y_pred,
# because the inferred labels no longer match the length of target_names.
print(classification_report(y_val_real_encoded, y_pred,
                            labels=np.arange(len(le.classes_)),
                            target_names=le.classes_))

                       precision    recall  f1-score   support

bacterial_leaf_blight       1.00      0.89      0.94        19
           brown_spot       0.92      0.88      0.90        76
              healthy       0.94      0.95      0.95        88
           leaf_blast       0.90      0.92      0.91        88
           leaf_scald       0.98      1.00      0.99        88
    narrow_brown_spot       1.00      1.00      1.00        88

             accuracy                           0.95       447
            macro avg       0.96      0.94      0.95       447
         weighted avg       0.95      0.95      0.95       447

