In [None]:
# Import libraries
import os

import cv2
import numpy as np
import pandas as pd
from google.colab import drive
from google.colab import files
from skimage.feature import graycomatrix, graycoprops
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [None]:
# Mount Google Drive
# Exposes the Drive contents under /content/drive; the dataset path used by
# the rest of the notebook lives below this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Define dataset path
# Root folder of the image dataset on the mounted Drive. The loader defined
# later in this notebook walks the entries of this folder and uses each
# sub-directory as one class.
dataset_path = '/content/drive/My Drive/PlantVillage'

In [None]:
# GLCM feature column names, in the same order in which
# extract_glcm_features returns the corresponding values.
GLCM_COLUMNS = [
    'contrast',
    'correlation',
    'energy',
    'homogeneity',
]

In [None]:
# Function to extract GLCM features
def extract_glcm_features(image):
    """Compute four GLCM texture descriptors for a BGR image.

    The image is converted to grayscale and one symmetric, normalised
    gray-level co-occurrence matrix is built for distance 1 and angle 0
    over 256 gray levels.

    Args:
        image: Colour image in BGR channel order (as accepted by
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

    Returns:
        list: ``[contrast, correlation, energy, homogeneity]``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cooccurrence = graycomatrix(
        grayscale,
        distances=[1],
        angles=[0],
        levels=256,
        symmetric=True,
        normed=True,
    )
    # Exactly one distance and one angle were requested, so the value of each
    # property is read from position [0, 0] of the returned array.
    property_names = ('contrast', 'correlation', 'energy', 'homogeneity')
    return [graycoprops(cooccurrence, name)[0, 0] for name in property_names]

In [None]:
# Function to extract histogram features
def extract_histogram_features(image):
    """Build a normalised per-channel HSV histogram descriptor for a BGR image.

    The image is converted to HSV; each of the three channels gets its own
    256-bin histogram over the range [0, 256), each histogram is divided by
    its own sum, and the three results are concatenated in channel order
    (0, 1, 2).

    Args:
        image: Colour image in BGR channel order.

    Returns:
        numpy.ndarray: 1-D array holding the three normalised histograms
        back to back.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    normalised = []
    for channel in range(3):
        # NOTE(review): for 8-bit images OpenCV presumably stores hue in
        # 0..179, so the upper bins of channel 0 may always be empty - confirm.
        counts = cv2.calcHist([hsv_image], [channel], None, [256], [0, 256]).flatten()
        # Scale by the total count so the bins of this channel sum to 1.
        normalised.append(counts / counts.sum())
    return np.concatenate(normalised)

In [None]:
# Load images and extract features
def load_data_and_extract_features(dataset_path):
    """Walk a class-per-folder image dataset and build the feature matrix.

    Every sub-directory of ``dataset_path`` is treated as one class. Class
    folders and the files inside them are visited in sorted order, because
    ``os.listdir`` gives no ordering guarantee and the label numbering and
    row order would otherwise vary between runs. Entries of ``dataset_path``
    that are not directories are ignored, so label ``i`` always refers to
    ``classes[i]`` in the returned list.

    Args:
        dataset_path: Directory containing one sub-directory per class.

    Returns:
        tuple: ``(X, y, classes)`` where ``X`` is the array of combined GLCM
        and histogram features (one row per readable image), ``y`` is the
        array of integer labels, and ``classes`` is the sorted list of class
        directory names.
    """
    # Keep only real class folders; stray files must not consume a label index.
    classes = sorted(
        entry
        for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )

    X = []  # Feature matrix
    y = []  # Labels
    for label, class_name in enumerate(classes):
        class_path = os.path.join(dataset_path, class_name)
        for img_name in sorted(os.listdir(class_path)):
            image = cv2.imread(os.path.join(class_path, img_name))
            if image is None:
                # cv2.imread returned nothing usable for this file; skip it.
                continue
            # Resize image for uniformity
            image = cv2.resize(image, (128, 128))
            glcm_features = extract_glcm_features(image)
            histogram_features = extract_histogram_features(image)
            # GLCM values first, histogram bins after them.
            X.append(glcm_features + histogram_features.tolist())
            y.append(label)

    return np.array(X), np.array(y), classes

In [None]:
# Load dataset and extract features
# X holds one row of combined GLCM + histogram features per readable image,
# y holds the integer label of each row, and class_names is the list of names
# returned by the loader (labels are positions in that list).
X, y, class_names = load_data_and_extract_features(dataset_path)

In [None]:
# Define column names for printing
# Three histogram channels with 256 bins each; both are numbered from 1.
histogram_columns = [
    f'hist_channel_{i+1}_bin_{j+1}'
    for i in range(3)
    for j in range(256)
]
# GLCM columns come first, matching the order in which features are combined.
columns = [*GLCM_COLUMNS, *histogram_columns]

In [None]:
# Print the feature matrix with column names
# pandas is imported in the notebook's import cell rather than here, so all
# dependencies are declared in one place.
feature_df = pd.DataFrame(X, columns=columns)
# Keep the class label next to the features so the exported table is self-contained.
feature_df['Label'] = y
print("Feature Matrix (with GLCM and histogram features):")
print(feature_df.head())

Feature Matrix (with GLCM and histogram features):
     contrast  correlation    energy  homogeneity  hist_channel_1_bin_1  \
0  718.481422     0.763072  0.013868     0.069130              0.004272   
1  453.110974     0.887714  0.020153     0.107413              0.011292   
2  181.731361     0.791887  0.020238     0.103961              0.002686   
3  741.419906     0.780671  0.013752     0.071793              0.005981   
4  694.242372     0.723660  0.014731     0.071990              0.004333   

   hist_channel_1_bin_2  hist_channel_1_bin_3  hist_channel_1_bin_4  \
0                   0.0              0.000000              0.002502   
1                   0.0              0.000122              0.001770   
2                   0.0              0.000000              0.000000   
3                   0.0              0.000000              0.000061   
4                   0.0              0.000610              0.005432   

   hist_channel_1_bin_5  hist_channel_1_bin_6  ...  hist_channel_3_bin_

In [None]:
# Split into train and test sets
# 20% of the samples are held out for evaluation; the fixed seed keeps the
# split identical across runs for the same X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train SVM classifier
# probability=True makes SVC fit an additional probability model that relies
# on internally shuffled data; random_state pins that shuffling so the
# probability estimates are reproducible, using the same seed (42) as the
# rest of the notebook. Class predictions from predict() are unaffected.
svm_model = SVC(kernel='linear', probability=True, random_state=42)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

In [None]:
# Evaluate SVM
print("\nSVM Classification Report:")
# Pass the label indices explicitly: without `labels`, the report infers them
# from the data and fails (or misaligns names) whenever the labels present in
# this split do not cover every entry of class_names.
print(classification_report(
    y_test,
    y_pred_svm,
    labels=list(range(len(class_names))),
    target_names=class_names,
))
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))


SVM Classification Report:
                                             precision    recall  f1-score   support

                             Tomato_healthy       0.94      0.94      0.94       307
                         Tomato_Late_blight       0.73      0.71      0.72       378
                      Tomato_Bacterial_spot       0.84      0.87      0.86       419
Tomato_Spider_mites_Two_spotted_spider_mite       0.78      0.76      0.77       343
                        Tomato_Early_blight       0.88      0.49      0.63       233
                  Tomato_Septoria_leaf_spot       0.77      0.81      0.79       359
                Tomato__Tomato_mosaic_virus       0.87      0.60      0.71        78
                           Tomato_Leaf_Mold       0.74      0.83      0.78       207
                        Tomato__Target_Spot       0.74      0.83      0.78       292
      Tomato__Tomato_YellowLeaf__Curl_Virus       0.79      0.90      0.84       613
                     Pepper__bell___

In [None]:
# Train Random Forest classifier
# 100 trees with a fixed seed; fit() hands back the fitted estimator, so
# construction and training are chained into one expression.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

In [None]:
# Evaluate Random Forest
print("\nRandom Forest Classification Report:")
# Pass the label indices explicitly: without `labels`, the report infers them
# from the data and fails (or misaligns names) whenever the labels present in
# this split do not cover every entry of class_names.
print(classification_report(
    y_test,
    y_pred_rf,
    labels=list(range(len(class_names))),
    target_names=class_names,
))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))


Random Forest Classification Report:
                                             precision    recall  f1-score   support

                             Tomato_healthy       0.99      1.00      1.00       307
                         Tomato_Late_blight       0.92      0.93      0.92       378
                      Tomato_Bacterial_spot       0.92      0.94      0.93       419
Tomato_Spider_mites_Two_spotted_spider_mite       0.96      0.94      0.95       343
                        Tomato_Early_blight       0.91      0.83      0.87       233
                  Tomato_Septoria_leaf_spot       0.96      0.95      0.95       359
                Tomato__Tomato_mosaic_virus       0.99      0.99      0.99        78
                           Tomato_Leaf_Mold       0.99      0.97      0.98       207
                        Tomato__Target_Spot       0.93      0.93      0.93       292
      Tomato__Tomato_YellowLeaf__Curl_Virus       0.94      0.98      0.96       613
                     Peppe

In [None]:
# Save the feature matrix as an Excel file
# index=False leaves the DataFrame's row index out of the sheet.
feature_df.to_excel("feature_matrix.xlsx", index=False)

# Download the file in Google Colab
# `files` is imported from google.colab in the notebook's import cell.
files.download("feature_matrix.xlsx")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>