In [7]:
import numpy as np
import pandas as pd
import os
import cv2 as cv

## Loading the dataset

In [3]:
#loading the dataset
img = cv.imread(r'C:\Users\Siddu\Downloads\CV project\archive\PlantVillage\tomato\Tomato_healthy\e8ccc10e-1991-4cd9-8419-fc3857f96867___RS_HL 0550.JPG')
img = cv.resize(img,(256,256))
if img is None:
    print("Error: Image not found or unable to load.")
else:
    
    cv.imshow('Image', img)

   
    while True:
        if cv.waitKey(1) & 0xFF == ord('q'):
            break

    cv.destroyAllWindows()

In [2]:
DATASET_PATH = r"C:\Users\Siddu\Downloads\CV project\archive\PlantVillage"

images = []
labels = []

image_extensions = ['.jpg', '.jpeg', '.png']

# Traverse each top-level class folder (e.g., Pepper, Potato)
for class_name in os.listdir(DATASET_PATH):
    class_folder = os.path.join(DATASET_PATH, class_name)

    if not os.path.isdir(class_folder):
        continue

    # Traverse each subclass folder (e.g., PepperBacteria)
    for subclass_name in os.listdir(class_folder):
        subclass_folder = os.path.join(class_folder, subclass_name)

        if not os.path.isdir(subclass_folder):
            continue

        for file_name in os.listdir(subclass_folder):
            if not any(file_name.lower().endswith(ext) for ext in image_extensions):
                continue

            file_path = os.path.join(subclass_folder, file_name)

            img = cv.imread(file_path)
            if img is None:
                print(f"Could not read {file_path}")
                continue

            img_resized = cv.resize(img, (256,256))
            images.append(img_resized)
            labels.append(subclass_name)

print(f"Loaded {len(images)} images from {len(set(labels))} classes.")

Loaded 20638 images from 15 classes.


### Using mean color

In [15]:
def extract_mean_color(image):
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

    mean_rgb = np.mean(image, axis=(0, 1))
    mean_hsv = np.mean(hsv, axis=(0, 1))

    return np.concatenate([mean_rgb, mean_hsv])


In [15]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

features = [extract_mean_color(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
print("Accuracy:", model.score(X_test, y_test))


Accuracy: 0.609108527131783


### Using color histogram

In [13]:
def extract_color_histogram(image):
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

    hist_hue = cv.calcHist([hsv], [0], None, [32], [0, 180])
    hist_hue = cv.normalize(hist_hue, hist_hue).flatten()

    return hist_hue

In [22]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

features = [extract_color_histogram(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
print("Accuracy:", model.score(X_test, y_test))


Accuracy: 0.7844961240310078


### Using  Color Moments (Mean, Std, Skew for HSV)

In [30]:
from scipy.stats import skew

def extract_color_moments(image):
    image = cv.resize(image, (128, 128))
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

    color_moments = []
    for i in range(3):  # H, S, V
        channel = hsv[:, :, i].flatten()
        mean = np.mean(channel)
        std = np.std(channel)
        sk = skew(channel)
        color_moments.extend([mean, std, sk])

    return np.array(color_moments)  # 9 features


In [27]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

features = [extract_color_moments(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
print("Accuracy:", model.score(X_test, y_test))


Accuracy: 0.6331395348837209


In [31]:
y_pred = model.predict(X_test)
y_pred

array(['Tomato__Tomato_YellowLeaf__Curl_Virus',
       'Pepper__bell___Bacterial_spot', 'Tomato_Bacterial_spot', ...,
       'Pepper__bell___Bacterial_spot', 'Tomato_Early_blight',
       'Pepper__bell___healthy'], dtype='<U43')

### Using HSV

In [29]:
def extract_hsv_mean_only(image):
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)
    return np.mean(hsv, axis=(0, 1))


In [37]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

features = [extract_hsv_mean_only(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
print("Accuracy:", model.score(X_test, y_test))

Accuracy: 0.488953488372093


## Combining color and histogram

In [11]:
def extract_combined_color_histogram(image):
    mean_color = extract_mean_color(image)
    hist = extract_color_histogram(image)
    combined = np.concatenate([mean_color, hist])
    return combined


In [44]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_hsv_mean_only(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))

Accuracy: 0.49534883720930234



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.40      0.22      0.28       249
                     Pepper__bell___healthy       0.33      0.40      0.36       370
                      Potato___Early_blight       0.70      0.88      0.78       250
                       Potato___Late_blight       0.23      0.15      0.18       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.57      0.68      0.62       532
                        Tomato_Early_blight       1.00      0.00      0.01       250
                         Tomato_Late_blight       0.73      0.31      0.44       477
                           Tomato_Leaf_Mold       0.62      0.02      0.04       238
                  Tomato_Septoria_leaf_spot       0.50      0.67      0.57       443
Tomato_Spider_mites_Two_spotted_spider_mite       0.44      0.34

  _warn_prf(average, modifier, msg_start, len(result))


## Combined Histogram and Moments

In [47]:
def extract_histogram_moments(image):
    hist = extract_color_histogram(image)
    moments = extract_color_moments(image)
    return np.concatenate([hist, moments])


In [48]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_histogram_moments(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))

Accuracy: 0.623062015503876



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.49      0.53      0.51       249
                     Pepper__bell___healthy       0.42      0.46      0.44       370
                      Potato___Early_blight       0.76      0.81      0.78       250
                       Potato___Late_blight       0.42      0.44      0.43       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.74      0.80      0.77       532
                        Tomato_Early_blight       0.43      0.18      0.25       250
                         Tomato_Late_blight       0.63      0.42      0.50       477
                           Tomato_Leaf_Mold       0.84      0.21      0.33       238
                  Tomato_Septoria_leaf_spot       0.58      0.66      0.62       443
Tomato_Spider_mites_Two_spotted_spider_mite       0.60      0.66

## Combining 3 features extractions

In [54]:
def extract_all_color_features(image):
    return np.concatenate([
        extract_mean_color(image),
        extract_color_histogram(image),
        extract_color_moments(image),
        extract_hsv_mean_only(image)
    ])


In [55]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_histogram_moments(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))

Accuracy: 0.6180232558139535



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.49      0.50      0.49       249
                     Pepper__bell___healthy       0.41      0.45      0.43       370
                      Potato___Early_blight       0.72      0.84      0.77       250
                       Potato___Late_blight       0.42      0.47      0.44       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.76      0.80      0.78       532
                        Tomato_Early_blight       0.40      0.19      0.26       250
                         Tomato_Late_blight       0.62      0.42      0.50       477
                           Tomato_Leaf_Mold       0.90      0.23      0.37       238
                  Tomato_Septoria_leaf_spot       0.60      0.62      0.61       443
Tomato_Spider_mites_Two_spotted_spider_mite       0.58      0.66

  _warn_prf(average, modifier, msg_start, len(result))


## Texture Features

### Using GLCM

In [6]:
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray
import cv2 as cv
import numpy as np

def extract_glcm_features(image):
    gray_image = cv.resize(image, (128, 128))
    gray_image = rgb2gray(gray_image) 
    gray_image = (gray_image * 255).astype(np.uint8) 

    glcm = graycomatrix(gray_image, [5], [0], 256, symmetric=True, normed=True)
    glcm_props = np.hstack([
        graycoprops(glcm, prop).flatten()
        for prop in ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    ])

    return glcm_props


In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_glcm_features(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))

Accuracy: 0.22093023255813954



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.00      0.00      0.00       249
                     Pepper__bell___healthy       0.00      0.00      0.00       370
                      Potato___Early_blight       0.14      0.05      0.08       250
                       Potato___Late_blight       0.00      0.00      0.00       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.29      0.55      0.38       532
                        Tomato_Early_blight       0.00      0.00      0.00       250
                         Tomato_Late_blight       0.00      0.00      0.00       477
                           Tomato_Leaf_Mold       0.00      0.00      0.00       238
                  Tomato_Septoria_leaf_spot       0.00      0.00      0.00       443
Tomato_Spider_mites_Two_spotted_spider_mite       0.15      0.51

  _warn_prf(average, modifier, msg_start, len(result))


## Color histogram + GLCM

In [9]:
def extract_features(image):
    return np.concatenate([
        extract_color_histogram(image),
        extract_glcm_features(image)
    ])

In [14]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_features(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))

Accuracy: 0.22674418604651161



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.00      0.00      0.00       249
                     Pepper__bell___healthy       0.00      0.00      0.00       370
                      Potato___Early_blight       0.17      0.04      0.06       250
                       Potato___Late_blight       0.00      0.00      0.00       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.29      0.54      0.38       532
                        Tomato_Early_blight       0.00      0.00      0.00       250
                         Tomato_Late_blight       1.00      0.01      0.02       477
                           Tomato_Leaf_Mold       0.00      0.00      0.00       238
                  Tomato_Septoria_leaf_spot       0.00      0.00      0.00       443
Tomato_Spider_mites_Two_spotted_spider_mite       0.15      0.53

### Mean color + GLCM 

In [16]:
def extract_features2(image):
    return np.concatenate([
        extract_mean_color(image),
        extract_glcm_features(image)
    ])

In [17]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_features2(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))

Accuracy: 0.3763565891472868

                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.55      0.16      0.25       249
                     Pepper__bell___healthy       0.29      0.10      0.15       370
                      Potato___Early_blight       0.45      0.62      0.52       250
                       Potato___Late_blight       0.24      0.07      0.11       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.38      0.78      0.51       532
                        Tomato_Early_blight       0.00      0.00      0.00       250
                         Tomato_Late_blight       0.57      0.21      0.30       477
                           Tomato_Leaf_Mold       0.00      0.00      0.00       238
                  Tomato_Septoria_leaf_spot       0.53      0.33      0.41       443
Tomato_Spider_mites_Two_spotted_sp

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Local Binary Pattern (LBP) Histogram

In [19]:
from skimage.feature import local_binary_pattern
from skimage.color import rgb2gray
import cv2
import numpy as np

def extract_lbp_features(image):
    gray = cv2.resize(image, (128, 128))
    gray = rgb2gray(gray)

    radius = 1
    n_points = 8 * radius

    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')

    (hist, _) = np.histogram(lbp.ravel(),
                             bins=np.arange(0, n_points + 3),
                             range=(0, n_points + 2))

    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)

    return hist


In [20]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_lbp_features(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))



Accuracy: 0.3815891472868217



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.37      0.20      0.26       249
                     Pepper__bell___healthy       0.26      0.14      0.18       370
                      Potato___Early_blight       0.52      0.36      0.42       250
                       Potato___Late_blight       0.55      0.16      0.25       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.38      0.76      0.50       532
                        Tomato_Early_blight       0.14      0.00      0.01       250
                         Tomato_Late_blight       0.50      0.34      0.40       477
                           Tomato_Leaf_Mold       0.00      0.00      0.00       238
                  Tomato_Septoria_leaf_spot       0.23      0.16      0.19       443
Tomato_Spider_mites_Two_spotted_spider_mite       0.26      0.42

  _warn_prf(average, modifier, msg_start, len(result))


## LBP + mean color

In [22]:
def extract_features3(image):
    return np.concatenate([
        extract_mean_color(image),
        extract_lbp_features(image)
    ])

In [23]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_features3(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))



Accuracy: 0.5184108527131783

                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.42      0.29      0.35       249
                     Pepper__bell___healthy       0.65      0.64      0.65       370
                      Potato___Early_blight       0.77      0.81      0.79       250
                       Potato___Late_blight       0.33      0.23      0.27       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.56      0.72      0.63       532
                        Tomato_Early_blight       0.00      0.00      0.00       250
                         Tomato_Late_blight       0.53      0.24      0.33       477
                           Tomato_Leaf_Mold       0.75      0.21      0.33       238
                  Tomato_Septoria_leaf_spot       0.50      0.66      0.57       443
Tomato_Spider_mites_Two_spotted_sp

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Meancolor + LBP + color histogram + hsv + color moments

In [31]:
def extract_features4(image):
    return np.concatenate([
        extract_mean_color(image),
        extract_color_histogram(image),
        extract_color_moments(image),
        extract_hsv_mean_only(image),
        extract_lbp_features(image),
    ])

In [32]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

features = [extract_features4(img) for img in images]
X = np.array(features)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", model.score(X_test, y_test))
print()
print(classification_report(y_test,y_pred))



Accuracy: 0.612015503875969

                                             precision    recall  f1-score   support

              Pepper__bell___Bacterial_spot       0.50      0.52      0.51       249
                     Pepper__bell___healthy       0.60      0.61      0.60       370
                      Potato___Early_blight       0.76      0.86      0.81       250
                       Potato___Late_blight       0.39      0.36      0.37       250
                           Potato___healthy       0.00      0.00      0.00        38
                      Tomato_Bacterial_spot       0.70      0.78      0.74       532
                        Tomato_Early_blight       0.39      0.14      0.20       250
                         Tomato_Late_blight       0.70      0.31      0.43       477
                           Tomato_Leaf_Mold       0.84      0.24      0.37       238
                  Tomato_Septoria_leaf_spot       0.56      0.68      0.61       443
Tomato_Spider_mites_Two_spotted_spi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
