In [1]:
# Fetch the dataset once: skip the download when the folder is already present, and use a
# shallow clone because only the current files are needed (a full clone transfers about 2 GiB).
![ -d PlantVillage-Dataset ] || git clone --depth 1 https://github.com/spMohanty/PlantVillage-Dataset

Cloning into 'PlantVillage-Dataset'...
remote: Enumerating objects: 163229, done.[K
remote: Total 163229 (delta 0), reused 0 (delta 0), pack-reused 163229[K
Receiving objects: 100% (163229/163229), 2.00 GiB | 24.60 MiB/s, done.
Resolving deltas: 100% (99/99), done.
Updating files: 100% (182401/182401), done.


In [2]:
# Make the colour-image folder of the cloned repository the working directory.
# Relative paths used by later cells are resolved against this directory.
%cd PlantVillage-Dataset/raw/color

/content/PlantVillage-Dataset/raw/color


In [3]:
# %pip (rather than !pip) installs into the environment the running kernel actually uses.
%pip install tensorflow numpy pandas opencv-python scikit-learn



In [4]:
# Explicit %pip magic: a bare `pip install` only works while IPython's automagic is enabled.
%pip install numpy opencv-python scikit-learn pywavelets scikit-image




In [5]:
import cv2
import numpy as np
import pywt
from sklearn.decomposition import PCA

try:
    # Newer scikit-image releases spell these "gray..." and no longer ship the "grey..."
    # names; alias them so the rest of the notebook keeps using the names it already has.
    from skimage.feature import graycomatrix as greycomatrix, graycoprops as greycoprops
except ImportError:
    from skimage.feature import greycomatrix, greycoprops

# Step 1: Histogram Equalization
def histogram_equalization(image):
    """Equalize the intensity histogram of an image.

    Args:
        image: Image array. A 3-dimensional array is treated as BGR colour;
            anything else is handed to ``cv2.equalizeHist`` directly.

    Returns:
        A new image. For colour input only channel 0 of the YUV representation
        is equalized, and the result is converted back to BGR.
    """
    # Single-plane images can be equalized as they are.
    if len(image.shape) != 3:
        return cv2.equalizeHist(image)

    # Colour: equalize the first YUV plane only, leaving the other two untouched.
    yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
    luma = yuv[..., 0]
    yuv[..., 0] = cv2.equalizeHist(luma)
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)

# Step 2: K-means Clustering
def kmeans_clustering(image, k=3):
    """Quantize an image to ``k`` colours with OpenCV k-means.

    Every pixel is replaced by the centre of the cluster it was assigned to.

    Args:
        image: ``numpy`` image array, either 2-D (single channel) or 3-D
            (height, width, channels). The channel count is read from the
            array instead of being assumed to be 3, so grayscale images are
            clustered on their single intensity value.
        k: Number of clusters.

    Returns:
        ``uint8`` array with the same shape as ``image``.

    Raises:
        ValueError: If ``image`` is neither 2-D nor 3-D.

    Note:
        Initial centres are chosen with ``cv2.KMEANS_RANDOM_CENTERS``, so the
        segmentation can differ between runs.
    """
    if image.ndim not in (2, 3):
        raise ValueError(
            "kmeans_clustering expects a 2-D or 3-D image, got %d dimensions" % image.ndim
        )

    # One sample per pixel, one feature per channel.
    channels = 1 if image.ndim == 2 else image.shape[2]
    pixel_values = np.float32(image.reshape((-1, channels)))

    # Stop after 100 iterations or once the centres move by less than 0.2.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
    _, labels, centers = cv2.kmeans(pixel_values, k, None, criteria,
                                    10, cv2.KMEANS_RANDOM_CENTERS)

    # Look up each pixel's centre colour and restore the original layout.
    centers = np.uint8(centers)
    segmented_image = centers[labels.flatten()]
    return segmented_image.reshape(image.shape)

# Step 3: Contour Tracing
def contour_tracing(image):
    """Find the outer contours of the edges in an image.

    The image is converted to grayscale (unless it already is 2-D), smoothed
    with a 5x5 Gaussian blur, edge-detected with Canny (thresholds 50 and 150)
    and passed to ``cv2.findContours`` with ``RETR_EXTERNAL`` and
    ``CHAIN_APPROX_SIMPLE``.

    Args:
        image: BGR colour image, or a 2-D single-channel image.

    Returns:
        The contours reported by ``cv2.findContours``.
    """
    # A 2-D array is already single-channel; BGR2GRAY would reject it.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 50, 150)

    # OpenCV 3.x returns (image, contours, hierarchy); other versions return
    # (contours, hierarchy). The contours are second from the end in both.
    found = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return found[-2]

# Step 4: Discrete Wavelet Transform (DWT)
def dwt(image, wavelet='bior1.3'):
    """Single-level 2-D discrete wavelet transform over the image's row/column axes.

    Args:
        image: 2-D image, or 3-D image laid out as (rows, columns, channels).
        wavelet: Wavelet name passed to ``pywt.dwt2``.

    Returns:
        Tuple ``(LL, LH, HL, HH)``: the approximation sub-band followed by the
        three detail sub-bands, in the order ``pywt.dwt2`` returns them.
    """
    # pywt.dwt2 transforms the LAST two axes by default, which for a
    # (rows, columns, channels) image would mix columns with channels.
    # Pin the transform to the two spatial axes instead.
    LL, (LH, HL, HH) = pywt.dwt2(image, wavelet, axes=(0, 1))
    return LL, LH, HL, HH

# Step 5: Principal Component Analysis (PCA)
def apply_pca(data, n_components=50):
    """Fit a PCA on ``data`` and return the projected samples.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        n_components: Number of components to keep. An integer request is
            capped at ``min(n_samples, n_features)`` so that small inputs
            return fewer components rather than raising. Non-integer values
            are passed to ``PCA`` unchanged.

    Returns:
        The result of ``PCA.fit_transform(data)``.
    """
    is_integer = isinstance(n_components, (int, np.integer)) and not isinstance(n_components, bool)
    if is_integer:
        # PCA cannot produce more components than the smaller data dimension.
        n_samples, n_features = np.shape(data)
        n_components = min(n_components, n_samples, n_features)

    pca = PCA(n_components=n_components)
    return pca.fit_transform(data)

# Step 6: Gray-Level Co-occurrence Matrix (GLCM)
def glcm_features(image):
    """Compute gray-level co-occurrence texture statistics for a BGR image.

    The image is converted to grayscale and a symmetric, normalized GLCM is
    built for pixel distance 1 at 0, 45, 90 and 135 degrees.

    Args:
        image: BGR colour image.

    Returns:
        Tuple ``(contrast, dissimilarity, homogeneity, energy, correlation)``,
        each being the value returned by ``greycoprops`` for that property.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    distances = [1]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    cooccurrence = greycomatrix(grayscale, distances, angles, symmetric=True, normed=True)

    # Same order as the documented return value.
    property_names = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')
    return tuple(greycoprops(cooccurrence, name) for name in property_names)

# Apply preprocessing to dataset
import os
from glob import glob

# --- Configuration ---------------------------------------------------------
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')  # compared case-insensitively
MAX_IMAGES = None        # set to an int to process only the first N images (quick trial runs)
N_PCA_COMPONENTS = 50    # upper bound; capped by the size of each image's LL sub-band

# An earlier cell already changes into PlantVillage-Dataset/raw/color, in which
# case the repository-relative path below does not exist from here. Fall back
# to the current directory so the cell works from either location.
dataset_path = 'PlantVillage-Dataset/raw/color'
if not os.path.isdir(dataset_path):
    dataset_path = '.'

# Search recursively and ignore extension case.
# NOTE(review): presumably the images sit in one sub-folder per class and many use
# upper-case extensions such as ".JPG"; a flat "*.jpg" pattern would miss those.
image_files = sorted(
    path
    for path in glob(os.path.join(dataset_path, '**', '*'), recursive=True)
    if path.lower().endswith(IMAGE_EXTENSIONS)
)
if MAX_IMAGES is not None:
    image_files = image_files[:MAX_IMAGES]
if not image_files:
    raise FileNotFoundError(
        "No image files found under %r (working directory: %s)"
        % (dataset_path, os.getcwd())
    )


def extract_features(image, n_pca_components=N_PCA_COMPONENTS):
    """Turn one BGR image into a flat feature vector.

    Steps: histogram equalization, k-means colour quantization, DWT of the
    grayscale result, PCA of the LL sub-band (rows as samples, columns as
    features) and GLCM texture statistics.

    Args:
        image: BGR image as returned by ``cv2.imread``.
        n_pca_components: Maximum number of principal components to keep.

    Returns:
        1-D array: flattened PCA output followed by contrast, dissimilarity,
        homogeneity, energy and correlation.
    """
    # Step 1: Histogram Equalization
    equalized = histogram_equalization(image)
    # Step 2: K-means Clustering
    segmented = kmeans_clustering(equalized)
    # Step 3 (contour tracing) is intentionally not run here: its result was
    # never used in the feature vector, so it only cost time per image.
    # Step 4: Discrete Wavelet Transform on a single-channel image, so the
    # transform runs over the two spatial axes.
    gray = cv2.cvtColor(segmented, cv2.COLOR_BGR2GRAY)
    LL, _, _, _ = dwt(gray)
    # Step 5: PCA. The component count cannot exceed either dimension of LL.
    n_components = min(n_pca_components, *LL.shape)
    pca_features = apply_pca(LL, n_components=n_components)
    # Step 6: Gray-Level Co-occurrence Matrix statistics
    texture_stats = glcm_features(segmented)

    return np.hstack([np.ravel(pca_features)] + [np.ravel(stat) for stat in texture_stats])


# --- Run the pipeline ------------------------------------------------------
# NOTE(review): k-means starts from random centres, so the extracted features can
# differ between runs; seed OpenCV's RNG first if exact reproducibility matters.
feature_rows = []
processed_files = []  # row i of preprocessed_data was computed from processed_files[i]
skipped_files = []    # files cv2.imread could not decode

for file in image_files:
    image = cv2.imread(file)
    if image is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        skipped_files.append(file)
        continue
    feature_rows.append(extract_features(image))
    processed_files.append(file)

if not feature_rows:
    raise ValueError("None of the %d image files could be read" % len(image_files))

# The feature length depends on image size; refuse to build a ragged array.
feature_lengths = sorted({row.shape[0] for row in feature_rows})
if len(feature_lengths) > 1:
    raise ValueError(
        "Images produced feature vectors of different lengths %s; "
        "resize the images to a common size before extracting features" % feature_lengths
    )

# Convert to numpy array
preprocessed_data = np.array(feature_rows)
print("Extracted features with shape", preprocessed_data.shape,
      "- skipped", len(skipped_files), "unreadable file(s)")


In [None]:
def save_processed_images(image_files, processed_dir='processed_images'):
    """Write an equalized, colour-quantized copy of each image to ``processed_dir``.

    This helper is only defined here; nothing in this cell calls it, so running
    the cell does not write any files.

    Args:
        image_files: Iterable of image paths to process.
        processed_dir: Output directory, created if missing. A relative path
            is resolved against the current working directory.

    Returns:
        Number of images successfully written.

    Note:
        Each copy is named after the source file's basename only, so inputs
        from different folders that share a basename overwrite one another.
    """
    # Directory to save the processed images
    os.makedirs(processed_dir, exist_ok=True)

    saved = 0
    for file in image_files:
        image = cv2.imread(file)
        if image is None:
            # Unreadable file: cv2.imread returns None instead of raising.
            continue
        # Step 1: Histogram Equalization
        image = histogram_equalization(image)
        # Step 2: K-means Clustering
        image = kmeans_clustering(image)
        # Save the processed image after K-means Clustering
        processed_image_path = os.path.join(processed_dir, os.path.basename(file))
        if cv2.imwrite(processed_image_path, image):
            saved += 1

    print("Processed images saved to", processed_dir)
    return saved


# Uncomment to write the processed copies:
# save_processed_images(image_files)


"""In this example, the preprocessed images are saved to a new directory called
processed_images. Each preprocessed image keeps the filename of its original
image, so the original dataset remains unchanged and the preprocessed copies
live in a separate directory."""