In [1]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from skimage.feature import greycomatrix, greycoprops
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.optimizers import Adam
from lightgbm import LGBMClassifier

In [2]:
image_path = r"C:\Users\Dileep chandu\Downloads\archive (3)\Segmented Medicinal Leaf Images"

In [3]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical

# Path to the folder containing images
image_dir = r"C:\Users\Dileep chandu\Downloads\archive (3)\Segmented Medicinal Leaf Images"

# Placeholder for data
X, y = [], []
class_labels = {}  # Dictionary to store folder-to-label mapping

# Loop through subdirectories
for class_index, class_name in enumerate(os.listdir(image_dir)):
    class_path = os.path.join(image_dir, class_name)
    
    if os.path.isdir(class_path):  # Ensure it's a folder
        class_labels[class_index] = class_name  # Store label mapping
        
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            
            if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                # Read and process image
                image = cv2.imread(img_path)
                if image is None:
                    print(f"⚠️ Skipping: {img_name} (failed to load)")
                    continue

                image = cv2.resize(image, (128, 128))  # Resize
                image = image / 255.0  # Normalize

                X.append(image)
                y.append(class_index)  # Assign class index

# Convert lists to NumPy arrays
X = np.array(X)
y = to_categorical(y, num_classes=len(class_labels))  # One-hot encoding

print(f"✅ Loaded {len(X)} images from {len(class_labels)} classes successfully.")


✅ Loaded 1835 images from 30 classes successfully.


In [4]:
# Function to apply Gabor Filter
def gabor_filter(img):
    gabor_kernels = []
    filtered_images = []
    for theta in np.arange(0, np.pi, np.pi / 4):
        kernel = cv2.getGaborKernel((21, 21), 8.0, theta, 10.0, 0.5, 0, ktype=cv2.CV_32F)
        gabor_kernels.append(kernel)
        filtered = cv2.filter2D(img, cv2.CV_8UC3, kernel)
        filtered_images.append(filtered)
    return filtered_images

In [5]:
# Function for GLCM Feature Extraction
def glcm_features(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    glcm = greycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    contrast = greycoprops(glcm, 'contrast')[0, 0]
    homogeneity = greycoprops(glcm, 'homogeneity')[0, 0]
    return [contrast, homogeneity]


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Ensure num_classes is defined
num_classes = len(class_labels)  

# CNN Model
def build_cnn(num_classes):
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Conv2D(128, (3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')  # Use the dynamic number of classes
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Build CNN Model
cnn_model = build_cnn(num_classes)
cnn_model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
from sklearn.decomposition import PCA

# Reduce image size before flattening (Optional)
X_small = np.array([cv2.resize(img, (32, 32)) for img in X])  # Resize to 32x32
X_flat = X_small.reshape(X_small.shape[0], -1)  # Flatten

# Apply PCA to reduce dimensions
pca = PCA(n_components=100)  # Reduce to 100 features
X_pca = pca.fit_transform(X_flat)


In [8]:
rfc = RandomForestClassifier(n_estimators=100, max_features="sqrt", random_state=42)


In [9]:
print(f"📊 X shape: {X.shape if 'X' in locals() else '❌ Not defined'}")
print(f"📊 y shape: {y.shape if 'y' in locals() else '❌ Not defined'}")


📊 X shape: (1835, 128, 128, 3)
📊 y shape: (1835, 30)


In [10]:
print(f"X shape: {X.shape if 'X' in locals() else 'Not defined'}")
print(f"y shape: {y.shape if 'y' in locals() else 'Not defined'}")


X shape: (1835, 128, 128, 3)
y shape: (1835, 30)


In [11]:
import numpy as np

# Convert one-hot encoded labels back to categorical values
y_categorical = np.argmax(y, axis=1)  # Convert to shape (1835,)
print(f"✅ Converted y shape: {y_categorical.shape}")  # Should be (1835,)


✅ Converted y shape: (1835,)


In [12]:
X_flat = X.reshape(X.shape[0], -1)  # Shape: (1835, 128*128*3)
print(f"✅ X_flat shape: {X_flat.shape}")  # Should be (1835, 49152)


✅ X_flat shape: (1835, 49152)


In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_flat, y_categorical, test_size=0.2, random_state=42)

print(f"📊 Train shape: {X_train.shape}, {y_train.shape}")
print(f"📊 Test shape: {X_test.shape}, {y_test.shape}")


📊 Train shape: (1468, 49152), (1468,)
📊 Test shape: (367, 49152), (367,)


In [14]:
# from xgboost import XGBClassifier

# rfc = RandomForestClassifier(n_estimators=100)  # Reduce trees from 300 to 100
# xgb = XGBClassifier(n_estimators=50)  # Reduce estimators from 100 to 50


In [17]:
from sklearn.decomposition import PCA

# Reduce image dimensions
pca = PCA(n_components=300)  # Reduce features from 49,152 to 300
X_pca = pca.fit_transform(X_flat)

# Now use X_pca instead of X_flat in RandomForestClassifier
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

rfc = RandomForestClassifier(n_estimators=100, max_features=0.5, random_state=42)
rfc.fit(X_train, y_train)


RandomForestClassifier(max_features=0.5, random_state=42)

In [19]:
 import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Convert One-Hot Encoding to Integer Labels
y_labels = np.argmax(y, axis=1)  # Converts (1835, 30) to (1835,)

# Flatten the image dataset for XGBoost
X_flat = X.reshape(X.shape[0], -1)  # (1835, 128, 128, 3) → (1835, 49152)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_flat, y_labels, test_size=0.2, random_state=42)

# Train XGBoost
xgb = XGBClassifier(n_estimators=50, eval_metric='mlogloss', use_label_encoder=False)
xgb.fit(X_train, y_train)

# Predict
preds_xgb = xgb.predict(X_test)
print(f"✅ XGBoost Accuracy: {accuracy_score(y_test, preds_xgb)}")


✅ XGBoost Accuracy: 0.7002724795640327


In [None]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1]
}

# Initialize classifier
xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)

# Perform Grid Search
grid_search = GridSearchCV(xgb, param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters & best score
print("✅ Best Parameters:", grid_search.best_params_)
print("✅ Best Accuracy:", grid_search.best_score_)

# Train the best model
best_xgb = grid_search.best_estimator_
best_xgb.fit(X_train, y_train)

# Evaluate on test set
preds_best_xgb = best_xgb.predict(X_test)
print("✅ Optimized XGBoost Accuracy:", accuracy_score(y_test, preds_best_xgb))




Fitting 3 folds for each of 27 candidates, totalling 81 fits


In [2]:
pip install git init


Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement git (from versions: none)
ERROR: No matching distribution found for git
