In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
import os
import glob as gb
import cv2
import PIL
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
from tensorflow.keras.utils import to_categorical


In [11]:
# Define paths
trainpath = '/kaggle/input/riceleafdiseasedataset/dataset/train'
testpath = '/kaggle/input/riceleafdiseasedataset/dataset/test'

# Image Processing - Training Data
new_size = 224
train_images = []
train_labels = []
class_disease = {'BacterialBlight': 0, 'Blast': 1, 'BrownSpot': 2, 'Tungro': 3}

for i in os.listdir(trainpath):
    if i in class_disease:
        print("Entering the folder:", i)
        files = gb.glob(pathname=str(trainpath + '/' + i + '/*.jpg')) + gb.glob(pathname=str(trainpath + '/' + i + '/*.JPG'))
        print("Number of images in the folder:", len(files))
        for j in files:
            image_raw = cv2.imread(j)
            image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
            resize_image = cv2.resize(image, (new_size, new_size))
            train_images.append(list(resize_image))
            train_labels.append(class_disease[i])

# Image Processing - Testing Data
new_size = 224
test_images = []
test_labels = []

for i in os.listdir(testpath):
    if i in class_disease:
        print("Entering to the folder name:", i)
        files = gb.glob(pathname=str(testpath + '/' + i + '/*.jpg')) + gb.glob(pathname=str(testpath + '/' + i + '/*.JPG'))
        print("Number of images in the folder is", len(files))
        for j in files:
            image_raw = cv2.imread(j)
            image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
            resize_image = cv2.resize(image, (new_size, new_size))
            test_images.append(list(resize_image))
            test_labels.append(class_disease[i])

def list_to_array_train(train_images, train_labels):
    return np.array(train_images), np.array(train_labels)

X_train, y_train = list_to_array_train(train_images, train_labels)

def list_to_array_test(test_images, test_labels):
    return np.array(test_images), np.array(test_labels)

X_test, y_test = list_to_array_test(test_images, test_labels)

print(X_train.shape)
print("*" * 20)
print(y_train.shape)
print("*" * 20)
print(X_test.shape)
print(y_test.shape)

def keras_to_categorical(y_train, y_test):
    return to_categorical(y_train), to_categorical(y_test)

y_train1 = y_train
y_test1 = y_test
y_train, y_test = keras_to_categorical(y_train, y_test)

y_train1.shape, y_test1.shape

def convert_one_hot_to_categorical(one_hot_labels):
    return np.argmax(one_hot_labels, axis=1)


Entering the folder: Tungro
Number of images in the folder: 1108
Entering the folder: BacterialBlight
Number of images in the folder: 1384
Entering the folder: Blast
Number of images in the folder: 1240
Entering the folder: BrownSpot
Number of images in the folder: 1400
Entering to the folder name: Tungro
Number of images in the folder is 200
Entering to the folder name: BacterialBlight
Number of images in the folder is 200
Entering to the folder name: Blast
Number of images in the folder is 200
Entering to the folder name: BrownSpot
Number of images in the folder is 200
(5132, 224, 224, 3)
********************
(5132,)
********************
(800, 224, 224, 3)
(800,)


In [18]:
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray
from tqdm import tqdm  # for progress bar if needed

def compute_glcm_features(image, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256):
    if image.ndim == 3:  # Convert RGB to grayscale if necessary
        gray_image = rgb2gray(image)
    else:
        gray_image = image.astype(np.uint8)  # Convert to uint8 if not already
    
    # Ensure image is in uint8 format
    if gray_image.dtype != np.uint8:
        gray_image = (gray_image * 255).astype(np.uint8)
    
    glcm = graycomatrix(gray_image, distances=distances, angles=angles, levels=levels,
                        symmetric=True, normed=True)
    
    # Calculate GLCM properties
    contrast = graycoprops(glcm, 'contrast').flatten()
    dissimilarity = graycoprops(glcm, 'dissimilarity').flatten()
    homogeneity = graycoprops(glcm, 'homogeneity').flatten()
    energy = graycoprops(glcm, 'energy').flatten()
    correlation = graycoprops(glcm, 'correlation').flatten()
    
    # Concatenate all GLCM features into one array
    glcm_features = np.concatenate((contrast, dissimilarity, homogeneity, energy, correlation))
    
    return glcm_features

def extract_glcm_features(images):
    glcm_features = []
    # Initialize tqdm for progress bar if needed
    for img in tqdm(images, desc="Extracting GLCM features"):
        glcm_feature = compute_glcm_features(img)
        glcm_features.append(glcm_feature)
    
    return np.array(glcm_features)


In [19]:
train_feature_glcm = extract_glcm_features(X_train)
test_feature_glcm = extract_glcm_features(X_test)

Extracting GLCM features: 100%|██████████| 5132/5132 [03:37<00:00, 23.65it/s]
Extracting GLCM features: 100%|██████████| 800/800 [00:33<00:00, 23.55it/s]


# glcm + Random Forest Classifier

In [20]:
from sklearn.ensemble import RandomForestClassifier

def train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test):

        rf = RandomForestClassifier()
        rf = rf.fit(train_feature_glcm, y_train)
        test_pred = rf.predict(test_feature_glcm)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'glcm+RF.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.99250000, Recall: 0.99250000, Precision: 1.00000000, F1 Score: 0.99619289, AUC: 0.99625000
Run 2 - Accuracy: 0.99125000, Recall: 0.99125000, Precision: 1.00000000, F1 Score: 0.99556633, AUC: 0.99562500
Run 3 - Accuracy: 0.98750000, Recall: 0.98750000, Precision: 1.00000000, F1 Score: 0.99366764, AUC: 0.99375000
Run 4 - Accuracy: 0.99250000, Recall: 0.99250000, Precision: 1.00000000, F1 Score: 0.99619289, AUC: 0.99625000
Run 5 - Accuracy: 0.98750000, Recall: 0.98750000, Precision: 1.00000000, F1 Score: 0.99366764, AUC: 0.99375000
Run 6 - Accuracy: 0.99125000, Recall: 0.99125000, Precision: 1.00000000, F1 Score: 0.99556633, AUC: 0.99562500
Run 7 - Accuracy: 0.99000000, Recall: 0.99000000, Precision: 1.00000000, F1 Score: 0.99493661, AUC: 0.99500000
Run 8 - Accuracy: 0.98750000, Recall: 0.98750000, Precision: 1.00000000, F1 Score: 0.99366764, AUC: 0.99375000
Run 9 - Accuracy: 0.99000000, Recall: 0.99000000, Precision: 1.00000000, F1 Score: 0.99493661, AUC: 0.99500000
R

# glcm + Decision Tree Classifier

In [21]:
from sklearn.tree import DecisionTreeClassifier

def train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test):

        dt = DecisionTreeClassifier()
        dt.fit(train_feature_glcm, y_train)
        test_pred = dt.predict(test_feature_glcm)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'glcm+DTC.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.98250000, Recall: 0.98250000, Precision: 0.98260989, F1 Score: 0.98250860, AUC: 0.98833333
Run 2 - Accuracy: 0.98250000, Recall: 0.98250000, Precision: 0.98271899, F1 Score: 0.98251361, AUC: 0.98833333
Run 3 - Accuracy: 0.97250000, Recall: 0.97250000, Precision: 0.97330716, F1 Score: 0.97248008, AUC: 0.98166667
Run 4 - Accuracy: 0.97625000, Recall: 0.97625000, Precision: 0.97668526, F1 Score: 0.97623409, AUC: 0.98416667
Run 5 - Accuracy: 0.97750000, Recall: 0.97750000, Precision: 0.97789024, F1 Score: 0.97749195, AUC: 0.98500000
Run 6 - Accuracy: 0.98125000, Recall: 0.98125000, Precision: 0.98148666, F1 Score: 0.98126382, AUC: 0.98750000
Run 7 - Accuracy: 0.97500000, Recall: 0.97500000, Precision: 0.97539237, F1 Score: 0.97501052, AUC: 0.98333333
Run 8 - Accuracy: 0.96625000, Recall: 0.96625000, Precision: 0.96788891, F1 Score: 0.96625726, AUC: 0.97750000
Run 9 - Accuracy: 0.98500000, Recall: 0.98500000, Precision: 0.98528046, F1 Score: 0.98500736, AUC: 0.99000000
R

# glcm + KNN Classifier

In [22]:
from sklearn.neighbors import KNeighborsClassifier

def train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test):

        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(train_feature_glcm, y_train)
        test_pred = knn.predict(test_feature_glcm)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'glcm+KNN.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 2 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 3 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 4 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 5 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 6 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 7 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 8 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
Run 9 - Accuracy: 0.86875000, Recall: 0.86875000, Precision: 0.87781146, F1 Score: 0.86843222, AUC: 0.91312500
R

# glcm + SVM Classifier

In [23]:
from sklearn.svm import SVC

def train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test):

        y_train_cat = convert_one_hot_to_categorical(y_train)
        y_test_cat = convert_one_hot_to_categorical(y_test)

        # Define and train SVM Classifier
        svm = SVC(probability=True)
        svm.fit(train_feature_glcm, y_train_cat)
        test_pred = svm.predict(test_feature_glcm)
        test_pred_proba = svm.predict_proba(test_feature_glcm)

        # Calculate evaluation metrics
        accuracy = accuracy_score(y_test_cat, test_pred)
        recall = recall_score(y_test_cat, test_pred, average='weighted')
        precision = precision_score(y_test_cat, test_pred, average='weighted')
        f1 = f1_score(y_test_cat, test_pred, average='weighted')
        auc = roc_auc_score(y_test_cat, test_pred_proba, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'glcm+SVM.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79858125
Run 2 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79900000
Run 3 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79870208
Run 4 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79838958
Run 5 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79857500
Run 6 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79864375
Run 7 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79871458
Run 8 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79891875
Run 9 - Accuracy: 0.52625000, Recall: 0.52625000, Precision: 0.56299196, F1 Score: 0.51652173, AUC: 0.79845417
R

In [24]:
from xgboost import XGBClassifier

def train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test):

        y_train_cat = convert_one_hot_to_categorical(y_train)
        y_test_cat = convert_one_hot_to_categorical(y_test)

        xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
        xgb.fit(train_feature_glcm, y_train_cat)
        test_pred = xgb.predict(test_feature_glcm)
        test_pred_proba = xgb.predict_proba(test_feature_glcm)

        # Calculate evaluation metrics
        accuracy = accuracy_score(y_test_cat, test_pred)
        recall = recall_score(y_test_cat, test_pred, average='weighted')
        precision = precision_score(y_test_cat, test_pred, average='weighted')
        f1 = f1_score(y_test_cat, test_pred, average='weighted')
        auc = roc_auc_score(y_test_cat, test_pred_proba, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_glcm, y_train, test_feature_glcm, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'glcm+XGB.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 2 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 3 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 4 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 5 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 6 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 7 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 8 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
Run 9 - Accuracy: 0.99875000, Recall: 0.99875000, Precision: 0.99875622, F1 Score: 0.99874999, AUC: 1.00000000
R