In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
import os
import glob as gb
import cv2
import PIL
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
from tensorflow.keras.utils import to_categorical


2024-07-10 08:26:27.476769: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-10 08:26:27.476933: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-10 08:26:27.708777: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Define paths
trainpath = '/kaggle/input/riceleafdiseasedataset/dataset/train'
testpath = '/kaggle/input/riceleafdiseasedataset/dataset/test'

# Image Processing - Training Data
new_size = 224
train_images = []
train_labels = []
class_disease = {'BacterialBlight': 0, 'Blast': 1, 'BrownSpot': 2, 'Tungro': 3}

for i in os.listdir(trainpath):
    if i in class_disease:
        print("Entering the folder:", i)
        files = gb.glob(pathname=str(trainpath + '/' + i + '/*.jpg')) + gb.glob(pathname=str(trainpath + '/' + i + '/*.JPG'))
        print("Number of images in the folder:", len(files))
        for j in files:
            image_raw = cv2.imread(j)
            image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
            resize_image = cv2.resize(image, (new_size, new_size))
            train_images.append(list(resize_image))
            train_labels.append(class_disease[i])

# Image Processing - Testing Data
new_size = 224
test_images = []
test_labels = []

for i in os.listdir(testpath):
    if i in class_disease:
        print("Entering to the folder name:", i)
        files = gb.glob(pathname=str(testpath + '/' + i + '/*.jpg')) + gb.glob(pathname=str(testpath + '/' + i + '/*.JPG'))
        print("Number of images in the folder is", len(files))
        for j in files:
            image_raw = cv2.imread(j)
            image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
            resize_image = cv2.resize(image, (new_size, new_size))
            test_images.append(list(resize_image))
            test_labels.append(class_disease[i])

def list_to_array_train(train_images, train_labels):
    return np.array(train_images), np.array(train_labels)

X_train, y_train = list_to_array_train(train_images, train_labels)

def list_to_array_test(test_images, test_labels):
    return np.array(test_images), np.array(test_labels)

X_test, y_test = list_to_array_test(test_images, test_labels)

print(X_train.shape)
print("*" * 20)
print(y_train.shape)
print("*" * 20)
print(X_test.shape)
print(y_test.shape)

def keras_to_categorical(y_train, y_test):
    return to_categorical(y_train), to_categorical(y_test)

y_train1 = y_train
y_test1 = y_test
y_train, y_test = keras_to_categorical(y_train, y_test)

y_train1.shape, y_test1.shape

def convert_one_hot_to_categorical(one_hot_labels):
    return np.argmax(one_hot_labels, axis=1)


Entering the folder: Tungro
Number of images in the folder: 1108
Entering the folder: BacterialBlight
Number of images in the folder: 1384
Entering the folder: Blast
Number of images in the folder: 1240
Entering the folder: BrownSpot
Number of images in the folder: 1400
Entering to the folder name: Tungro
Number of images in the folder is 200
Entering to the folder name: BacterialBlight
Number of images in the folder is 200
Entering to the folder name: Blast
Number of images in the folder is 200
Entering to the folder name: BrownSpot
Number of images in the folder is 200
(5132, 224, 224, 3)
********************
(5132,)
********************
(800, 224, 224, 3)
(800,)


In [3]:
def extract_sift_features(images, max_features=128):
    sift = cv2.SIFT_create()
    sift_features = []
    for img in images:
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        keypoints, descriptors = sift.detectAndCompute(img, None)
        if descriptors is not None:
            descriptors = descriptors.flatten()
            if len(descriptors) < max_features:
                descriptors = np.pad(descriptors, (0, max_features - len(descriptors)), 'constant')
            else:
                descriptors = descriptors[:max_features]
        else:
            descriptors = np.zeros(max_features)
        sift_features.append(descriptors)
    return np.array(sift_features)

In [4]:
train_feature_sift = extract_sift_features(X_train)
test_feature_sift = extract_sift_features(X_test)

# sift + Random Forest Classifier

In [5]:
from sklearn.ensemble import RandomForestClassifier

def train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test):

        rf = RandomForestClassifier()
        rf = rf.fit(train_feature_sift, y_train)
        test_pred = rf.predict(test_feature_sift)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'sift+RF.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.38750000, Recall: 0.38750000, Precision: 0.98622788, F1 Score: 0.54647303, AUC: 0.69291667
Run 2 - Accuracy: 0.41000000, Recall: 0.41000000, Precision: 0.98012452, F1 Score: 0.57278341, AUC: 0.70375000
Run 3 - Accuracy: 0.40125000, Recall: 0.40125000, Precision: 0.97523920, F1 Score: 0.56389430, AUC: 0.69895833
Run 4 - Accuracy: 0.39625000, Recall: 0.39625000, Precision: 0.97083424, F1 Score: 0.55354101, AUC: 0.69625000
Run 5 - Accuracy: 0.38750000, Recall: 0.38750000, Precision: 0.99009901, F1 Score: 0.55094336, AUC: 0.69291667
Run 6 - Accuracy: 0.41250000, Recall: 0.41250000, Precision: 0.99336283, F1 Score: 0.57231683, AUC: 0.70562500
Run 7 - Accuracy: 0.40250000, Recall: 0.40250000, Precision: 0.98781364, F1 Score: 0.56225441, AUC: 0.70020833
Run 8 - Accuracy: 0.39375000, Recall: 0.39375000, Precision: 0.98970640, F1 Score: 0.55653594, AUC: 0.69604167
Run 9 - Accuracy: 0.40625000, Recall: 0.40625000, Precision: 0.98761615, F1 Score: 0.56825057, AUC: 0.70229167
R

# sift + Decision Tree Classifier

In [6]:
from sklearn.tree import DecisionTreeClassifier

def train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test):

        dt = DecisionTreeClassifier()
        dt.fit(train_feature_sift, y_train)
        test_pred = dt.predict(test_feature_sift)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'sift+DTC.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.56125000, Recall: 0.56125000, Precision: 0.56131058, F1 Score: 0.56013359, AUC: 0.70750000
Run 2 - Accuracy: 0.55000000, Recall: 0.55000000, Precision: 0.55044278, F1 Score: 0.54928249, AUC: 0.70000000
Run 3 - Accuracy: 0.56625000, Recall: 0.56625000, Precision: 0.56638251, F1 Score: 0.56560877, AUC: 0.71083333
Run 4 - Accuracy: 0.54875000, Recall: 0.54875000, Precision: 0.55120321, F1 Score: 0.54836672, AUC: 0.69916667
Run 5 - Accuracy: 0.56375000, Recall: 0.56375000, Precision: 0.56306239, F1 Score: 0.56242840, AUC: 0.70916667
Run 6 - Accuracy: 0.56625000, Recall: 0.56625000, Precision: 0.56592556, F1 Score: 0.56569422, AUC: 0.71083333
Run 7 - Accuracy: 0.54750000, Recall: 0.54750000, Precision: 0.54783824, F1 Score: 0.54644590, AUC: 0.69833333
Run 8 - Accuracy: 0.55000000, Recall: 0.55000000, Precision: 0.55050838, F1 Score: 0.54943599, AUC: 0.70000000
Run 9 - Accuracy: 0.57000000, Recall: 0.57000000, Precision: 0.57017653, F1 Score: 0.56900610, AUC: 0.71333333
R

# sift + KNN Classifier

In [7]:
from sklearn.neighbors import KNeighborsClassifier

def train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test):

        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(train_feature_sift, y_train)
        test_pred = knn.predict(test_feature_sift)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'sift+KNN.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 2 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 3 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 4 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 5 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 6 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 7 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 8 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
Run 9 - Accuracy: 0.41125000, Recall: 0.41125000, Precision: 0.54783309, F1 Score: 0.46153883, AUC: 0.64500000
R

# sift + SVM Classifier

In [8]:
from sklearn.svm import SVC

def train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test):

        y_train_cat = convert_one_hot_to_categorical(y_train)
        y_test_cat = convert_one_hot_to_categorical(y_test)

        # Define and train SVM Classifier
        svm = SVC(probability=True)
        svm.fit(train_feature_sift, y_train_cat)
        test_pred = svm.predict(test_feature_sift)
        test_pred_proba = svm.predict_proba(test_feature_sift)

        # Calculate evaluation metrics
        accuracy = accuracy_score(y_test_cat, test_pred)
        recall = recall_score(y_test_cat, test_pred, average='weighted')
        precision = precision_score(y_test_cat, test_pred, average='weighted')
        f1 = f1_score(y_test_cat, test_pred, average='weighted')
        auc = roc_auc_score(y_test_cat, test_pred_proba, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'sift+SVM.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77217917
Run 2 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77181667
Run 3 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77220000
Run 4 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77216250
Run 5 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77225000
Run 6 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77216875
Run 7 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77227083
Run 8 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77213542
Run 9 - Accuracy: 0.51125000, Recall: 0.51125000, Precision: 0.52006392, F1 Score: 0.50708452, AUC: 0.77209375
R

In [9]:
from xgboost import XGBClassifier

def train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test):

        y_train_cat = convert_one_hot_to_categorical(y_train)
        y_test_cat = convert_one_hot_to_categorical(y_test)

        xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
        xgb.fit(train_feature_sift, y_train_cat)
        test_pred = xgb.predict(test_feature_sift)
        test_pred_proba = xgb.predict_proba(test_feature_sift)

        # Calculate evaluation metrics
        accuracy = accuracy_score(y_test_cat, test_pred)
        recall = recall_score(y_test_cat, test_pred, average='weighted')
        precision = precision_score(y_test_cat, test_pred, average='weighted')
        f1 = f1_score(y_test_cat, test_pred, average='weighted')
        auc = roc_auc_score(y_test_cat, test_pred_proba, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_fuse_and_evaluate_model(train_feature_sift, y_train, test_feature_sift, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Calculate average metrics
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

# Print average metrics
print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

# Convert results to a pandas DataFrame
results_df = pd.DataFrame(results)

# Save results to an Excel file
output_file = 'sift+XGB.xlsx'
results_df.to_excel(output_file, index_label='Run')

print(f"\nMetrics results saved to '{output_file}'")


Run 1 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 2 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 3 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 4 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 5 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 6 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 7 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 8 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
Run 9 - Accuracy: 0.64750000, Recall: 0.64750000, Precision: 0.65078422, F1 Score: 0.64650950, AUC: 0.86420417
R