# Imports and Setup

In [31]:
# Importing Necessary Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
import os
import glob as gb
import cv2
import PIL
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
from keras.models import Model
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Data Preparation

In [32]:
trainpath = '/kaggle/input/riceleafdiseasedataset/dataset/train'
testpath = '/kaggle/input/riceleafdiseasedataset/dataset/test'

# Image Processing

In [33]:
# Image Processing - Training Data
new_size = 224
train_images = []
train_labels = []
class_disease = {'BacterialBlight': 0, 'Blast': 1, 'BrownSpot': 2, 'Tungro': 3}

for i in os.listdir(trainpath):
    if i in class_disease:
        print("Entering the folder:", i)
        files = gb.glob(pathname=str(trainpath + '/' + i + '/*.jpg')) + gb.glob(pathname=str(trainpath + '/' + i + '/*.JPG'))
        print("Number of images in the folder:", len(files))
        for j in files:
            image_raw = cv2.imread(j)
            image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
            resize_image = cv2.resize(image, (new_size, new_size))
            train_images.append(list(resize_image))
            train_labels.append(class_disease[i])

# Image Processing - Testing Data
new_size = 224
test_images = []
test_labels = []

for i in os.listdir(testpath):
    if i in class_disease:
        print("Entering to the folder name:", i)
        files = gb.glob(pathname=str(testpath + '/' + i + '/*.jpg')) + gb.glob(pathname=str(testpath + '/' + i + '/*.JPG'))
        print("Number of images in the folder is", len(files))
        for j in files:
            image_raw = cv2.imread(j)
            image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
            resize_image = cv2.resize(image, (new_size, new_size))
            test_images.append(list(resize_image))
            test_labels.append(class_disease[i])

def list_to_array_train(train_images, train_labels):
    return np.array(train_images), np.array(train_labels)

X_train, y_train = list_to_array_train(train_images, train_labels)

def list_to_array_test(test_images, test_labels):
    return np.array(test_images), np.array(test_labels)

X_test, y_test = list_to_array_test(test_images, test_labels)

print(X_train.shape)
print("*" * 20)
print(y_train.shape)
print("*" * 20)
print(X_test.shape)
print(y_test.shape)

def keras_to_categorical(y_train, y_test):
    return to_categorical(y_train), to_categorical(y_test)

y_train1 = y_train
y_test1 = y_test
y_train, y_test = keras_to_categorical(y_train, y_test)

y_train1.shape, y_test1.shape

def convert_one_hot_to_categorical(one_hot_labels):
    return np.argmax(one_hot_labels, axis=1)

gpus = tf.config.list_physical_devices('GPU')
print(f"Num GPUs Available: {len(gpus)}")

if len(gpus) < 2:
    print("Not enough GPUs available, ensure your environment is configured correctly")
else:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    strategy = tf.distribute.MirroredStrategy()

Entering the folder: Tungro
Number of images in the folder: 1108
Entering the folder: BacterialBlight
Number of images in the folder: 1384
Entering the folder: Blast
Number of images in the folder: 1240
Entering the folder: BrownSpot
Number of images in the folder: 1400
Entering to the folder name: Tungro
Number of images in the folder is 200
Entering to the folder name: BacterialBlight
Number of images in the folder is 200
Entering to the folder name: Blast
Number of images in the folder is 200
Entering to the folder name: BrownSpot
Number of images in the folder is 200
(5132, 224, 224, 3)
********************
(5132,)
********************
(800, 224, 224, 3)
(800,)
Num GPUs Available: 2


# Model Definition - InceptionV3

In [34]:
# Deep Feature Extraction - InceptionV3
def model_inceptionV3():
    Inception_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    for layer in Inception_model.layers:
        layer.trainable = False
    feature = GlobalAveragePooling2D()(Inception_model.output)
    output = Model(inputs=Inception_model.input, outputs=feature)
    return output

# Training and Evaluation - Random Forest

In [35]:
# Random Forest Classifier - VGG19 Deep Features
def train_and_evaluate_model(X_train, y_train, X_test, y_test):
    with strategy.scope():
        model_inception = model_inceptionV3()
        model_inception.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
        train_feature_v3 = model_inception.predict(X_train)
        test_feature_v3 = model_inception.predict(X_test)

        rf = RandomForestClassifier()
        rf = rf.fit(train_feature_v3, y_train)
        test_pred = rf.predict(test_feature_v3)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_and_evaluate_model(X_train, y_train, X_test, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 101ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step
Run 1 - Accuracy: 0.87125000, Recall: 0.87125000, Precision: 0.98202485, F1 Score: 0.91660881, AUC: 0.93270833
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 75ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
Run 2 - Accuracy: 0.86250000, Recall: 0.86250000, Precision: 0.98204372, F1 Score: 0.91059730, AUC: 0.92833333
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 78ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 3 - Accuracy: 0.86500000, Recall: 0.86500000, Precision: 0.98079540, F1 Score: 0.9

# Training and Evaluation - KNN

In [36]:
from sklearn.neighbors import KNeighborsClassifier

# KNN Classifier - ResNet50 Deep Features
def train_and_evaluate_model(X_train, y_train, X_test, y_test, run_seed):
    np.random.seed(run_seed)
    tf.random.set_seed(run_seed)
    
    with strategy.scope():
        model_inception = model_inceptionV3()
        model_inception.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
        train_feature_v3 = model_inception.predict(X_train)
        test_feature_v3 = model_inception.predict(X_test)

        knn = KNeighborsClassifier()
        knn.fit(train_feature_v3, y_train)
        test_pred = knn.predict(test_feature_v3)

        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    run_seed = i  # Set a different seed for each run
    accuracy, recall, precision, f1, auc = train_and_evaluate_model(X_train, y_train, X_test, y_test, run_seed)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")


[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 84ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 1 - Accuracy: 0.82875000, Recall: 0.82875000, Precision: 0.85443479, F1 Score: 0.83656277, AUC: 0.89145833
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 79ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 2 - Accuracy: 0.82875000, Recall: 0.82875000, Precision: 0.85443479, F1 Score: 0.83656277, AUC: 0.89145833
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 78ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
Run 3 - Accuracy: 0.82875000, Recall: 0.82875000, Precision: 0.85443479, F1 Score: 0.83656277, AUC: 0.89145833
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 77ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
Run 4 - Accuracy: 0.82875000, Recall: 0.82875000, Precision: 0.

# Training and Evaluation - SVM

In [37]:
# SVM Classifier - VGG19 Deep Features
def train_and_evaluate_model(X_train, y_train, X_test, y_test, run_seed):
    
    np.random.seed(run_seed)
    tf.random.set_seed(run_seed)
    
    with strategy.scope():
        model_inception = model_inceptionV3()
        model_inception.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
        train_feature_v3 = model_inception.predict(X_train)
        test_feature_v3 = model_inception.predict(X_test)

        y_train_cat = convert_one_hot_to_categorical(y_train)
        y_test_cat = convert_one_hot_to_categorical(y_test)

        svm = SVC(probability=True)
        svm.fit(train_feature_v3, y_train_cat)
        test_pred = svm.predict(test_feature_v3)
        test_pred_proba = svm.predict_proba(test_feature_v3)

        accuracy = accuracy_score(y_test_cat, test_pred)
        recall = recall_score(y_test_cat, test_pred, average='weighted')
        precision = precision_score(y_test_cat, test_pred, average='weighted')
        f1 = f1_score(y_test_cat, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred_proba, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    run_seed = i
    accuracy, recall, precision, f1, auc = train_and_evaluate_model(X_train, y_train, X_test, y_test, run_seed)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")

[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 81ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
Run 1 - Accuracy: 0.81375000, Recall: 0.81375000, Precision: 0.81828262, F1 Score: 0.80709072, AUC: 0.95130417
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 77ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 2 - Accuracy: 0.81375000, Recall: 0.81375000, Precision: 0.81828262, F1 Score: 0.80709072, AUC: 0.95130417
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 85ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
Run 3 - Accuracy: 0.81375000, Recall: 0.81375000, Precision: 0.81828262, F1 Score: 0.80709072, AUC: 0.95131250
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 77ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
Run 4 - Accuracy: 0.81375000, Recall: 0.81375000, Precision: 0.

# Training and Evaluation - Decision Tree Classifier

In [38]:
from sklearn.tree import DecisionTreeClassifier

# Function to train and evaluate the model
def train_and_evaluate_model(X_train, y_train, X_test, y_test):
    with strategy.scope():
        model_inception = model_inceptionV3()
        model_inception.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
        train_feature_v3 = model_inception.predict(X_train)
        test_feature_v3 = model_inception.predict(X_test)

        # Define and train Decision Tree Classifier
        dt = DecisionTreeClassifier()
        dt.fit(train_feature_v3, y_train)
        test_pred = dt.predict(test_feature_v3)

        # Calculate evaluation metrics
        accuracy = accuracy_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred, average='weighted')
        precision = precision_score(y_test, test_pred, average='weighted')
        f1 = f1_score(y_test, test_pred, average='weighted')
        auc = roc_auc_score(y_test, test_pred, multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

# Perform multiple runs and store the results
num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_and_evaluate_model(X_train, y_train, X_test, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Compute average and standard deviation for each metric
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")


[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 77ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
Run 1 - Accuracy: 0.82875000, Recall: 0.82875000, Precision: 0.82706192, F1 Score: 0.82593870, AUC: 0.88583333
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 78ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 2 - Accuracy: 0.82875000, Recall: 0.82875000, Precision: 0.83066497, F1 Score: 0.82636523, AUC: 0.88583333
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 76ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 3 - Accuracy: 0.83500000, Recall: 0.83500000, Precision: 0.83508106, F1 Score: 0.83408331, AUC: 0.89000000
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 77ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 4 - Accuracy: 0.84250000, Recall: 0.84250000, Precision: 0.

# Training and Evaluation - XGBoost Classifier

In [None]:
from xgboost import XGBClassifier

# Function to train and evaluate the model
def train_and_evaluate_model(X_train, y_train, X_test, y_test):
    with strategy.scope():
        model_inception = model_inceptionV3()
        model_inception.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

        train_feature_v3 = model_inception.predict(X_train)
        test_feature_v3 = model_inception.predict(X_test)

        y_train_cat = convert_one_hot_to_categorical(y_train)
        y_test_cat = convert_one_hot_to_categorical(y_test)

        # Define and train XGBoost Classifier
        xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
        xgb.fit(train_feature_v3, y_train_cat)
        test_pred = xgb.predict(test_feature_v3)

        # Calculate evaluation metrics
        accuracy = accuracy_score(y_test_cat, test_pred)
        recall = recall_score(y_test_cat, test_pred, average='weighted')
        precision = precision_score(y_test_cat, test_pred, average='weighted')
        f1 = f1_score(y_test_cat, test_pred, average='weighted')
        auc = roc_auc_score(y_test_cat, xgb.predict_proba(test_feature_v3), multi_class='ovr', average='weighted')

        return accuracy, recall, precision, f1, auc

# Perform multiple runs and store the results
num_runs = 10
results = {'accuracy': [], 'recall': [], 'precision': [], 'f1': [], 'auc': []}

for i in range(num_runs):
    accuracy, recall, precision, f1, auc = train_and_evaluate_model(X_train, y_train, X_test, y_test)
    results['accuracy'].append(accuracy)
    results['recall'].append(recall)
    results['precision'].append(precision)
    results['f1'].append(f1)
    results['auc'].append(auc)
    print(f"Run {i+1} - Accuracy: {accuracy:.8f}, Recall: {recall:.8f}, Precision: {precision:.8f}, F1 Score: {f1:.8f}, AUC: {auc:.8f}")

# Compute average and standard deviation for each metric
average_metrics = {metric: np.mean(values) for metric, values in results.items()}
std_metrics = {metric: np.std(values) for metric, values in results.items()}

print("\nAverage Metrics:")
for metric, value in average_metrics.items():
    print(f"{metric.capitalize()}: {value:.8f} (std: {std_metrics[metric]:.8f})")


[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 75ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
Run 1 - Accuracy: 0.95000000, Recall: 0.95000000, Precision: 0.95183037, F1 Score: 0.94925644, AUC: 0.99365000
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 88ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 2 - Accuracy: 0.95000000, Recall: 0.95000000, Precision: 0.95183037, F1 Score: 0.94925644, AUC: 0.99365000
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 76ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
Run 3 - Accuracy: 0.95000000, Recall: 0.95000000, Precision: 0.95183037, F1 Score: 0.94925644, AUC: 0.99365000
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 77ms/step
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step
Run 4 - Accuracy: 0.95000000, Recall: 0.95000000, Precision: 0.

# Results and Metrics