# Data Loading & Preprocessing

In [1]:
import os
import cv2
import numpy as np
from skimage.feature import hog, local_binary_pattern
from sklearn.preprocessing import LabelBinarizer

def load_images_from_folder(folder_path, target_size=(128, 128)):
    """Load every image below ``folder_path`` as a flattened pixel vector.

    ``folder_path`` is expected to hold one sub-directory per class; the
    sub-directory name becomes the label of each image inside it. Entries of
    ``folder_path`` that are not directories, files whose extension is not
    .jpg/.jpeg/.png (compared case-insensitively), and files that
    ``cv2.imread`` cannot decode are skipped.

    Class directories and the files inside them are visited in sorted order
    so that the returned sample order is the same on every run and platform.

    Args:
        folder_path: Directory containing one sub-directory per class.
        target_size: Size handed to ``cv2.resize`` (OpenCV reads it as
            ``(width, height)``).

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per
        loaded image: ``images`` holds the resized image flattened to 1-D,
        ``labels`` holds the matching class-directory name. Both arrays are
        empty when nothing could be loaded.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    images = []
    labels = []

    for class_name in sorted(os.listdir(folder_path)):
        class_path = os.path.join(folder_path, class_name)
        if not os.path.isdir(class_path):
            continue

        for filename in sorted(os.listdir(class_path)):
            # Lower-case before comparing so 'photo.JPG' is not dropped.
            if not filename.lower().endswith(image_extensions):
                continue

            img = cv2.imread(os.path.join(class_path, filename))
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                continue

            images.append(cv2.resize(img, target_size).flatten())
            labels.append(class_name)

    return np.array(images), np.array(labels)

# Root directory of the dataset. It must contain the sub-folders
# 'train_data', 'validation_data' and 'test_data'. Set the environment
# variable GUAVA_DATASET_DIR to run the notebook on another machine; the
# default is the original author's local path.
DATASET_ROOT = os.environ.get(
    'GUAVA_DATASET_DIR',
    'C:\\Users\\mzain\\Desktop\\Guava\\Nabiha\\dataset',
)

# Loading training data
train_folder_path = os.path.join(DATASET_ROOT, 'train_data')
train_images, train_labels = load_images_from_folder(train_folder_path)

# Loading validation data
val_folder_path = os.path.join(DATASET_ROOT, 'validation_data')
val_images, val_labels = load_images_from_folder(val_folder_path)

# Loading test data
test_folder_path = os.path.join(DATASET_ROOT, 'test_data')
test_images, test_labels = load_images_from_folder(test_folder_path)

# One-hot encode the labels. The binarizer is fitted on the training labels
# only and then reused, so all three splits share one class-to-column mapping.
# NOTE(review): LabelBinarizer returns a single column when there are only two
# classes; the later cells call np.argmax(..., axis=1) on these arrays, which
# only recovers the class index when there is one column per class.
label_binarizer = LabelBinarizer()
train_labels_one_hot = label_binarizer.fit_transform(train_labels)
val_labels_one_hot = label_binarizer.transform(val_labels)
test_labels_one_hot = label_binarizer.transform(test_labels)


In [11]:
# Feature matrices: the flattened images returned by load_images_from_folder.
X_train_flat, X_val_flat, X_test_flat = train_images, val_images, test_images

# Targets: the label-binarized class labels for the same three splits.
y_train, y_val, y_test = (
    train_labels_one_hot,
    val_labels_one_hot,
    test_labels_one_hot,
)



In [3]:
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from tensorflow.keras.preprocessing import image





In [12]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MaxAbsScaler

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to all three splits.
# Author's notes on the alternatives tried (KNN score):
#   StandardScaler 67 | MinMaxScaler 64 | RobustScaler 69 | MaxAbsScaler 65
scaler = StandardScaler()
scaler.fit(X_train_flat)

X_train_flat, X_val_flat, X_test_flat = [
    scaler.transform(split)
    for split in (X_train_flat, X_val_flat, X_test_flat)
]

print('X_val_flat scaled = ', X_val_flat)

print('Done!')

X_val_flat scaled =  [[1.45205897 1.46008505 1.45973513 ... 1.48234594 1.50147173 1.53833515]
 [0.82498855 0.92691257 1.01816672 ... 1.71008152 1.68628016 1.67408641]
 [1.45205897 1.46008505 1.45973513 ... 0.94297219 1.00479906 1.04058055]
 ...
 [1.45205897 1.46008505 1.45973513 ... 1.02687477 1.08565275 1.11976878]
 [1.45205897 1.46008505 1.45973513 ... 0.96694435 1.00479906 1.06320576]
 [1.45205897 1.46008505 1.45973513 ... 0.91900002 0.98169801 1.01795534]]
Done!


# 1st Model (KNN)

In [13]:
# Total number of scalar values in the scaled training matrix
# (number of rows multiplied by number of feature columns).
print(X_train_flat.size)

52789248


In [14]:

# Base model 1: 3-nearest-neighbour classifier with distance-weighted votes
# and Manhattan distance. The targets are label-binarized, so argmax over the
# columns recovers the integer class index scikit-learn expects.
knn_model = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan')
knn_model.fit(X_train_flat, np.argmax(y_train, axis=1))

# Make predictions on the validation set
knn_val_predictions = knn_model.predict(X_val_flat)

# Accuracy on the validation split (X_val_flat / y_val)
knn_accuracy = accuracy_score(np.argmax(y_val, axis=1), knn_val_predictions)
print(f"KNN Validation Accuracy: {knn_accuracy}")

# Author's note from earlier runs:
#   70 @ n_neighbors=3, weights='distance', metric='manhattan'; 65 for robust
print('KNN Model Done & Ready')

KNN Test Accuracy: 0.9236947791164659
KNN Model Done & Ready


In [15]:

# Base model 2: 3-nearest-neighbour classifier with distance-weighted votes,
# this time using the Canberra distance. fit() returns the estimator itself,
# so construction and training are chained.
knn2_model = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    metric='canberra',
).fit(X_train_flat, y_train.argmax(axis=1))

# Predict the validation split and score it against the integer class indices.
knn2_val_predictions = knn2_model.predict(X_val_flat)
knn2_accuracy = accuracy_score(y_val.argmax(axis=1), knn2_val_predictions)
print(f"KNN Validation Accuracy: {knn2_accuracy}")

print('KNN2 Model Done & Ready')

KNN Validation Accuracy: 0.9196787148594378
KNN2 Model Done & Ready


# 2nd Model (SVM)

In [16]:
# Base model 3: RBF-kernel support vector classifier (C=10), trained on the
# scaled training features with integer class indices as targets.
svm_model = SVC(C=10, kernel='rbf')
svm_model.fit(X_train_flat, y_train.argmax(axis=1))


In [17]:
# Make predictions on the validation set
svm_val_predictions = svm_model.predict(X_val_flat)

# Accuracy on the validation split (X_val_flat / y_val)
svm_accuracy = accuracy_score(np.argmax(y_val, axis=1), svm_val_predictions)
print(f"SVM Validation Accuracy: {svm_accuracy}")

# Author's note from earlier runs: 85% @ kernel='rbf', C=5; 76 for robust
print('SVM Model Done & Ready')

SVM Test Accuracy: 0.9317269076305221
SVM Model Done & Ready


# 3rd Model (ANN)

In [21]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

import numpy as np
import pandas as pd
import time
import levenberg_marquardt as lm

# Seed TensorFlow so weight initialisation and dropout masks are repeatable
# between runs (bit-exact results are still not guaranteed on every backend).
tf.random.set_seed(42)

# y_train is label-binarized with one column per class, so its column count is
# the number of output units and matches what categorical_crossentropy expects.
n_classes = y_train.shape[1]

# Create the ANN model: four ELU dense layers, each followed by batch
# normalisation (with dropout after the first three), then a softmax output.
ann2_model = Sequential([
    Dense(512, activation='elu', input_shape=(X_train_flat.shape[1],)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(1024, activation='elu'),
    BatchNormalization(),
    Dropout(0.1),
    Dense(512, activation='elu'),
    BatchNormalization(),
    Dropout(0.1),
    Dense(128, activation='elu'),
    BatchNormalization(),
    Dense(n_classes, activation='softmax'),
])

# Compile the model
ann2_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training split only (no validation data is passed)
ann2_model.fit(X_train_flat, y_train, epochs=30, batch_size=32)

# Make predictions on the validation set: argmax turns the per-class
# probabilities into integer class indices
ann2_val_predictions = np.argmax(ann2_model.predict(X_val_flat), axis=1)

# Accuracy on the validation split (X_val_flat / y_val)
ann2_accuracy = accuracy_score(np.argmax(y_val, axis=1), ann2_val_predictions)
print(f"ANN Validation Accuracy: {ann2_accuracy}")

# Author's note from earlier runs: 86% | 80 for robust 512,0.5,1024,0.1,512,0.1,128
print('ANN Model Done & Ready')

# model_wrapper = lm.ModelWrapper(
#     tf.keras.models.clone_model(ann2_model))

# model_wrapper.compile(
#     optimizer=tf.keras.optimizers.SGD(learning_rate=1.0),
#     loss=lm.MeanSquaredError())

# train_dataset = tf.data.Dataset.from_tensor_slices((X_train_flat, y_train))
# train_dataset = train_dataset.shuffle(len(X_train_flat))
# train_dataset = train_dataset.batch(1000).cache()
# train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

# # Training using Levenberg-Marquardt
# print("\n_________________________________________________________________")
# print("Train using Levenberg-Marquardt")
# t2_start = time.perf_counter()
# history_lm = model_wrapper.fit(train_dataset, epochs=100)
# t2_stop = time.perf_counter()
# print("Elapsed time: ", t2_stop - t2_start)



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
ANN Test Accuracy: 0.8273092369477911
ANN Model Done & Ready


# 4th Model (Random Forest)

In [22]:
# Import the Random Forest classifier
from sklearn.ensemble import RandomForestClassifier

# Base model 5: random forest with 100 trees; random_state is fixed so the
# fitted forest is the same on every run.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_flat, np.argmax(y_train, axis=1))

# Make predictions on the validation set
rf_val_predictions = rf_model.predict(X_val_flat)

# Accuracy on the validation split (X_val_flat / y_val)
rf_accuracy = accuracy_score(np.argmax(y_val, axis=1), rf_val_predictions)
print(f"Random Forest Validation Accuracy: {rf_accuracy}")

# Author's note from earlier runs: 70 for robust
print('Random Forest Model Done & Ready')


Random Forest Test Accuracy: 0.927710843373494
Random Forest Model Done & Ready


# Model Ensemble (Choose one of four methods)

## Stacking Predictions as Inputs

### 1. To Train

In [23]:
# Make predictions on the validation set using each individual model.
# These validation-set predictions are later stacked into
# X_ensemble_metatrain_predictions, the feature matrix the meta-learners are
# fitted on.
knn_metatrain_predictions = knn_model.predict(X_val_flat)

In [24]:
# Canberra-distance KNN predictions on the validation split. This rebinds the
# name already assigned in the KNN2 training cell to a fresh prediction.
knn2_val_predictions = knn2_model.predict(X_val_flat)

In [25]:
# SVM class predictions on the validation split (meta-learner training feature).
svm_metatrain_predictions = svm_model.predict(X_val_flat)

In [27]:
# ANN predictions on the validation split; argmax over axis 1 converts the
# softmax output into one integer class index per sample.
ann2_metatrain_predictions = np.argmax(ann2_model.predict(X_val_flat), axis=1)



In [28]:
# Random forest class predictions on the validation split (meta-learner training feature).
rf_metatrain_predictions = rf_model.predict(X_val_flat)

In [29]:

# Stack the base-model predictions side by side: one row per validation
# sample, one column per model in the order KNN, KNN2, ANN, SVM, RF.
X_ensemble_metatrain_predictions = np.column_stack((
    knn_metatrain_predictions,
    knn2_val_predictions,
    ann2_metatrain_predictions,
    svm_metatrain_predictions,
    rf_metatrain_predictions,
))

### 2. To Test

In [33]:

# Predict the test split with every base model.
knn_metatest_predictions = knn_model.predict(X_test_flat)
knn2_test_predictions = knn2_model.predict(X_test_flat)
svm_metatest_predictions = svm_model.predict(X_test_flat)
# The ANN returns per-class scores; argmax picks the class index per sample.
ann2_metatest_predictions = ann2_model.predict(X_test_flat).argmax(axis=1)
rf_metatest_predictions = rf_model.predict(X_test_flat)

# Stack the predictions side by side: one row per test sample, one column per
# model, in the same column order used for the meta-training matrix
# (KNN, KNN2, ANN, SVM, RF).
X_ensemble_metatest_predictions = np.column_stack((
    knn_metatest_predictions,
    knn2_test_predictions,
    ann2_metatest_predictions,
    svm_metatest_predictions,
    rf_metatest_predictions,
))


## Method 1 (Ensemble) Direct Testing (No Training Step Involved)

In [34]:
from scipy.stats import mode

# Majority vote: for every validation sample take the most frequent class
# index across the base-model columns.
final_predictions = mode(X_ensemble_metatrain_predictions, axis=1).mode

# Score the vote against the true validation class indices.
ensemble_accuracy = accuracy_score(y_val.argmax(axis=1), np.ravel(final_predictions))
print(f"Ensemble Validation Accuracy: {ensemble_accuracy}")

print('Ensemble Model Done')  # 75%

# The vote itself is not trained, so the validation set was not used to fit
# it; the author therefore treats this score as the test accuracy of the
# voting ensemble.

Ensemble Validation Accuracy: 0.9317269076305221
Ensemble Model Done


## Method 2 (MLP Meta Learner) Train & Test

In [35]:

# Train MLP as a meta-learner on base model predictions.
# Inputs are the base models' validation-set predictions, targets are the true
# validation class indices. random_state is fixed so the result is repeatable.
mlp_meta_learner = MLPClassifier(hidden_layer_sizes=(16, 32, 16), max_iter=700, random_state=42)
mlp_meta_learner.fit(X_ensemble_metatrain_predictions, np.argmax(y_val, axis=1))

# Make predictions using the meta-learner on the base models' test-set
# predictions (despite the variable name, these are test predictions).
train_meta_learner_predictions = mlp_meta_learner.predict(X_ensemble_metatest_predictions)

# Evaluate the ensemble model with the meta-learner against the test labels
meta_test_accuracy = accuracy_score(np.argmax(y_test, axis=1), train_meta_learner_predictions)
print(f"Ensemble with Meta-Learner (MLP) Test Accuracy: {meta_test_accuracy}")

print('Meta-Learner Done')


Ensemble with Meta-Learner Train Accuracy: 0.963855421686747
Meta-Learner Done


## Method 3 KNN Train & Test


In [36]:
# Train KNN as a meta-learner on base model predictions.
# Inputs are the base models' validation-set predictions, targets are the true
# validation class indices. n_neighbors can be adjusted as needed.
knn_meta_learner = KNeighborsClassifier(n_neighbors=5, weights='distance', metric='manhattan')
knn_meta_learner.fit(X_ensemble_metatrain_predictions, np.argmax(y_val, axis=1))

# Make predictions using the meta-learner on the base models' test-set
# predictions (despite the variable name, these are test predictions).
train_knn_metalearner_predictions = knn_meta_learner.predict(X_ensemble_metatest_predictions)

# Evaluate the ensemble model with the meta-learner against the test labels
knn_metatest_accuracy = accuracy_score(np.argmax(y_test, axis=1), train_knn_metalearner_predictions)
print(f"Ensemble with Meta-Learner (KNN) Test Accuracy: {knn_metatest_accuracy}")

print('Meta-Learner (KNN) Done')

Ensemble with Meta-Learner (KNN) Train Accuracy: 0.9799196787148594
Meta-Learner (KNN) Done


## Method 4 SVM Train & Test

In [37]:

# Train SVM as a meta-learner on base model predictions.
# Inputs are the base models' validation-set predictions, targets are the true
# validation class indices.
svm_meta_learner = SVC(kernel='rbf', C=10)
svm_meta_learner.fit(X_ensemble_metatrain_predictions, np.argmax(y_val, axis=1))

# Make predictions using the meta-learner on the base models' test-set
# predictions (despite the variable name, these are test predictions).
train_meta_learner_predictions = svm_meta_learner.predict(X_ensemble_metatest_predictions)

# Evaluate the ensemble model with the meta-learner against the test labels
meta_test_accuracy = accuracy_score(np.argmax(y_test, axis=1), train_meta_learner_predictions)
print(f"Ensemble with Meta-Learner (SVM) Test Accuracy: {meta_test_accuracy}")

# Author's note from earlier runs: Last 95 for Guava Species
print('Meta-Learner Done')


Ensemble with Meta-Learner Train Accuracy: 0.9678714859437751
Meta-Learner Done


In [3]:
# Of the meta-learners, KNN has the highest test accuracy (0.9799, about 98%).