In [1]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, 
    mean_absolute_error, mean_squared_error, r2_score
)
import xgboost as xgb


In [2]:
def load_data(folder, image_size=(128, 128)):
    """Load the images below ``folder`` into a flat feature matrix plus labels.

    ``folder`` is expected to contain the sub-directories ``healthy``
    (label 0) and ``unhealthy`` (label 1). Every entry that ``cv2.imread``
    can decode is resized to ``image_size`` and flattened into one row.
    Entries it cannot decode (``imread`` returns ``None``) are skipped.

    Parameters
    ----------
    folder : str
        Directory holding the ``healthy`` and ``unhealthy`` sub-directories.
    image_size : tuple of int, optional
        Target ``(width, height)`` handed to ``cv2.resize``. Defaults to
        ``(128, 128)``. Smaller sizes shrink the feature count, and with it
        the memory needed for training.

    Returns
    -------
    tuple of np.ndarray
        ``(data, labels)``: one flattened image per row of ``data`` and the
        matching 0/1 class index in ``labels``.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when a class sub-directory is missing.
    """
    target_size = tuple(image_size)
    data = []
    labels = []
    for label, category in enumerate(["healthy", "unhealthy"]):
        path = os.path.join(folder, category)
        # os.listdir yields entries in arbitrary order; sorting keeps the row
        # order (and everything derived from it) reproducible between runs.
        for file in sorted(os.listdir(path)):
            img_path = os.path.join(path, file)
            image = cv2.imread(img_path)
            if image is None:
                # Not a decodable image (sub-directory, text file, ...).
                continue
            image = cv2.resize(image, target_size)
            data.append(image.flatten())
            labels.append(label)
    return np.array(data), np.array(labels)

# Root directory of the dataset. It can be overridden with the
# CUCUMBER_DATA_DIR environment variable so the notebook also runs on machines
# where the data lives elsewhere; the default is the original local path.
data_root = os.environ.get("CUCUMBER_DATA_DIR", r"D:\AI Algorithm\cucumber")

# Each split lives in its own sub-directory of the root.
train_folder = os.path.join(data_root, "train")
test_folder = os.path.join(data_root, "test")
val_folder = os.path.join(data_root, "validation")

# Load every split into a (features, labels) pair.
X_train, y_train = load_data(train_folder)
X_test, y_test = load_data(test_folder)
X_val, y_val = load_data(val_folder)


In [3]:
def _scale_pixels(pixels):
    """Scale raw 8-bit pixel values to float32 in the range [0, 1].

    Arrays that are not ``uint8`` are returned unchanged. That makes the cell
    safe to re-run: a second pass would otherwise divide already-scaled
    features by 255 again.
    """
    if pixels.dtype != np.uint8:
        return pixels
    # float32 instead of the float64 produced by ``uint8 / 255.0``: half the
    # memory for the same information held in 8-bit pixels.
    return pixels.astype(np.float32) / np.float32(255.0)


X_train = _scale_pixels(X_train)
X_test = _scale_pixels(X_test)
X_val = _scale_pixels(X_val)


In [4]:
# Gradient-boosted tree classifier for the binary healthy/unhealthy task.
model = xgb.XGBClassifier(
    # Learning task and the metric reported on the evaluation set.
    objective="binary:logistic",
    eval_metric="logloss",
    # Ensemble size and shape.
    n_estimators=100,
    max_depth=6,
    # Shrinkage applied to every boosting step.
    learning_rate=0.1,
)


In [5]:
# Train on the training split; log-loss on the validation split is printed
# after every boosting round.
model.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    verbose=True,
)


XGBoostError: [16:47:40] C:\buildkite-agent\builds\buildkite-windows-cpu-autoscaling-group-i-0c55ff5f71b100e98-1\xgboost\xgboost-ci-windows\src\common\io.h:320: bad_malloc: Failed to allocate 6222190832 bytes.

In [None]:
# Probability of class 1 ("unhealthy") for each test image; used for ROC-AUC
# and the probability-based error metrics.
y_prob = model.predict_proba(X_test)[:, 1]
# Hard 0/1 class predictions for the threshold-based metrics.
y_pred = model.predict(X_test)


In [None]:
# Accuracy, Precision, Recall, F1 (computed from the hard 0/1 predictions)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# AUC (computed from the predicted probability of class 1)
auc = roc_auc_score(y_test, y_prob)

# Error Metrics: treat the predicted probability as a regression estimate of
# the 0/1 label.
mae = mean_absolute_error(y_test, y_prob)
mse = mean_squared_error(y_test, y_prob)
rmse = np.sqrt(mse)

# Percentage Errors
# MAPE divides by the true value, and the labels contain 0 ("healthy"), so an
# unguarded division yields inf/nan. It is therefore evaluated only on the
# samples whose true label is non-zero, and is nan when there are none.
nonzero_actual = y_test != 0
if nonzero_actual.any():
    mape = np.mean(
        np.abs((y_test[nonzero_actual] - y_prob[nonzero_actual]) / y_test[nonzero_actual])
    ) * 100
else:
    mape = np.nan

# SMAPE: a term whose denominator is 0 (label and probability both exactly 0)
# is a perfect prediction and contributes 0 instead of 0/0.
smape_denominator = np.abs(y_test) + np.abs(y_prob)
smape_terms = np.divide(
    2 * np.abs(y_test - y_prob),
    smape_denominator,
    out=np.zeros_like(smape_denominator, dtype=float),
    where=smape_denominator != 0,
)
smape = 100 / len(y_test) * np.sum(smape_terms)

# R2 Score and Explained Variance
r2 = r2_score(y_test, y_prob)
explained_variance = 1 - np.var(y_test - y_prob) / np.var(y_test)

# Mean Bias Deviation (positive: probabilities are on average below the labels)
mbd = np.mean(y_test - y_prob)

# Print Metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print(f"AUC: {auc:.2f}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}")
print(f"Symmetric Mean Absolute Percentage Error (SMAPE): {smape:.2f}")
print(f"Explained Variance Score: {explained_variance:.2f}")
print(f"R^2 Score: {r2:.2f}")
print(f"Mean Bias Deviation (MBD): {mbd:.2f}")



from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Imported here because the plot below needs it and this cell runs before any
# other cell that imports pyplot.
import matplotlib.pyplot as plt

# Compute confusion matrix. The label order is pinned to [0, 1] so the matrix
# is always 2x2 and its rows/columns line up with the display labels below,
# even if one class is absent from y_test / y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Healthy', 'Unhealthy'])

# Plot confusion matrix
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Persist the classifier with XGBoost's own serializer.
# NOTE(review): how XGBoost treats a file extension other than .json/.ubj
# depends on the installed version; confirm before relying on this file.
model.save_model(fname="xgboost_cucumber_leaf.model")


In [None]:
import joblib

# Pickle the classifier trained in the earlier cells. `model` is deliberately
# NOT re-created here: rebinding it to a fresh `xgb.XGBClassifier()` would
# pickle an untrained estimator and discard the fitted one.
# `get_booster()` raises if the model has not been fitted, so an untrained
# estimator cannot be written to disk by accident.
model.get_booster()

# Save the trained model
joblib.dump(model, "xgboost_cucumber_leaf.pkl")


In [None]:
import matplotlib.pyplot as plt

# The evaluation metric and early stopping are estimator parameters, so they
# are set on the model instead of being passed to fit(). The scikit-learn
# wrapper's fit() has no `evals_result` argument, and recent XGBoost releases
# no longer accept `eval_metric` / `early_stopping_rounds` there either.
model.set_params(eval_metric="logloss", early_stopping_rounds=10)

# Train the model and track evaluation metrics on the training and validation
# sets. Early stopping monitors the LAST entry of eval_set (validation split).
model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)],
          verbose=True)

# Per-round metric history: 'validation_0' is the first eval_set entry
# (training split), 'validation_1' the second (validation split).
evals_result = model.evals_result()

# Plotting the learning curve
epochs = len(evals_result['validation_0']['logloss'])
x_axis = range(0, epochs)

# Plot training and validation logloss
plt.plot(x_axis, evals_result['validation_0']['logloss'], label='Train logloss')
plt.plot(x_axis, evals_result['validation_1']['logloss'], label='Validation logloss')
plt.xlabel('Epochs')
plt.ylabel('Logloss')
plt.title('XGBoost Learning Curve')
plt.legend()
plt.grid(True)
plt.show()
