In [1]:
# Import Libraries
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, 
    confusion_matrix, mean_absolute_error, mean_squared_error, explained_variance_score, r2_score
)
from math import sqrt


In [2]:
def load_data(folder_healthy, folder_unhealthy, img_size=(224, 224)):
    data, labels = [], []
    
    # Load healthy images
    for img_name in os.listdir(folder_healthy):
        img_path = os.path.join(folder_healthy, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
        
        if img is None:
            print(f"Error loading image: {img_path}")
            continue

        img = cv2.resize(img, img_size) / 255.0  # Normalize
        data.append(img.flatten())  # Flatten the image
        labels.append(0)  # 0 = Healthy
    
    # Load unhealthy images
    for img_name in os.listdir(folder_unhealthy):
        img_path = os.path.join(folder_unhealthy, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        
        if img is None:
            print(f"Error loading image: {img_path}")
            continue
        
        img = cv2.resize(img, img_size) / 255.0
        data.append(img.flatten())
        labels.append(1)  # 1 = Unhealthy
    
    return np.array(data), np.array(labels)


In [3]:
# Paths
folder_healthy = r"D:\AI Algorithm\Castor\healthy spikes"
folder_unhealthy = r"D:\AI Algorithm\Castor\infected spikes"

# Load data
X, y = load_data(folder_healthy, folder_unhealthy)


In [4]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [5]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [6]:
pca = PCA(n_components=0.95, random_state=42)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)


In [7]:
# Initialize and train XGBoost model
xgb = XGBClassifier(
    objective='binary:logistic', 
    eval_metric='logloss', 
    use_label_encoder=False, 
    random_state=42
)

xgb.fit(X_train, y_train)


Parameters: { "use_label_encoder" } are not used.



AttributeError: 'super' object has no attribute '__sklearn_tags__'

AttributeError: 'super' object has no attribute '__sklearn_tags__'

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='logloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=None, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=None,
              n_jobs=None, num_parallel_tree=None, random_state=42, ...)

In [8]:
# Predictions
y_pred = xgb.predict(X_test)
y_prob = xgb.predict_proba(X_test)[:, 1]

# Classification Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)
conf_matrix = confusion_matrix(y_test, y_pred)

# Regression Metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
explained_variance = explained_variance_score(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mbd = np.mean(y_test - y_pred)


In [9]:
# Print all metrics
print("Evaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"AUC: {auc:.4f}")

print("\nConfusion Matrix:")
print(conf_matrix)

print("\nRegression Metrics:")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"Explained Variance Score: {explained_variance:.4f}")
print(f"R² Score: {r2:.4f}")
print(f"Mean Bias Deviation (MBD): {mbd:.4f}")


Evaluation Metrics:
Accuracy: 0.9188
Precision: 0.9079
Recall: 0.9918
F1-Score: 0.9480
AUC: 0.9458

Confusion Matrix:
[[117  49]
 [  4 483]]

Regression Metrics:
MAE: 0.0812
MSE: 0.0812
RMSE: 0.2849
Explained Variance Score: 0.5969
R² Score: 0.5719
Mean Bias Deviation (MBD): -0.0689
