In [1]:
## Import system library
import os
import glob

## Import Image Processing library
import cv2
import numpy as np
import skimage
from skimage import exposure
from skimage.color import rgb2gray
from scipy.ndimage import convolve, uniform_filter
from skimage.filters import gabor_kernel, unsharp_mask, threshold_otsu, gabor
from skimage.morphology import disk, closing, dilation
import torchvision.models as models
import matplotlib.pyplot as plt
%matplotlib inline

## Import machine learning library
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from skimage.feature import hog
from sklearn.metrics import f1_score, accuracy_score
from sklearn.utils import resample
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score

from utils.lbp import LBP
from utils.vis import draw_bboxes
from utils.preprocess import sliding_window
from utils.dataset import load_yolo_labels
from utils.bboxes import iou, calculate_boxA_percentage
from utils.intensity_transforms import histogram_matching, calculate_mean_histogram
from utils.dataset import adjust_labels_for_pooling, resize_image_and_bboxes
from models.kernels import AlexNetDescriptor

In [2]:
## Defind dataset folders

# root_path = '/content/drive/Shareddrives/Wrist_fracture_detectiom/ML/Dataset'
root_path = 'MLDataset/crop_data'

img_train_folder = 'train'
img_test_folder = 'test'
label_folder = 'labels'
image_folder = 'images'

In [3]:
def export_features(export_path, name, feature_name, feature_list):
    """Save features to npy file

    Args:
        export_path (str): path to save the file
        name (str): file name to save
        feature_name (str): name of the feature. Ex: 'lbp', 'hog', 'gabor'
        feature_list (list/array): list of features
    """
    ## Export features to file
    save_name = f'{name}_{feature_name}.npy'
    save_path = os.path.join(export_path, save_name)
    np.save(save_path, feature_list)
    print(f'Save {feature_name} features to {save_path}')

## Machine Learning Model

### Load data

In [11]:
dataset_path = 'MLDataset/crop_data'

In [20]:
def undersample_data(features, labels):
    """Undersample the data

    Args:
        features (np.array): features
        labels (np.array): labels

    Returns:
        np.array: undersampled features
        np.array: undersampled labels
    """
    # Find the number of samples in each class
    X_majority = features[labels == 0]
    X_minority = features[labels == 1]
    y_majority = labels[labels == 0]
    y_minority = labels[labels == 1]
    
    # Downsample the majority class
    X_majority_downsampled, y_majority_downsampled = resample(X_majority, y_majority, replace=False, n_samples=len(X_minority), random_state=42)
    
    # Combine the minority class with the downsampled majority class
    X_downsampled = np.concatenate([X_majority_downsampled, X_minority])
    y_downsampled = np.concatenate([y_majority_downsampled, y_minority])
    
    return X_downsampled, y_downsampled

In [27]:
# feature_list = ['hog', 'alex']
# feature_list =['hog']
# feature_list =['hog_canny']
# feature_list =['lbp']
feature_list =['alex']

train_features_list = []
train_labels = None
test_features_list = []
test_labels = None

for feature_name in feature_list:
    # Load the dictionary from the .npz file
    train_fracture = np.load(os.path.join(root_path, f'train_fracture_{feature_name}.npy'), allow_pickle=True)
    train_normal = np.load(os.path.join(root_path, f'train_normal_{feature_name}.npy'), allow_pickle=True)
    train_fracture_labels = np.load(os.path.join(root_path, f'train_fracture_labels_{feature_name}.npy'), allow_pickle=True)
    train_normal_labels = np.load(os.path.join(root_path, f'train_normal_labels_{feature_name}.npy'), allow_pickle=True)

    test_fracture = np.load(os.path.join(root_path, f'test_fracture_{feature_name}.npy'), allow_pickle=True)
    test_normal = np.load(os.path.join(root_path, f'test_normal_{feature_name}.npy'), allow_pickle=True)
    test_fracture_labels = np.load(os.path.join(root_path, f'test_fracture_labels_{feature_name}.npy'), allow_pickle=True)
    test_normal_labels = np.load(os.path.join(root_path, f'test_normal_labels_{feature_name}.npy'), allow_pickle=True)

    train_features = np.concatenate([train_fracture, train_normal])
    train_labels= np.concatenate([train_fracture_labels, train_normal_labels])
    train_indices = np.where(train_labels!= -1)[0]
    train_features = train_features[train_indices]
    train_labels= train_labels[train_indices]

    test_features= np.concatenate([test_fracture, test_normal])
    test_labels = np.concatenate([test_fracture_labels, test_normal_labels])
    test_indices = np.where(test_labels!= -1)[0]
    test_features = test_features[test_indices]
    test_labels = test_labels[test_indices]
    
    # Append to the list
    train_features_list.append(train_features)
    test_features_list.append(test_features)

# Concatenate the features
train_features = np.concatenate(train_features_list, axis=1)
test_features = np.concatenate(test_features_list, axis=1)

train_features, train_labels = undersample_data(train_features, train_labels)

In [22]:
hog_train_features = train_features
hog_test_features = test_features

In [24]:
hog_canny_train_features = train_features
hog_canny_test_features = test_features

In [26]:
lbp_train_features = train_features
lbp_test_features = test_features

In [28]:
alex_train_features = train_features
alex_test_features = test_features

In [32]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [33]:
# Concatenate Features
X = np.concatenate([hog_train_features, hog_canny_train_features, lbp_train_features, alex_train_features], axis=1)
y = train_labels  # Labels for the images

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a classifier on the concatenated features
classifier = XGBClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Predict and evaluate
y_pred = classifier.predict(X_test)

print('Classification Report')
print(classification_report(y_test, y_pred))
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))

Classification Report
              precision    recall  f1-score   support

           0       0.77      0.71      0.74       572
           1       0.71      0.78      0.74       532

    accuracy                           0.74      1104
   macro avg       0.74      0.74      0.74      1104
weighted avg       0.74      0.74      0.74      1104

Confusion Matrix
[[405 167]
 [119 413]]


In [34]:
hog_X_train, hog_X_test, hog_y_train, hog_y_test = train_test_split(hog_train_features, train_labels, test_size=0.3, random_state=42)
hog_canny_X_train, hog_canny_X_test, hog_canny_y_train, hog_canny_y_test = train_test_split(hog_canny_train_features, train_labels, test_size=0.3, random_state=42)
lbp_X_train, lbp_X_test, lbp_y_train, lbp_y_test = train_test_split(lbp_train_features, train_labels, test_size=0.3, random_state=42)
alex_X_train, alex_X_test, alex_y_train, alex_y_test = train_test_split(alex_train_features, train_labels, test_size=0.3, random_state=42)

In [35]:
# Feature-Level Ensemble
hog_model = XGBClassifier(n_estimators=100, random_state=42).fit(hog_X_train, hog_y_train)
hog_canny_model = LGBMClassifier(n_estimators=100, random_state=42).fit(hog_canny_X_train, hog_canny_y_train)
lbp_model = XGBClassifier(n_estimators=100, random_state=42).fit(lbp_X_train, lbp_y_train)
alex_model = LGBMClassifier(n_estimators=100, random_state=42).fit(alex_X_train, alex_y_train)

[LightGBM] [Info] Number of positive: 1308, number of negative: 1268
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003986 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 36717
[LightGBM] [Info] Number of data points in the train set: 2576, number of used features: 144
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507764 -> initscore=0.031058
[LightGBM] [Info] Start training from score 0.031058
[LightGBM] [Info] Number of positive: 1308, number of negative: 1268
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000819 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12495
[LightGBM] [Info] Number of data points in the train set: 2576, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507764 -> initscore=0.031058
[LightGBM] [Info] Start training from score 0.031058


In [46]:
from sklearn.ensemble import VotingClassifier
# Combine predictions (Voting Classifier as an example)
voting_clf = VotingClassifier(estimators=[
    ('hog', hog_model), ('hog_canny', hog_canny_model), ('lbp', lbp_model), ('alex', alex_model)], voting='hard')

In [47]:
voting_clf.fit(np.hstack([hog_X_train, hog_canny_X_train, lbp_X_train, alex_X_train]), y_train)
y_pred_ensemble = voting_clf.predict(np.hstack([hog_X_test, hog_canny_X_test, lbp_X_test, alex_X_test]))
print('Classification Report')
print(classification_report(y_test, y_pred_ensemble))
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred_ensemble))

[LightGBM] [Info] Number of positive: 1308, number of negative: 1268
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021553 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205796
[LightGBM] [Info] Number of data points in the train set: 2576, number of used features: 1793
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507764 -> initscore=0.031058
[LightGBM] [Info] Start training from score 0.031058
[LightGBM] [Info] Number of positive: 1308, number of negative: 1268
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023694 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205796
[LightGBM] [Info] Number of data points in the train set: 2576, number of used features: 1793
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507764 -> initscore=0.031058
[LightGBM] [Info] Start training from score 0.031058
Classification

In [48]:
y_pred_ensemble = voting_clf.predict(np.hstack([hog_test_features, hog_canny_test_features, lbp_test_features, alex_test_features]))
print('Classification Report')
print(classification_report(test_labels, y_pred_ensemble))
print('Confusion Matrix')
print(confusion_matrix(test_labels, y_pred_ensemble))

Classification Report
              precision    recall  f1-score   support

           0       0.93      0.72      0.81      2019
           1       0.38      0.76      0.51       453

    accuracy                           0.73      2472
   macro avg       0.66      0.74      0.66      2472
weighted avg       0.83      0.73      0.76      2472

Confusion Matrix
[[1453  566]
 [ 107  346]]


In [49]:

# Model-Level Ensemble (Stacking)
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# Define base models
base_learners = [
    ('hog', XGBClassifier(n_estimators=100, random_state=42)),
    ('hog_canny', LGBMClassifier(n_estimators=100, random_state=42)),
    ('lbp', XGBClassifier(n_estimators=100, random_state=42)),
    ('alex', LGBMClassifier(n_estimators=100, random_state=42))
]

# Define meta learner
meta_learner = LogisticRegression()

# Create stacking classifier
stacking_clf = StackingClassifier(estimators=base_learners, final_estimator=meta_learner, passthrough=True)

# Train stacking classifier
stacking_clf.fit(X_train, y_train)

# Predict and evaluate
y_pred_stacking = stacking_clf.predict(X_test)
print('Stacking Ensemble Accuracy:', accuracy_score(y_test, y_pred_stacking))
print('Classification Report')
print(classification_report(y_test, y_pred_stacking))
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred_stacking))

[LightGBM] [Info] Number of positive: 1308, number of negative: 1268
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022033 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205796
[LightGBM] [Info] Number of data points in the train set: 2576, number of used features: 1793
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507764 -> initscore=0.031058
[LightGBM] [Info] Start training from score 0.031058
[LightGBM] [Info] Number of positive: 1308, number of negative: 1268
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023828 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205796
[LightGBM] [Info] Number of data points in the train set: 2576, number of used features: 1793
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507764 -> initscore=0.031058
[LightGBM] [Info] Start training from score 0.031058
[LightGBM] [In

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [50]:
test_features_all = np.concatenate([hog_test_features, hog_canny_test_features, lbp_test_features, alex_test_features], axis=1)
y_pred_stacking = stacking_clf.predict(test_features_all)
print('Stacking Ensemble Accuracy:', accuracy_score(test_labels, y_pred_stacking))
print('Classification Report')
print(classification_report(test_labels, y_pred_stacking))
print('Confusion Matrix')
print(confusion_matrix(test_labels, y_pred_stacking))

Stacking Ensemble Accuracy: 0.6023462783171522
Classification Report
              precision    recall  f1-score   support

           0       0.86      0.61      0.72      2019
           1       0.24      0.56      0.34       453

    accuracy                           0.60      2472
   macro avg       0.55      0.59      0.53      2472
weighted avg       0.75      0.60      0.65      2472

Confusion Matrix
[[1235  784]
 [ 199  254]]
