In [2]:
import os
import time

import cv2
from keras.models import load_model
import numpy as np
import pandas as pd
import seaborn as sns
from skimage.feature import graycomatrix, graycoprops
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.layers import Input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

In [3]:
# Function to extract texture features
def TextureFeatureExtract(img_path):
    """Compute a 12-value GLCM texture descriptor for one image file.

    The image is read with OpenCV and converted to grayscale, then a
    grey-level co-occurrence matrix is built at pixel distance 1 for the
    angles 0 and pi/2, and six properties are read from that matrix.

    Parameters
    ----------
    img_path : str
        Path of the image file to process.

    Returns
    -------
    numpy.ndarray
        1-D array of 12 values. For each of energy, contrast, correlation,
        homogeneity, dissimilarity and ASM (in that order) it holds the value
        at angle 0 followed by the value at angle pi/2.

    Raises
    ------
    FileNotFoundError
        If ``img_path`` does not point to an existing file.
    ValueError
        If OpenCV cannot decode the file as an image.
    """
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"Image file not found: {img_path}")

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cvtColor.
        raise ValueError(f"Could not decode image: {img_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert image to grayscale
    glcm = graycomatrix(gray, [1], [0, np.pi / 2], levels=256)

    # The order of this tuple fixes the column layout of the feature vector.
    property_names = ('energy', 'contrast', 'correlation', 'homogeneity', 'dissimilarity', 'ASM')
    return np.concatenate([np.ravel(graycoprops(glcm, name)) for name in property_names])

In [4]:
# Parallel accumulators: image_paths[i] is a file on disk, labels[i] its class name
image_paths, labels = [], []

In [5]:
# Path to your dataset
data_dir = r'E:\COLLEGE WORK\SEM 5\Machine Learning\Bone marrow classification\Dataset\bone_marrow_cell_dataset'

# Start from empty lists so that re-running this cell rebuilds the listing
# instead of appending a second copy of every path.
image_paths = []
labels = []

# NOTE(review): os.listdir returns entries in arbitrary, platform-dependent
# order, and the seeded train/val/test split depends on the order built here.
# Presumably the cached feature files were produced from this same order - confirm
# before changing the traversal (e.g. before sorting).

# Loop through class directories: each sub-directory of data_dir is one class
# and its name is used as the label.
for class_name in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_dir):
        # A loose file at the dataset root belongs to no class; labelling it
        # with its own file name would create a bogus one-sample class.
        continue

    for entry_name in os.listdir(class_dir):
        entry_path = os.path.join(class_dir, entry_name)
        if os.path.isdir(entry_path):
            # Class split into batch folders: take every file one level down.
            candidate_paths = (
                os.path.join(entry_path, img_name)
                for img_name in tqdm(os.listdir(entry_path), desc=f"Processing {class_name}/{entry_name}")
            )
        else:
            # Class stores its images directly inside the class folder.
            candidate_paths = (entry_path,)

        for img_path in candidate_paths:
            if os.path.isfile(img_path):
                image_paths.append(img_path)
                labels.append(class_name)

Processing ART/0001-1000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 19746.26it/s]
Processing ART/10001-11000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 16789.10it/s]
Processing ART/1001-2000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 19976.02it/s]
Processing ART/11001-12000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17668.56it/s]
Processing ART/12001-13000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 18111.37it/s]
Processing ART/13001-14000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17516.77it/s]
Processing ART/14001-15000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17997.37it/s]
Processing ART/15001-16000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17525.85it/s]
Processing ART/16001-17000: 100%|███████████████████████████████

Processing LYT/12001-13000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 15512.34it/s]
Processing LYT/13001-14000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17505.81it/s]
Processing LYT/14001-15000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17722.84it/s]
Processing LYT/15001-16000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 16598.68it/s]
Processing LYT/16001-17000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 18693.36it/s]
Processing LYT/17001-18000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 15817.06it/s]
Processing LYT/18001-19000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 17726.81it/s]
Processing LYT/19001-20000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 18511.20it/s]
Processing LYT/20001-21000: 100%|███████████████████████████████

Processing NGS/28001-29000: 100%|████████████████████████████████████████████| 1000/1000 [00:00<00:00, 19204.51it/s]
Processing NGS/29001-29424: 100%|██████████████████████████████████████████████| 424/424 [00:00<00:00, 18112.04it/s]
Processing NGS/3001-4000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 20932.27it/s]
Processing NGS/4001-5000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 20700.75it/s]
Processing NGS/5001-6000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 21377.04it/s]
Processing NGS/6001-7000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 21248.06it/s]
Processing NGS/7001-8000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 20782.92it/s]
Processing NGS/8001-9000: 100%|██████████████████████████████████████████████| 1000/1000 [00:00<00:00, 20121.29it/s]
Processing NGS/9001-10000: 100%|████████████████████████████████

In [6]:
# Two-stage split with a fixed seed: first hold out 30% of the samples,
# then cut that hold-out in half to obtain the validation and test sets.
X_train_paths, X_temp_paths, y_train, y_temp = train_test_split(
    image_paths,
    labels,
    test_size=0.3,
    random_state=42,
)
X_val_paths, X_test_paths, y_val, y_test = train_test_split(
    X_temp_paths,
    y_temp,
    test_size=0.5,
    random_state=42,
)


In [7]:
# Load texture features from disk.
# The previous os.path.join('Downloads', <absolute path>) discarded its first
# component, so the directory is now stated once and joined with each file name.
texture_dir = r"C:\Users\HP\Downloads"
X_train_texture = np.load(os.path.join(texture_dir, "X_train_texture.npy"))
X_val_texture = np.load(os.path.join(texture_dir, "X_val_texture.npy"))
X_test_texture = np.load(os.path.join(texture_dir, "X_test_texture.npy"))

# The features come from a cache while the labels are recomputed from the
# directory listing on every run; fail early if the two no longer line up.
assert len(X_train_texture) == len(y_train), "cached train features do not match the train split size"
assert len(X_val_texture) == len(y_val), "cached validation features do not match the validation split size"
assert len(X_test_texture) == len(y_test), "cached test features do not match the test split size"

In [8]:
# Re-wrap each feature set as a NumPy array (train, validation, test in turn)
X_train_texture, X_val_texture, X_test_texture = (
    np.array(feature_set)
    for feature_set in (X_train_texture, X_val_texture, X_test_texture)
)

In [9]:
# Map class-name strings to integer ids.
# The encoder learns its vocabulary from the training labels only and is then
# applied unchanged to all three splits.
label_encoder = LabelEncoder().fit(y_train)
y_train_encoded = label_encoder.transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# Number of distinct classes the encoder saw during fitting
num_classes = len(label_encoder.classes_)

In [10]:
# One-hot encode the integer labels of each split, using num_classes categories
y_train_onehot, y_val_onehot, y_test_onehot = (
    to_categorical(encoded_labels, num_classes)
    for encoded_labels in (y_train_encoded, y_val_encoded, y_test_encoded)
)

In [11]:
# Define input shapes for texture features.
# Each sample is one flat feature vector, so the per-sample shape is a 1-tuple
# holding the number of columns of the training feature matrix.
texture_input_shape = (X_train_texture.shape[1],)  # (number of texture features,)
# Display the shape as the cell output
texture_input_shape

(12,)

In [12]:
# Sanity check: display the texture feature vector of the first training sample
X_train_texture[0]

array([2.45259278e-02, 2.46418163e-02, 2.29782008e+01, 2.34822008e+01,
       9.93182452e-01, 9.93041106e-01, 2.81879225e-01, 2.85214786e-01,
       3.42755020e+00, 3.40065863e+00, 6.01521137e-04, 6.07219109e-04])

In [13]:
# Sanity check: display the feature-matrix shapes in the order train, test, validation
X_train_texture.shape, X_test_texture.shape, X_val_texture.shape

((119961, 12), (25707, 12), (25706, 12))

In [14]:
# Define input layers for texture features.
# Creates a symbolic Keras input named 'texture_input' whose per-sample shape is
# texture_input_shape. NOTE(review): the scikit-learn model fitted in the cells
# shown here does not use it - presumably it feeds a Keras model elsewhere; confirm.
texture_input = Input(shape=texture_input_shape, name='texture_input')
# Display the tensor description as the cell output
texture_input

<KerasTensor: shape=(None, 12) dtype=float32 (created by layer 'texture_input')>

In [15]:
# Define model parameters for each individual model.
# Each entry pairs an unfitted estimator ('model') with the hyper-parameter grid
# ('params') that is handed to GridSearchCV.
#
# The SVC is placed behind a StandardScaler because an RBF kernel is sensitive to
# feature scale and the texture features span several orders of magnitude
# (the first training sample ranges from about 6e-4 to about 23). Keeping the
# scaler inside the pipeline means it is re-fitted on the training part of every
# cross-validation fold.
svm_params = {
    'model': Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC()),
    ]),
    'params': {
        # Pipeline parameters are addressed as '<step name>__<parameter>'
        'svc__C': [10],
        'svc__kernel': ['rbf'],
        'svc__gamma': ['scale']
    }
}

# random_forest_params = {
#     'model': RandomForestClassifier(),
#     'params': {
#         'n_estimators': [50, 100, 200],
#         'max_depth': [None, 10, 20, 30]
#     }
# }

# logistic_regression_params = {
#     'model': LogisticRegression(),
#     'params': {
#         'C': [0.1, 1, 10],
#         'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
#     }
# }

# decision_tree_params = {
#     'model': DecisionTreeClassifier(),
#     'params': {
#         'criterion': ['gini', 'entropy'],
#         'max_depth': [None, 10, 20, 30]
#     }
# }

In [None]:
# Results storage
scores = []

# Time the whole run: cross-validated search, refit and test-set prediction
start_time = time.time()

# Model: SVM
clf_svm = GridSearchCV(
    svm_params['model'],
    svm_params['params'],
    cv=5,
    return_train_score=False,
    n_jobs=-1,  # run the cross-validation fits in parallel on all available cores
)
clf_svm.fit(X_train_texture, y_train_encoded)
y_pred_svm = clf_svm.predict(X_test_texture)
scores.append({
    'model': 'svm',
    # Mean cross-validated score of the best parameter combination
    'best_score': clf_svm.best_score_,
    # scikit-learn metrics take (y_true, y_pred) in that order
    'accuracy_score': accuracy_score(y_test_encoded, y_pred_svm),
    'best_params': clf_svm.best_params_
})
print("SVM Model completed in %.2f seconds" % (time.time() - start_time))

In [None]:
# # Results storage
# scores = []

# # Measure time for each model
# for model_name, params in zip(['svm', 'random_forest', 'logistic_regression', 'decision_tree'], 
#                               [svm_params, random_forest_params, logistic_regression_params, decision_tree_params]):
#     start_time = time.time()

#     clf = GridSearchCV(params['model'], params['params'], cv=5, return_train_score=False)
#     clf.fit(X_train_texture, y_train_encoded)
#     y_pred = clf.predict(X_test_texture)
    
#     scores.append({
#         'model': model_name,
#         'best_score': clf.best_score_,
#         'accuracy_score': accuracy_score(y_pred, y_test_encoded),
#         'best_params': clf.best_params_
#     })
    
#     elapsed_time = time.time() - start_time
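#     # NOTE(review): `model_params` is not defined in the cells shown here; the loop
#     # above iterates over 4 models, so this presumably should be 4 - confirm before re-enabling.
#     remaining_time = elapsed_time * (len(model_params) - len(scores))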
#     print(f"{model_name.capitalize()} Model completed in {elapsed_time:.2f} seconds. Estimated remaining time: {remaining_time:.2f} seconds.")

# # Convert scores to DataFrame
# df = pd.DataFrame(scores, columns=['model', 'best_score', 'accuracy_score', 'best_params'])
# print(df)