This file contains the training process of our classification model. We use a workflow that extracts learned features using ConvNeXt Tiny and DenseNet121, then concatenates these features before passing them to an SVM classifier.

link to our dataset on roboflow:
https://universe.roboflow.com/segmentationcmmd/classifbegninmalign-ddsm
original dataset:
https://www.cancerimagingarchive.net/analysis-result/tompei-cmmd/
https://www.cancerimagingarchive.net/collection/cbis-ddsm/

In [11]:

import os
import cv2
import numpy as np
from skimage.feature import hog, graycomatrix, graycoprops, local_binary_pattern
from skimage.measure import regionprops, label
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
def load_data(data_dir):
    images_grayscale = []
    images_rgb = []
    labels = []
    for dir in ['test','train','valid']:
      for label in ['benign', 'malignant']:
        class_dir = os.path.join(data_dir,dir, label)
        class_idx = 0 if label == 'benign' else 1

        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            img_gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img_gray = cv2.resize(img_gray, (224, 224))
            images_grayscale.append(img_gray)
            img_rgb = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB)
            images_rgb.append(img_rgb)
            labels.append(class_idx)

    return np.array(images_grayscale), np.array(images_rgb), np.array(labels)

In [12]:
data_dir = '/kaggle/working/ClassifBegninMalign+ddsm-2'
images_grayscale, images_rgb, labels = load_data(data_dir)
"""print("Extracting traditional features...")
traditional_features = extract_traditional_features(images_grayscale)"""
print("Extracting deep features...")
deep_features = extract_deep_features(images_rgb)
deep_features2 = extract_deep_features2(images_rgb)
X = np.hstack([deep_features2, deep_features])
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)


Extracting deep features...
[1m151/151[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m25s[0m 125ms/step
Deep Features Shape: (4812, 7, 7, 1024)
[1m151/151[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m29s[0m 176ms/step
Deep Features Shape: (4812, 7, 7, 768)


In [25]:
# === Step 3: Train SVM on Selected Features ===
clf = SVC(kernel='sigmoid', C=6, gamma='scale', class_weight='balanced')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# === Step 4: Evaluation ===
print(f"‚úÖ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("üßæ Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
print("ROC AUC:", roc_auc_score(y_test, clf.decision_function(X_test)))
print(classification_report(y_test, y_pred, target_names=['Benign', 'Malignant']))

‚úÖ Accuracy: 0.9720
üßæ Confusion Matrix:
[[429  16]
 [ 11 507]]
ROC AUC: 0.9922476248318945
              precision    recall  f1-score   support

      Benign       0.97      0.96      0.97       445
   Malignant       0.97      0.98      0.97       518

    accuracy                           0.97       963
   macro avg       0.97      0.97      0.97       963
weighted avg       0.97      0.97      0.97       963



In [26]:
import joblib
joblib.dump(clf, 'svm+lgbmdensenet+convnexttiny+9720+c=10+k=sigmoid+augmentation.pkl')

['svm+lgbmdensenet+convnexttiny+9720+c=10+k=sigmoid+augmentation.pkl']