In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
import cv2

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Define paths
data_dir = 'training_set'
benign_dir = os.path.join(data_dir, 'benign')
malignant_dir = os.path.join(data_dir, 'malignant')

# Image parameters
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

FUNCTION TO LOAD ALL IMAGES

In [2]:
def load_images_from_folder(folder, label):
    """Load images from a folder and assign labels"""
    images = []
    labels = []

    for filename in os.listdir(folder):
        if filename.endswith('.png') and not filename.endswith('_mask.png'):
            img_path = os.path.join(folder, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
                images.append(img)
                labels.append(label)

    return images, labels

FUNCTION FOR FEATURE EXTRACTOR

In [3]:
def create_feature_extractor():
    """Create a CNN feature extractor"""
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5)
    ])

    return model

# MAIN FUNCTION

## Load and combine all dataset
## Process data for training and validation
## Extract features and train random forest model for prediction
## Evaluate using accuracy and F1-score.

In [11]:
def main():
    print("Loading benign images...")
    benign_images, benign_labels = load_images_from_folder(benign_dir, 0)  # 0 for benign

    print("Loading malignant images...")
    malignant_images, malignant_labels = load_images_from_folder(malignant_dir, 1)  # 1 for malignant

    # Combine datasets
    all_images = benign_images + malignant_images
    all_labels = benign_labels + malignant_labels

    print(f"Total images loaded: {len(all_images)}")
    print(f"Benign images: {len(benign_images)}")
    print(f"Malignant images: {len(malignant_images)}")

    # Convert to numpy arrays
    X = np.array(all_images)
    y = np.array(all_labels)

    # Normalize pixel values to [0, 1]
    X = X.astype('float32') / 255.0

    # Split into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    print(f"Training set: {len(X_train)} images")
    print(f"Validation set: {len(X_val)} images")

    # Create CNN feature extractor
    feature_extractor = create_feature_extractor()
    print("Feature extractor model created.")

    # Extract features from training set
    print("Extracting features from training set...")
    train_features = feature_extractor.predict(X_train, batch_size=BATCH_SIZE, verbose=1)

    # Extract features from validation set
    print("Extracting features from validation set...")
    val_features = feature_extractor.predict(X_val, batch_size=BATCH_SIZE, verbose=1)

    # Train Random Forest classifier
    print("Training Random Forest classifier...")
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(train_features, y_train)

    # Make predictions on validation set
    print("Making predictions on validation set...")
    y_pred = rf_classifier.predict(val_features)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred, average='weighted')
    report = classification_report(y_val, y_pred, target_names=['Benign', 'Malignant'])

    # Print results
    print("\n" + "="*50)
    print("MODEL EVALUATION RESULTS")
    print("="*50)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1-Score (weighted): {f1:.4f}")
    print("\nClassification Report:")
    print(report)
    print("="*50)



# Run Main Function

In [12]:
if __name__ == "__main__":
    main()

Loading benign images...
Loading malignant images...
Total images loaded: 565
Benign images: 404
Malignant images: 161
Training set: 452 images
Validation set: 113 images


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Feature extractor model created.
Extracting features from training set...
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 1s/step
Extracting features from validation set...
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step
Training Random Forest classifier...
Making predictions on validation set...

MODEL EVALUATION RESULTS
Accuracy: 0.8319
F1-Score (weighted): 0.8132

Classification Report:
              precision    recall  f1-score   support

      Benign       0.82      0.98      0.89        81
   Malignant       0.88      0.47      0.61        32

    accuracy                           0.83       113
   macro avg       0.85      0.72      0.75       113
weighted avg       0.84      0.83      0.81       113

