<a href="https://colab.research.google.com/github/bhaveshgupta01/BCancerGogo/blob/main/GOGO_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications.vgg16 import preprocess_input


# Image dimensions and batch size
img_width, img_height = 224, 224
batch_size = 16

def label_images(directory, target_size=(img_width, img_height)):
    images = []
    labels = []
    class_labels = {'malignant': 0, 'benign': 1, 'normal': 2}

    for class_label, class_index in class_labels.items():
        class_path = os.path.join(directory, class_label)
        for filename in os.listdir(class_path):
            if filename.endswith('.jpg') or filename.endswith('.png'):
                file_path = os.path.join(class_path, filename)
                image = cv2.imread(file_path)  # You can use PIL.Image.open() as well
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB format
                image = cv2.resize(image, target_size)  # Resize the image to target size
                image = preprocess_input(image)  # Preprocess the image for VGG16
                images.append(image)
                labels.append(class_index)

    return np.array(images), np.array(labels)


# Example usage:
directory_path = '/content/drive/MyDrive/MIASdata'
X, y = label_images(directory_path, target_size=(img_width, img_height))

# X contains the resized images, and y contains the corresponding labels
print(f"Total Images: {len(X)}")
print(f"Shape of an Image: {X[0].shape}")
print(f"Labels: {y}")


Total Images: 321
Shape of an Image: (224, 224, 3)
Labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [None]:
from sklearn.model_selection import train_test_split

# Assuming X and y are the images and labels obtained from the previous code
# X, y = label_images(directory_path)

# Split the data into 90% training and 10% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Print the sizes of the training and testing sets
print(f"Training Set: {len(X_train)} samples")
print(f"Testing Set: {len(X_test)} samples")


Training Set: 288 samples
Testing Set: 33 samples


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.optimizers import SGD

# Assuming you have three classes (malignant, benign, normal)
num_classes = 3

def build_vgg_model(learn_rate=1e-4, momentum=0.9):
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

    # Freeze all layers except the last three
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(layers.Dense(num_classes, activation='softmax'))  # Change activation to 'softmax'

    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# ... (your data loading and preprocessing code)
vgg_model = build_vgg_model()

# Number of folds for k-fold cross-validation
num_folds = 5
kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

for fold, (train_index, val_index) in enumerate(kfold.split(X, y), 1):
    X_train_fold, X_val_fold = X[train_index], X[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # Convert labels to one-hot encoding
    y_train_fold_one_hot = to_categorical(y_train_fold, num_classes=num_classes)
    y_val_fold_one_hot = to_categorical(y_val_fold, num_classes=num_classes)

    # Build a new VGG16 model for each fold

    # Train the VGG16 model
    class_weights = {0: 4.0, 1: 4.0, 2: 1.0}  # Adjust the weights based on class imbalance

    history = vgg_model.fit(
        X_train_fold,
        y_train_fold_one_hot,
        epochs=10,
        validation_data=(X_val_fold, y_val_fold_one_hot),
        batch_size=32,
        verbose=1,
        class_weight=class_weights
    )

    # Evaluate the model on the validation set
    val_loss, val_acc = vgg_model.evaluate(X_val_fold, y_val_fold_one_hot, verbose=0)
    print(f"Validation Accuracy for Fold {fold}: {val_acc * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Accuracy for Fold 1: 46.15%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Accuracy for Fold 2: 96.88%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Accuracy for Fold 3: 100.00%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Accuracy for Fold 4: 96.88%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Accuracy for Fold 5: 98.44%


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

y_test_one_hot=to_categorical(y_test, num_classes=num_classes)

# Function to calculate and print evaluation metrics
def evaluate_model_multi_class(model, X, y_true):
    # Predictions
    y_pred = model.predict(X)

    # Convert one-hot encoding to class labels
    y_true_labels = np.argmax(y_true, axis=1)
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = accuracy_score(y_true_labels, y_pred_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Confusion Matrix
    cm = confusion_matrix(y_true_labels, y_pred_labels)
    print("Confusion Matrix:")
    print(cm)

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_true_labels, y_pred_labels))

# Assuming you have trained the model 'inception_model' and loaded the test set 'X_val_fold', 'y_val_fold_one_hot'
evaluate_model_multi_class(vgg_model, X_test, y_test_one_hot)


Accuracy: 100.00%
Confusion Matrix:
[[ 4  0  0]
 [ 0  9  0]
 [ 0  0 20]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        20

    accuracy                           1.00        33
   macro avg       1.00      1.00      1.00        33
weighted avg       1.00      1.00      1.00        33



................................................................................................................................................................


IGNORE `\/`

In [None]:
# Evaluate the overall performance on the test set
test_loss, test_acc = vgg_model.evaluate(X_test, y_test, verbose=0)
print(f"\nOverall Test Accuracy: {test_acc * 100:.2f}%")



Overall Test Accuracy: 27.27%


In [None]:
# Number of folds for k-fold cross-validation
num_folds = 5
kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

for fold, (train_index, val_index) in enumerate(kfold.split(X_train, y_train), 1):
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

    print(f"Fold {fold} - Training Set: {len(X_train_fold)} samples, Validation Set: {len(X_val_fold)} samples")

    # Rest of your code (build model, train, evaluate) goes here

    # Example: Print the first few labels in the training and validation sets
    print(f"First few labels in training set: {y_train_fold[:5]}")
    print(f"First few labels in validation set: {y_val_fold[:5]}")
    print("\n")


In [None]:
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from sklearn.model_selection import train_test_split

# # Data augmentation
# datagen = ImageDataGenerator(
#     rotation_range=0,  # We will manually apply rotation
#     vertical_flip=True,
#     # No preprocessing_function specified for denoising or other operations
# )

# # Split the data into 90% training and 10% testing
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# # Apply augmentation to training data
# X_train_augmented = []
# y_train_augmented = []

# # for i in range(len(X_train)):
# #     # Apply rotation only to 90, 180, 270, and 0 degrees
# #     for angle in [0, 90, 180, 270]:
# #         rotated_image = datagen.apply_transform(X_train[i], {'theta': angle})
# #         X_train_augmented.append(rotated_image)
# #         y_train_augmented.append(y_train[i])
# # Apply augmentation to training data
# train_iterator = datagen.flow(X_train, y_train, batch_size=len(X_train), shuffle=False)
# X_train_augmented, y_train_augmented = train_iterator.next()

# X_train_augmented = np.array(X_train_augmented)
# y_train_augmented = np.array(y_train_augmented)

# # Print the sizes of the training and testing sets
# print(f"Original Training Set: {len(X_train)} samples")
# print(f"Augmented Training Set: {len(X_train_augmented)} samples")
# print(f"Original Testing Set: {len(X_test)} samples")

In [None]:
from scipy.ndimage import morphology  # Import the morphology module from scipy

# Image preprocessing including noise removal, histogram equalization, and morphological analysis
def preprocess_image(img):
    # Apply denoising
    denoised_img = cv2.medianBlur(img, 5)
    # Apply morphological operations
    eroded_img = morphology.binary_erosion(denoised_img).astype(np.uint8) * 255
    dilated_img = morphology.binary_dilation(eroded_img).astype(np.uint8) * 255
    # Normalize to [0, 1]
    normalized_img = dilated_img / 255.0
    return normalized_img


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split


# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=360,  # Rotate at any angle within the specified range
    vertical_flip=True,
    #preprocessing_function=preprocess_image  # Denoising
)

# Split the data into 90% training and 10% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Apply augmentation to training data
train_iterator = datagen.flow(X_train, y_train, batch_size=len(X_train), shuffle=False)
X_train_augmented, y_train_augmented = train_iterator.next()

# Apply morphological operations to training data
X_train_augmented_morph = np.array([cv2.morphologyEx(img, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8)) for img in X_train_augmented])

# Apply the same augmentation and morphological operations to test data
X_test_augmented = datagen.standardize(X_test)
X_test_augmented_morph = np.array([cv2.morphologyEx(img, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8)) for img in X_test_augmented])

# Print the sizes of the training and testing sets
print(f"Original Training Set: {len(X_train)} samples")
print(f"Augmented Training Set: {len(X_train_augmented_morph)} samples")
print(f"Original Testing Set: {len(X_test)} samples")
print(f"Augmented Testing Set: {len(X_test_augmented_morph)} samples")

In [None]:
from sklearn.model_selection import KFold, train_test_split

# Assuming X and y are the images and labels obtained from the previous code
# X, y = label_images(directory_path)

# Number of splits for k-fold cross-validation
n_splits = 5

# Initialize k-fold cross-validation
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Outer loop for k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(X, y), 1):
    X_train_kfold, X_test_kfold = X[train_index], X[test_index]
    y_train_kfold, y_test_kfold = y[train_index], y[test_index]

    # Inner loop for splitting the training set into train and validation
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_kfold, y_train_kfold, test_size=0.2, random_state=42
    )

    # Now you can train your model using X_train and y_train, and validate using X_val and y_val
    # ...

    # At the end of each fold, you can evaluate on the test set (X_test_kfold, y_test_kfold)
    # ...

    print(f"\nFold {fold} - Train Set: {len(X_train)}, Validation Set: {len(X_val)}, Test Set: {len(X_test_kfold)}")


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import SGD


# Define the base model (VGG16 without top layers)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

# Freeze the layers
for layer in base_model.layers:
    layer.trainable = False

# # Custom model for breast cancer detection
# def build_model(learn_rate=1e-4):
#     model = models.Sequential()
#     model.add(base_model)
#     model.add(layers.Flatten())
#     model.add(layers.Dense(512, activation='relu'))
#     model.add(BatchNormalization())  # Add Batch Normalization
#     model.add(layers.Dense(1, activation='sigmoid'))

#     optimizer = Adam(learning_rate=0.001)

#     model.compile(
#         optimizer=optimizer,
#         loss='binary_crossentropy',
#         metrics=['accuracy']
#     )

#     return model
def build_fine_tuned_vgg_model(learn_rate=1e-4, momentum=0.9):
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

    # Freeze all layers except the last three
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(layers.Dropout(0.5))  # Add dropout for regularization
    model.add(layers.Dense(3, activation='softmax'))  # Change activation to 'softmax'

    # model.add(layers.Dense(1, activation='sigmoid'))

    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)

    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

# Example usage:
fine_tuned_vgg_model = build_fine_tuned_vgg_model()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import to_categorical

# Convert labels to one-hot encoding
y_train_fold_one_hot = to_categorical(y_train_fold, num_classes=3)
y_val_fold_one_hot = to_categorical(y_val_fold, num_classes=3)

# Assuming X and y are the images and labels obtained from the previous code
# X, y = label_images(directory_path)

# Number of folds for k-fold cross-validation
num_folds = 5
kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

for fold, (train_index, val_index) in enumerate(kfold.split(X_train, y_train), 1):

    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
    print(f"\nFold {fold} - Training Set: {len(X_train_fold)} samples, Validation Set: {len(X_val_fold)} samples")

    # Your model training and evaluation code goes here for each fold
    # Train the model
    class_weights = {0: 4.0, 1: 4.0, 2: 1.0}  # Adjust the weights based on class imbalance

    # Train the model
    history = fine_tuned_vgg_model.fit(
        X_train_fold,
        y_train_fold_one_hot,  # Use one-hot encoded labels
        epochs=20,
        validation_data=(X_val_fold, y_val_fold_one_hot),  # Use one-hot encoded labels
        batch_size=32,
        verbose=1,
        class_weight=class_weights
    )

    # Evaluate the model on the validation set
    val_loss, val_acc = fine_tuned_vgg_model.evaluate(X_val_fold, y_val_fold, verbose=0)
    print(f"Validation Accuracy for Fold {fold + 1}: {val_acc * 100:.2f}%")

NameError: ignored

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Function to calculate and print evaluation metrics
def evaluate_model_multi_class(model, X, y_true):
    # Predictions
    y_pred = vgg_model.predict(X)

    # Convert one-hot encoding to class labels
    y_true_labels = np.argmax(y_true, axis=1)
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = accuracy_score(y_true_labels, y_pred_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Confusion Matrix
    cm = confusion_matrix(y_true_labels, y_pred_labels)
    print("Confusion Matrix:")
    print(cm)

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_true_labels, y_pred_labels))

# Assuming you have trained the model 'inception_model' and loaded the test set 'X_val_fold', 'y_val_fold_one_hot'
evaluate_model_multi_class(vgg_model, X_val_fold, y_val_fold_one_hot)


Accuracy: 98.44%
Confusion Matrix:
[[10  0  0]
 [ 0 11  1]
 [ 0  0 42]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.92      0.96        12
           2       0.98      1.00      0.99        42

    accuracy                           0.98        64
   macro avg       0.99      0.97      0.98        64
weighted avg       0.98      0.98      0.98        64

