In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
from sklearn.model_selection import StratifiedKFold

# Image dimensions and batch size
img_width, img_height = 224, 224
batch_size = 8

# ... (GPU availability and memory growth code from previous response)
def label_images(directory, target_size=(img_width, img_height), max_images_per_class=1000):
    images = []
    labels = []
    class_labels = {'Malignant': 0, 'Benign': 1}

    for class_label, class_index in class_labels.items():
        class_path = os.path.join(directory, class_label)
        image_count = 0
        for root, _, files in os.walk(class_path):
            for filename in files:
                if filename.endswith('.jpg') or filename.endswith('.png'):
                    if image_count < max_images_per_class:
                        file_path = os.path.join(root, filename)
                        image = load_img(file_path, target_size=target_size)  # Load with GPU
                        image = img_to_array(image)
                        image = preprocess_input(image)  # Process the image after loading
                        images.append(image)
                        labels.append(class_index)
                        image_count += 1
                    else:
                        break  # Stop processing this class if max images reached

    return np.array(images), np.array(labels)

# Example usage:
directory_path = 'G:/ChineseCheck/'
X, y = label_images(directory_path, target_size=(img_width, img_height))

# X contains the resized images, and y contains the corresponding labels
print(f"Total Images: {len(X)}")
print(f"Shape of an Image: {X[0].shape}")
print(f"Labels: {y}")

# Split the data into 90% training and 10% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Print the sizes of the training and testing sets
print(f"Training Set: {len(X_train)} samples")
print(f"Testing Set: {len(X_test)} samples")

from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

# Assuming you have three classes (malignant, benign, normal)
num_classes = 2
img_width, img_height = 224, 224  # Adjust these dimensions based on your data

def build_vgg16_model(learn_rate=0.0001, momentum=0.9):
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

    # Freeze all layers except the last three
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(num_classes, activation='softmax'))

    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build a new VGG16 model
vgg16_model = build_vgg16_model()

# Number of folds for k-fold cross-validation
num_folds = 5
kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=110)

for fold, (train_index, val_index) in enumerate(kfold.split(X, y), 1):
    X_train_fold, X_val_fold = X[train_index], X[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # Convert labels to one-hot encoding
    y_train_fold_one_hot = to_categorical(y_train_fold, num_classes=num_classes)
    y_val_fold_one_hot = to_categorical(y_val_fold, num_classes=num_classes)

    # Train the VGG16 model
    class_weights = {0: 1.0, 1: 3.0}  # Adjust the weights based on class imbalance

    history = vgg16_model.fit(
        X_train_fold,
        y_train_fold_one_hot,
        epochs=10,
        validation_data=(X_val_fold, y_val_fold_one_hot),
        batch_size=8,
        verbose=1,
#         class_weight=class_weights
    )

    # Evaluate the model on the validation set
    val_loss, val_acc = vgg16_model.evaluate(X_val_fold, y_val_fold_one_hot, verbose=0)
    print(f"Validation Accuracy for Fold {fold}: {val_acc * 100:.2f}%")

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Function to calculate and print evaluation metrics for multi-class classification
def evaluate_model_multi_class(model, X, y_true):
    # Predictions
    y_pred = model.predict(X)

    # Convert one-hot encoding to class labels
    y_true_labels = np.argmax(y_true, axis=1)
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = accuracy_score(y_true_labels, y_pred_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Confusion Matrix
    cm = confusion_matrix(y_true_labels, y_pred_labels)
    print("Confusion Matrix:")
    print(cm)

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_true_labels, y_pred_labels))

    # Assuming you have trained the model 'vgg16_model' and loaded the test set 'X_val_fold', 'y_val_fold_one_hot'
evaluate_model_multi_class(vgg16_model, X_val_fold, y_val_fold_one_hot)


Total Images: 2000
Shape of an Image: (224, 224, 3)
Labels: [0 0 0 ... 1 1 1]
Training Set: 1800 samples
Testing Set: 200 samples
Epoch 1/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m735s[0m 4s/step - accuracy: 0.5242 - loss: 0.9518 - val_accuracy: 0.5025 - val_loss: 1.1222
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m734s[0m 4s/step - accuracy: 0.6265 - loss: 0.6516 - val_accuracy: 0.5625 - val_loss: 0.8603
Epoch 3/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m722s[0m 4s/step - accuracy: 0.6522 - loss: 0.6377 - val_accuracy: 0.5850 - val_loss: 0.7003
Epoch 4/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m722s[0m 4s/step - accuracy: 0.6970 - loss: 0.5690 - val_accuracy: 0.5400 - val_loss: 0.8029
Epoch 5/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m723s[0m 4s/step - accuracy: 0.7221 - loss: 0.5430 - val_accuracy: 0.5525 - val_loss: 0.8969
Epoch 6/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0

[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 2s/step - accuracy: 0.9819 - loss: 0.0496 - val_accuracy: 0.9900 - val_loss: 0.0310
Epoch 10/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m497s[0m 2s/step - accuracy: 0.9924 - loss: 0.0266 - val_accuracy: 0.9950 - val_loss: 0.0337
Validation Accuracy for Fold 5: 99.50%
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 7s/step
Accuracy: 99.50%
Confusion Matrix:
[[200   0]
 [  2 198]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       200
           1       1.00      0.99      0.99       200

    accuracy                           0.99       400
   macro avg       1.00      0.99      0.99       400
weighted avg       1.00      0.99      0.99       400

