In [None]:
import pydicom
import os
import cv2

def convert_dcm_to_png_recursive(parent_dir, png_dir):
    """Recursively converts all DCM images within a directory and its subdirectories to PNG format."""

    for root, _, files in os.walk(parent_dir):  # Walk through all subdirectories
        for filename in files:
            if filename.endswith(".dcm"):
                dcm_path = os.path.join(root, filename)
                png_path = os.path.join(png_dir, os.path.relpath(root, parent_dir),
                                         os.path.splitext(filename)[0] + ".png")

                try:
                    ds = pydicom.read_file(dcm_path)
                    pixel_array = ds.pixel_array

                    # Convert to grayscale if needed
                    if len(pixel_array.shape) == 2:
                        img = cv2.cvtColor(pixel_array, cv2.COLOR_GRAY2BGR)
                    else:
                        img = pixel_array

                    os.makedirs(os.path.dirname(png_path), exist_ok=True)  # Create output directories if needed
                    cv2.imwrite(png_path, img)  # Use cv2.imwrite for PNG as well
                    print(f"Converted {filename} to PNG format.")

                except Exception as e:
                    print(f"Error converting {filename}: {e}")

# Example usage:
parent_dir = "G:/ChineseCheck/Malignant"  # Replace with the actual path
png_dir = "G:/ChineseCheck/MalignantP"  # Replace with the desired output path
convert_dcm_to_png_recursive(parent_dir, png_dir)


In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# Image dimensions and batch size
img_width, img_height = 224, 224
batch_size = 8

# ... (GPU availability and memory growth code from previous response)
def label_images(directory, target_size=(img_width, img_height), max_images_per_class=1000):
    images = []
    labels = []
    class_labels = {'Malignant': 0, 'Benign': 1}

    for class_label, class_index in class_labels.items():
        class_path = os.path.join(directory, class_label)
        image_count = 0
        for root, _, files in os.walk(class_path):
            for filename in files:
                if filename.endswith('.jpg') or filename.endswith('.png'):
                    if image_count < max_images_per_class:
                        file_path = os.path.join(root, filename)
                        image = load_img(file_path, target_size=target_size)  # Load with GPU
                        image = img_to_array(image)
                        image = preprocess_input(image)  # Process the image after loading
                        images.append(image)
                        labels.append(class_index)
                        image_count += 1
                    else:
                        break  # Stop processing this class if max images reached

    return np.array(images), np.array(labels)

# Example usage:
directory_path = 'G:/ChineseCheck/'
X, y = label_images(directory_path, target_size=(img_width, img_height))

# X contains the resized images, and y contains the corresponding labels
print(f"Total Images: {len(X)}")
print(f"Shape of an Image: {X[0].shape}")
print(f"Labels: {y}")

Total Images: 2000
Shape of an Image: (224, 224, 3)
Labels: [0 0 0 ... 1 1 1]


In [2]:
from sklearn.model_selection import train_test_split

# Assuming X and y are the images and labels obtained from the previous code
# X, y = label_images(directory_path)

# Split the data into 90% training and 10% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Print the sizes of the training and testing sets
print(f"Training Set: {len(X_train)} samples")
print(f"Testing Set: {len(X_test)} samples")

Training Set: 1800 samples
Testing Set: 200 samples


In [3]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Assuming you have three classes (malignant, benign, normal)
num_classes = 2
img_width, img_height = 224, 224  # Adjust these dimensions based on your data

def build_vgg19_model(learn_rate=0.001, momentum=0.9):
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

    # Freeze all layers except the last three
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(num_classes, activation='softmax'))

    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build a new VGG19 model
vgg19_model = build_vgg19_model()

# Number of folds for k-fold cross-validation
num_folds = 5
kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=110)

for fold, (train_index, val_index) in enumerate(kfold.split(X, y), 1):
    X_train_fold, X_val_fold = X[train_index], X[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # Convert labels to one-hot encoding
    y_train_fold_one_hot = to_categorical(y_train_fold, num_classes=num_classes)
    y_val_fold_one_hot = to_categorical(y_val_fold, num_classes=num_classes)

    # Train the VGG19 model
    class_weights = {0: 1.0, 1: 3.0}  # Adjust the weights based on class imbalance

    history = vgg19_model.fit(
        X_train_fold,
        y_train_fold_one_hot,
        epochs=10,
        validation_data=(X_val_fold, y_val_fold_one_hot),
        batch_size=8,
        verbose=1,
#         class_weight=class_weights
    )

    # Evaluate the model on the validation set
    val_loss, val_acc = vgg19_model.evaluate(X_val_fold, y_val_fold_one_hot, verbose=0)
    print(f"Validation Accuracy for Fold {fold}: {val_acc * 100:.2f}%")


Epoch 1/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m692s[0m 3s/step - accuracy: 0.5362 - loss: 0.9134 - val_accuracy: 0.5300 - val_loss: 0.7072
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m692s[0m 3s/step - accuracy: 0.5375 - loss: 0.7520 - val_accuracy: 0.4900 - val_loss: 0.7903
Epoch 3/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m663s[0m 3s/step - accuracy: 0.5441 - loss: 0.7259 - val_accuracy: 0.5375 - val_loss: 0.8339
Epoch 4/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m677s[0m 3s/step - accuracy: 0.5274 - loss: 0.7833 - val_accuracy: 0.4750 - val_loss: 1.6466
Epoch 5/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m665s[0m 3s/step - accuracy: 0.5353 - loss: 0.8008 - val_accuracy: 0.4950 - val_loss: 0.9240
Epoch 6/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m645s[0m 3s/step - accuracy: 0.5954 - loss: 0.7022 - val_accuracy: 0.5800 - val_loss: 0.7028
Epoch 7/10
[1m200/200

[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1244s[0m 6s/step - accuracy: 0.9129 - loss: 0.2191 - val_accuracy: 0.7375 - val_loss: 0.6511
Validation Accuracy for Fold 5: 73.75%


In [4]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Assuming you have three classes (malignant, benign, normal)
num_classes = 2
img_width, img_height = 224, 224  # Adjust these dimensions based on your data

def build_vgg19_model(learn_rate=0.0001, momentum=0.9):
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

    # Freeze all layers except the last three
    for layer in base_model.layers[:-3]:
        layer.trainable = False

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(num_classes, activation='softmax'))

    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build a new VGG19 model
vgg19_model = build_vgg19_model()

# Number of folds for k-fold cross-validation
num_folds = 5
kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=110)

for fold, (train_index, val_index) in enumerate(kfold.split(X, y), 1):
    X_train_fold, X_val_fold = X[train_index], X[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # Convert labels to one-hot encoding
    y_train_fold_one_hot = to_categorical(y_train_fold, num_classes=num_classes)
    y_val_fold_one_hot = to_categorical(y_val_fold, num_classes=num_classes)

    # Train the VGG19 model
    class_weights = {0: 1.0, 1: 3.0}  # Adjust the weights based on class imbalance

    history = vgg19_model.fit(
        X_train_fold,
        y_train_fold_one_hot,
        epochs=10,
        validation_data=(X_val_fold, y_val_fold_one_hot),
        batch_size=8,
        verbose=1,
#         class_weight=class_weights
    )

    # Evaluate the model on the validation set
    val_loss, val_acc = vgg19_model.evaluate(X_val_fold, y_val_fold_one_hot, verbose=0)
    print(f"Validation Accuracy for Fold {fold}: {val_acc * 100:.2f}%")


Epoch 1/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1252s[0m 6s/step - accuracy: 0.5325 - loss: 0.9227 - val_accuracy: 0.5675 - val_loss: 0.7821
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m677s[0m 3s/step - accuracy: 0.6059 - loss: 0.6767 - val_accuracy: 0.5125 - val_loss: 0.9176
Epoch 3/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m614s[0m 3s/step - accuracy: 0.6509 - loss: 0.6185 - val_accuracy: 0.6000 - val_loss: 0.6977
Epoch 4/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m610s[0m 3s/step - accuracy: 0.6691 - loss: 0.6138 - val_accuracy: 0.5725 - val_loss: 0.7736
Epoch 5/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m615s[0m 3s/step - accuracy: 0.7301 - loss: 0.5706 - val_accuracy: 0.5550 - val_loss: 0.7592
Epoch 6/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m617s[0m 3s/step - accuracy: 0.7242 - loss: 0.5482 - val_accuracy: 0.5725 - val_loss: 0.7933
Epoch 7/10
[1m200/20

[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m620s[0m 3s/step - accuracy: 0.9597 - loss: 0.0977 - val_accuracy: 0.9875 - val_loss: 0.0366
Validation Accuracy for Fold 5: 98.75%


In [5]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Function to calculate and print evaluation metrics
def evaluate_model_multi_class(model, X, y_true):
    # Predictions
    y_pred = vgg19_model.predict(X)

    # Convert one-hot encoding to class labels
    y_true_labels = np.argmax(y_true, axis=1)
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = accuracy_score(y_true_labels, y_pred_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Confusion Matrix
    cm = confusion_matrix(y_true_labels, y_pred_labels)
    print("Confusion Matrix:")
    print(cm)

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_true_labels, y_pred_labels))

# Assuming you have trained the model 'inception_model' and loaded the test set 'X_val_fold', 'y_val_fold_one_hot'
evaluate_model_multi_class(vgg19_model, X_val_fold, y_val_fold_one_hot)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 12s/step
Accuracy: 98.75%
Confusion Matrix:
[[196   4]
 [  1 199]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       200
           1       0.98      0.99      0.99       200

    accuracy                           0.99       400
   macro avg       0.99      0.99      0.99       400
weighted avg       0.99      0.99      0.99       400

