In [2]:
pip install tensorflow keras numpy matplotlib --break-system-packages


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image

2024-10-31 13:25:28.650011: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-31 13:25:28.650491: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-31 13:25:28.653706: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-31 13:25:28.660927: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730361328.674589    8580 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730361328.67

In [4]:
# Root folder holding one sub-folder per class, and the folder of test images.
dataset_path, test_path = 'Dataset2/FNA', 'Dataset2/test'
# Class sub-folder names; a category's position in this list is its integer label.
categories = ['benign', 'malignant']

In [5]:
# Hyperparameters
k = 5                          # number of folds for K-Fold Cross Validation
epochs = 20                    # epochs passed to model.fit for each fold
batch_size = 32                # batch size passed to model.fit
img_height = img_width = 150   # every image is resized to this square size on load

In [6]:
# Step 1: Data Augmentation and Preprocessing
# Settings are collected in one mapping and handed to the generator in a
# single call.
# NOTE(review): no visible cell passes data through `datagen`, so these
# augmentations are currently not applied during training. Also, load_data()
# already divides pixels by 255, so feeding its output through this generator
# would apply `rescale` a second time - confirm intent before wiring it in.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

In [8]:
# Load Data for K-Fold Cross Validation
def load_data():
    """Load every labelled image under ``dataset_path`` into memory.

    Each entry of ``categories`` names a sub-directory of ``dataset_path``;
    the entry's index in that list becomes the integer label of the images
    found there. Images are resized to ``(img_height, img_width)`` while
    loading.

    Files are visited in sorted name order so the returned arrays have the
    same row order on every run. ``os.listdir`` alone returns entries in
    arbitrary order, which would make index-based fold splits irreproducible.

    Files that cannot be loaded are reported on stdout and skipped instead of
    aborting the whole load.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is the stacked image array
        divided by 255 and ``labels`` is the matching array of class indices.
    """
    data = []
    labels = []
    for class_num, category in enumerate(categories):
        category_dir = os.path.join(dataset_path, category)
        for file_name in sorted(os.listdir(category_dir)):
            img_path = os.path.join(category_dir, file_name)
            try:
                loaded = image.load_img(img_path, target_size=(img_height, img_width))
                img_array = image.img_to_array(loaded)
            except Exception as e:
                # Deliberate best-effort: a stray or corrupt file should not
                # stop the rest of the dataset from loading.
                print(f"Error loading image {img_path}: {e}")
                continue
            # Append image and label together so the two lists stay aligned.
            data.append(img_array)
            labels.append(class_num)
    data = np.array(data) / 255.0  # Normalize images
    labels = np.array(labels)
    return data, labels

# Load the whole labelled dataset into memory once; the K-Fold cell indexes
# into these two arrays to build each fold.
data, labels = load_data()

In [9]:
# Step 2: Define the CNN Model
def create_model():
    """Build and compile a fresh, untrained CNN for binary classification.

    The network takes ``(img_height, img_width, 3)`` images, applies four
    Conv2D + MaxPooling2D stages (32, 64, 128, 128 filters), flattens, and
    finishes with a 512-unit dense layer and a single sigmoid output unit.
    It is compiled with the Adam optimizer, binary cross-entropy loss and an
    accuracy metric.

    Returns:
        The compiled ``Sequential`` model. Every call returns a new model
        with newly initialised weights.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer. Passing
        # `input_shape` to the first Conv2D makes Keras emit a UserWarning
        # asking for an Input object instead.
        layers.Input(shape=(img_height, img_width, 3)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        # One sigmoid unit: the output is compared against 0.5 at prediction time.
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [None]:
# Step 3: Implementing K-Fold Cross Validation
# Seed Python, NumPy and TensorFlow, and pin the fold shuffle, so re-running
# this cell uses the same folds and the same initial weights.
random_seed = 42
tf.keras.utils.set_random_seed(random_seed)
kf = KFold(n_splits=k, shuffle=True, random_state=random_seed)
history_dict = {}

for fold_no, (train_index, val_index) in enumerate(kf.split(data), start=1):
    print(f"Training fold {fold_no}...")
    # A new model per fold so no trained weights carry over between folds.
    model = create_model()

    # Splitting data for the fold
    train_data, val_data = data[train_index], data[val_index]
    train_labels, val_labels = labels[train_index], labels[val_index]

    # Train model
    history = model.fit(
        train_data, train_labels,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(val_data, val_labels),
        verbose=2
    )

    # Save history for plotting
    history_dict[fold_no] = history

# Aggregate the per-fold results: the last-epoch validation accuracy of each
# fold, reported as mean and standard deviation across folds.
final_val_accuracies = [h.history['val_accuracy'][-1] for h in history_dict.values()]
print(
    f"Final validation accuracy over {len(final_val_accuracies)} folds: "
    f"mean={np.mean(final_val_accuracies):.4f}, std={np.std(final_val_accuracies):.4f}"
)

# NOTE(review): after the loop `model` is the model trained on the last fold's
# training split only. The prediction and save cells use that object as the
# "final" model - consider retraining on all of `data` before relying on it.


Training fold 1...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-10-31 13:27:35.038777: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/20
44/44 - 13s - 284ms/step - accuracy: 0.6875 - loss: 0.5684 - val_accuracy: 0.8406 - val_loss: 0.3834
Epoch 2/20
44/44 - 11s - 250ms/step - accuracy: 0.8579 - loss: 0.3706 - val_accuracy: 0.8319 - val_loss: 0.4778
Epoch 3/20
44/44 - 11s - 245ms/step - accuracy: 0.8883 - loss: 0.3091 - val_accuracy: 0.8986 - val_loss: 0.2852
Epoch 4/20
44/44 - 13s - 288ms/step - accuracy: 0.8905 - loss: 0.3172 - val_accuracy: 0.8522 - val_loss: 0.4105
Epoch 5/20
44/44 - 14s - 313ms/step - accuracy: 0.8978 - loss: 0.2845 - val_accuracy: 0.8986 - val_loss: 0.2748
Epoch 6/20
44/44 - 14s - 310ms/step - accuracy: 0.9072 - loss: 0.2783 - val_accuracy: 0.9072 - val_loss: 0.2708
Epoch 7/20
44/44 - 14s - 329ms/step - accuracy: 0.9050 - loss: 0.3011 - val_accuracy: 0.8464 - val_loss: 0.4099
Epoch 8/20
44/44 - 14s - 329ms/step - accuracy: 0.8970 - loss: 0.2987 - val_accuracy: 0.8986 - val_loss: 0.3035
Epoch 9/20
44/44 - 15s - 352ms/step - accuracy: 0.9159 - loss: 0.2641 - val_accuracy: 0.9014 - val_loss:

In [None]:
# Step 4: Plot Training and Validation Loss/Accuracy
def plot_metrics(history_dict):
    """Plot per-fold training and validation curves for accuracy and loss.

    Draws one figure with two panels: accuracy on the left, loss on the
    right. Each fold contributes a training and a validation line to each
    panel.

    Args:
        history_dict: Mapping of fold number to the object returned by
            ``model.fit``. Each value must expose a ``history`` dict with
            the keys ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 5))

    for fold, history in history_dict.items():
        curves = history.history
        acc_ax.plot(curves['accuracy'], label=f'Train Acc Fold {fold}')
        acc_ax.plot(curves['val_accuracy'], label=f'Val Acc Fold {fold}')
        loss_ax.plot(curves['loss'], label=f'Train Loss Fold {fold}')
        loss_ax.plot(curves['val_loss'], label=f'Val Loss Fold {fold}')

    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend(loc='lower right')
    acc_ax.set_title('Training and Validation Accuracy')

    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend(loc='upper right')
    loss_ax.set_title('Training and Validation Loss')

    fig.tight_layout()
    plt.show()

# Draw the curves for every fold recorded in history_dict by the K-Fold cell.
plot_metrics(history_dict)

In [None]:
# Step 5: Load and Predict Unlabelled Test Images
# Builds `test_images`, a list of (file name, normalized single-image batch)
# pairs consumed by the prediction cell.
test_images = []
# Sorted so the prediction report lists files in a stable order across runs.
for img_name in sorted(os.listdir(test_path)):
    img_path = os.path.join(test_path, img_name)
    try:
        img = image.load_img(img_path, target_size=(img_height, img_width))
        img_array = image.img_to_array(img)
    except Exception as e:
        # Same best-effort policy as load_data(): report the unreadable file
        # and keep going instead of aborting the whole cell.
        print(f"Error loading image {img_path}: {e}")
        continue
    img_array = np.expand_dims(img_array, axis=0)  # Model expects a batch of images
    test_images.append((img_name, img_array / 255.0))  # Normalize


In [None]:

# Use the final model to make predictions on test data
# `model` is the object left bound by the K-Fold training loop.
if test_images:  # np.concatenate raises on an empty list, so skip when nothing was loaded
    # Each stored array already carries a leading batch axis of size 1, so
    # joining them on that axis gives one batch. A single predict call avoids
    # the per-call overhead and progress bar of predicting image by image.
    test_batch = np.concatenate([img_array for _, img_array in test_images], axis=0)
    predictions = model.predict(test_batch, batch_size=batch_size, verbose=0)

    for (img_name, _), prediction in zip(test_images, predictions):
        # The sigmoid output is the score for label 1, which is 'malignant'
        # given the order of `categories`.
        class_label = 'Malignant' if prediction[0] > 0.5 else 'Benign'
        print(f"Image: {img_name} | Prediction: {class_label}")


In [None]:
# Step 6: Save the model
# Persists whichever object is currently bound to `model`; in this notebook
# that is the model assigned during the last iteration of the K-Fold loop.
final_model_path = '22CS30032_A2/final_model.h5'
model.save(final_model_path)