In [4]:
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

In [34]:
# Filter DataFrame where 'image view' is 'CC'
breast_cancer_df = pd.read_csv('CBIS-DDSM_combined.csv')
df_cc = breast_cancer_df[breast_cancer_df['image view'] == 'CC']
df_cc.head()

Unnamed: 0,pathology,left or right breast,image view,abnormality type,image_path
0,MALIGNANT,LEFT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.342386...
2,BENIGN,LEFT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.891800...
5,MALIGNANT,RIGHT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.392091...
8,BENIGN,RIGHT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.353764...
10,BENIGN,LEFT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.145557...


In [17]:
# Function to preprocess the images using OpenCV and InceptionV3's required preprocessing
def load_and_preprocess_image(image_path, img_size=(299, 299)):
    img = cv2.imread(image_path)
    img = cv2.resize(img, img_size)  # Resize to target size (299x299 for InceptionV3)
    img = img.astype('float32') / 255.0  # Normalize pixel values (0 to 1)
    return img

In [35]:
# Label encoding (MALIGNANT = 1, BENIGN = 0)
df_cc['pathology'] = df_cc['pathology'].map({'MALIGNANT': 1, 'BENIGN': 0})

# Prepare image paths and labels
image_paths = df_cc['image_path'].values
labels = df_cc['pathology'].values

# Split the data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    image_paths, labels, test_size=0.2, random_state=42, stratify=labels
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cc['pathology'] = df_cc['pathology'].map({'MALIGNANT': 1, 'BENIGN': 0})


In [36]:
df_cc.head()

Unnamed: 0,pathology,left or right breast,image view,abnormality type,image_path
0,1,LEFT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.342386...
2,0,LEFT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.891800...
5,1,RIGHT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.392091...
8,0,RIGHT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.353764...
10,0,LEFT,CC,mass,CBIS-DDSM/jpeg/1.3.6.1.4.1.9590.100.1.2.145557...


In [37]:
# Preprocess images using OpenCV
X_train_processed = np.array([load_and_preprocess_image(img) for img in X_train])
X_test_processed = np.array([load_and_preprocess_image(img) for img in X_test])

# Convert labels to categorical (for binary classification)
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

In [38]:
from tensorflow.keras.applications import InceptionV3

# Load the InceptionV3 model, pre-trained on ImageNet, without the top layers
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# Freeze the base model layers
base_model.trainable = False
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(2, activation='softmax')  # Binary classification (Benign, Malignant)
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), 
              loss='binary_crossentropy', 
              metrics=['accuracy'])


In [39]:
print(X_train_processed.shape)  # Should output (num_samples, 299, 299, 3)
print(X_test_processed.shape)   # Should output (num_samples, 299, 299, 3)


(1076, 299, 299, 3)
(270, 299, 299, 3)


In [40]:
# Train the model
history = model.fit(
    X_train_processed, y_train,
    epochs=5,
    validation_data=(X_test_processed, y_test),
    batch_size=32
)

Epoch 1/5
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 2s/step - accuracy: 0.5380 - loss: 0.7234 - val_accuracy: 0.5852 - val_loss: 0.6690
Epoch 2/5
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 2s/step - accuracy: 0.6086 - loss: 0.6559 - val_accuracy: 0.5963 - val_loss: 0.6551
Epoch 3/5
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 2s/step - accuracy: 0.6334 - loss: 0.6403 - val_accuracy: 0.5815 - val_loss: 0.6464
Epoch 4/5
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 2s/step - accuracy: 0.6237 - loss: 0.6406 - val_accuracy: 0.5926 - val_loss: 0.6410
Epoch 5/5
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 2s/step - accuracy: 0.6376 - loss: 0.6264 - val_accuracy: 0.6185 - val_loss: 0.6400


In [41]:
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test_processed, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.6265 - loss: 0.6310
Test Accuracy: 0.6185
