In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from sklearn.model_selection import train_test_split
from shutil import copyfile

mainDataDir='/kaggle/input/multi-cancer/Multi Cancer/Breast Cancer'
benignDir=os.path.join(mainDataDir,'breast_benign')
malignantDir=os.path.join(mainDataDir,'breast_malignant')

In [3]:
trainDir = '/kaggle/working/training_data'
valDir = '/kaggle/working/validation_data'

In [4]:
os.makedirs(trainDir,exist_ok=True)
os.makedirs(valDir,exist_ok=True)

In [5]:
def split_and_copy(class_dir, train_output_dir, val_output_dir):
    images = [os.path.join(class_dir, img) for img in os.listdir(class_dir) if img.endswith(('jpg', 'jpeg', 'png'))]

    train_images, val_images = train_test_split(images, test_size=0.2, random_state=42)

    os.makedirs(train_output_dir, exist_ok=True)
    os.makedirs(val_output_dir, exist_ok=True)

    for img in train_images:
        copyfile(img, os.path.join(train_output_dir, os.path.basename(img)))

    for img in val_images:
        copyfile(img, os.path.join(val_output_dir, os.path.basename(img)))

split_and_copy(benignDir,os.path.join(trainDir,'breast_benign'),os.path.join(valDir,'breast_benign'))
split_and_copy(malignantDir,os.path.join(trainDir,'breast_malignant'),os.path.join(valDir,'breast_malignant'))

## Data Augmentation

In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator=train_datagen.flow_from_directory(
    '/kaggle/working/training_data',
    target_size=(224,224),
    class_mode='binary',
    batch_size=32
)

val_generator=val_datagen.flow_from_directory(
    '/kaggle/working/validation_data',
    target_size=(224,224),
    class_mode='binary',
    batch_size=32
)

Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


## Training the data

In [7]:
from keras import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Dropout,Flatten,Input,InputLayer,GlobalAveragePooling1D,BatchNormalization

In [8]:
noOfClasses=len(train_generator.class_indices)
noOfClasses

2

In [9]:
imageShape=(224,224,3)

In [98]:
from tensorflow.keras.regularizers import l2

model = Sequential([
    Input(shape=imageShape),
    Conv2D(32,(3,3),activation='relu',kernel_regularizer=l2(0.001)),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(32,(3,3),activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.2),
    Flatten(),
    Dense(128,activation='relu'),
    Dense(noOfClasses,activation='sigmoid')
])

In [99]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [100]:
model.summary()

In [105]:
from keras.callbacks import EarlyStopping
earlystop= EarlyStopping(monitor='val_loss', patience=4,mode="min")

In [106]:
batch_size=32
steps_per_epoch = len(train_generator) // batch_size  
validation_steps = len(val_generator) // batch_size  
history = model.fit(train_generator,validation_data=val_generator,epochs=30,steps_per_epoch=steps_per_epoch,validation_steps=validation_steps,callbacks=[earlystop])

Epoch 1/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.8674 - loss: 0.3452 - val_accuracy: 0.8750 - val_loss: 0.2135
Epoch 2/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.8343 - loss: 0.3846 - val_accuracy: 0.9062 - val_loss: 0.3032
Epoch 3/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.8929 - loss: 0.3438 - val_accuracy: 0.9062 - val_loss: 0.2546
Epoch 4/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.8598 - loss: 0.3315 - val_accuracy: 0.8750 - val_loss: 0.2993
Epoch 5/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.8375 - loss: 0.3848 - val_accuracy: 0.8438 - val_loss: 0.3031


In [108]:
# Evaluate on the training generator
train_results = model.evaluate(train_generator,batch_size=32)
print("Train loss, Train accuracy:", train_results)

# Evaluate on the validation generator
val_results = model.evaluate(val_generator)
print("Validation loss, Validation accuracy:", val_results)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 842ms/step - accuracy: 0.8639 - loss: 0.3671
Train loss, Train accuracy: [0.36650270223617554, 0.8642500042915344]
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 398ms/step - accuracy: 0.8039 - loss: 0.6136
Validation loss, Validation accuracy: [0.5477408766746521, 0.8295000195503235]


In [109]:
def savingModel(model, model_name="model"): 
    model_filename = f"{model_name}.h5"
    model.save(model_filename)
    print(f"Model saved as {model_filename}")

In [110]:
savingModel(model,model_name="breastCancer")

Model saved as breastCancer.h5


In [111]:
from keras.models import load_model

In [112]:
def loadingModel(model_name="model"): 
    try:
        model = load_model(f"{model_name}.h5")
        model.summary()  # Print model summary if needed
        return model  # Return the loaded model
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

In [113]:
loadedModel=loadingModel("breastCancer")

In [114]:
def loadImg(imgPath): 
    img = tf.keras.preprocessing.image.load_img(imgPath, target_size=(224, 224))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array=img_array/255.0
    return img_array

In [115]:
import tensorflow as tf

In [116]:
def predictImagesInFolder(testFolder, class_name, model_name): 
    savedModel = loadingModel(model_name)
    print(savedModel)

    if savedModel is None:
        print("Model loading failed.")
        return

    for subdir, dirs, files in os.walk(testFolder): 
        if subdir == testFolder: 
            continue
        trueLabel = os.path.basename(subdir)
        for file in files: 
            img_path = os.path.join(subdir, file)
            img = loadImg(img_path)
            
            # Debugging prediction probabilities
            predictions = savedModel.predict(img)
#             print(f"Prediction Probabilities for {file}: {predictions}")

            predicted_class_index = np.argmax(predictions, axis=1)
            predicted_class = class_name[predicted_class_index[0]]
            print(f"Predicted class: {predicted_class} File: {file}")

In [117]:
def testFolder(folder): 
    testFolder=f"{folder}"
    return testFolder

In [118]:
breastCancerTestFolder=testFolder("/kaggle/working/validation_data")

In [119]:
breastCancerTestFolder

'/kaggle/working/validation_data'

In [120]:
breastCancerClassname={0:'breast_benign', 1:'breast_malignant'}

In [121]:
predictImagesInFolder(breastCancerTestFolder,breastCancerClassname,model_name="breastCancer")

<Sequential name=sequential_6, built=True>
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
Predicted class: breast_benign File: breast_benign_3171.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted class: breast_malignant File: breast_benign_1092.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted class: breast_benign File: breast_benign_4849.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Predicted class: breast_benign File: breast_benign_3995.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted class: breast_benign File: breast_benign_3149.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted class: breast_benign File: breast_benign_0410.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Predicted class: breast_benign File: breast_benign_3854.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━