In [1]:
import os
import shutil
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

2024-04-18 10:28:38.398407: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-18 10:28:38.398501: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-18 10:28:38.557787: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
dataset_dir = "/kaggle/input/images-cancer"

# Path to where you want to store train and test datasets
base_output_dir = "/kaggle/working/"

In [3]:
# Create train and test directories if they don't exist
train_dir = os.path.join(base_output_dir, "train_dataset")
test_dir = os.path.join(base_output_dir, "test_dataset")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

In [4]:
# Percentage of data to be used for training
train_percent = 0.8

In [5]:
for root, dirs, files in os.walk(dataset_dir):
    for dir_name in dirs:
        # Check if the directory name is a label (0 or 1)
        if dir_name.isdigit():
            label = int(dir_name)
            images_dir = os.path.join(root, dir_name)
            
            # List all the image files in the directory
            image_files = [f for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))]
            
            # Shuffle the list of image files
            random.shuffle(image_files)
            
            # Calculate the number of images for training
            num_train = int(len(image_files) * train_percent)
            
            # Split the list into training and testing sets
            train_images = image_files[:num_train]
            test_images = image_files[num_train:]
            
            # Create label directories in train and test directories
            train_label_dir = os.path.join(train_dir, str(label))
            test_label_dir = os.path.join(test_dir, str(label))
            os.makedirs(train_label_dir, exist_ok=True)
            os.makedirs(test_label_dir, exist_ok=True)
            
            # Move images to train directory
            for img in train_images:
                src = os.path.join(images_dir, img)
                dst = os.path.join(train_label_dir, img)
                shutil.copy(src, dst)
            
            # Move images to test directory
            for img in test_images:
                src = os.path.join(images_dir, img)
                dst = os.path.join(test_label_dir, img)
                shutil.copy(src, dst)

In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [7]:
if tf.test.gpu_device_name():
    print('GPU found')
else:
    print("No GPU found. Please ensure you have GPU enabled runtime in Colab.")

GPU found


In [8]:
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    # Display the number of GPUs available
    print("Number of GPUs available:", len(gpus))
    # Display details of each GPU
    for gpu in gpus:
        print("Name:", gpu.name, "  Type:", gpu.device_type)
else:
    print("No GPUs available.")

Number of GPUs available: 2
Name: /physical_device:GPU:0   Type: GPU
Name: /physical_device:GPU:1   Type: GPU


In [9]:
# Define image dimensions and batch size
img_width, img_height = 150, 150
batch_size = 32

In [10]:
# Data augmentation for training set
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Only rescaling for validation set
test_datagen = ImageDataGenerator(rescale=1./255)

In [11]:
# Load the training dataset
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

# Load the testing dataset
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 266285 images belonging to 281 classes.
Found 100213 images belonging to 281 classes.


In [12]:
# Define the model architecture
def create_model():
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])
    return model

In [13]:
# Create a MirroredStrategy.
strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])

# Open a strategy scope.
with strategy.scope():
    # Everything that creates variables should be under the strategy scope.
    # In general this is only model construction & `compile`.
    model = create_model()
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

# Print model summary
model.summary()

  super().__init__(


In [14]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=10,
    validation_data=test_generator,
    validation_steps=test_generator.samples // batch_size
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1876s[0m 224ms/step - accuracy: 1.6171 - loss: 0.8681 - val_accuracy: 0.8443 - val_loss: 0.3666
Epoch 2/10
[1m   1/8321[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:53[0m 28ms/step - accuracy: 1.8125 - loss: 0.5628

  self.gen.throw(typ, value, traceback)


[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62us/step - accuracy: 1.0001 - loss: 0.1490 - val_accuracy: 0.9091 - val_loss: 0.1048
Epoch 3/10
[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1849s[0m 222ms/step - accuracy: 1.6923 - loss: 0.7188 - val_accuracy: 0.8629 - val_loss: 0.3283
Epoch 4/10
[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6us/step - accuracy: 0.9376 - loss: 0.2470 - val_accuracy: 0.5455 - val_loss: 0.4091
Epoch 5/10
[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1820s[0m 219ms/step - accuracy: 1.7029 - loss: 0.6921 - val_accuracy: 0.8591 - val_loss: 0.3381
Epoch 6/10
[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8us/step - accuracy: 0.8126 - loss: 0.3668 - val_accuracy: 1.0000 - val_loss: 0.0870
Epoch 7/10
[1m8321/8321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1820s[0m 218ms/step - accuracy: 1.7122 - loss: 0.6718 - val_accuracy: 0.8605 - val_loss: 0.3259
Epoch 8/10


In [15]:
# Evaluate the model
test_loss, test_acc = model.evaluate(test_generator, verbose=2)

# Convert test_acc to percentage and print with two decimal places
print('\nTest accuracy: {:.2f}%'.format(test_acc * 100))

3132/3132 - 71s - 23ms/step - accuracy: 0.8710 - loss: 0.3105

Test accuracy: 87.10%
