In [13]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import cv2
import numpy as np

# 1- Data acquisition and selection

In [14]:
# Data acquisition and selection
# Root folders of the training and test image sets, given relative to the
# working directory. Both are swept by remove_corrupted_images() below and
# later handed to flow_from_directory().
train_data_dir = './colon_image_sets/training_set'
test_data_dir = './colon_image_sets/test_set'

# Remove corrupted or invalid images
def is_valid_image(img_path):
    """Return True if OpenCV can decode the file at ``img_path``.

    ``cv2.imread`` reports a missing, unreadable, or unsupported file by
    returning ``None``, so that is the primary check. Any ordinary exception
    raised while reading is also treated as "not a valid image".

    Args:
        img_path: Path of the file to probe.

    Returns:
        bool: ``True`` when the file decodes to an image, ``False`` otherwise.
    """
    try:
        img = cv2.imread(img_path)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still be able to stop a long directory sweep.
        return False
    return img is not None

def remove_corrupted_images(dir_path):
    """Delete every file under ``dir_path`` that fails ``is_valid_image``.

    The tree rooted at ``dir_path`` is walked recursively. Files that fail
    the check are removed from disk with ``os.remove``; nothing is returned.

    Args:
        dir_path: Root directory to sweep.
    """
    for folder, _subdirs, filenames in os.walk(dir_path):
        candidates = (os.path.join(folder, name) for name in filenames)
        for candidate in candidates:
            if is_valid_image(candidate):
                continue
            os.remove(candidate)

# Sweep both dataset folders; files that cannot be decoded are deleted from disk.
for dataset_dir in (train_data_dir, test_data_dir):
    remove_corrupted_images(dataset_dir)


# 2- Data Cleaning and Preprocessing

In [None]:
# Data cleaning and preprocessing

# Settings shared by both directory iterators. IMAGE_SIZE must agree with the
# (64, 64, 3) input_shape of the model's first Conv2D layer.
IMAGE_SIZE = (64, 64)
BATCH_SIZE = 32
SEED = 42  # fixes the shuffle order and random transformations across runs

# Data Augmentation (training images only): pixel values are scaled to [0, 1]
# and each image is randomly sheared, zoomed, flipped, rotated and shifted.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode='nearest')

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# class_mode='binary' pairs with the single sigmoid output unit and the
# binary_crossentropy loss used by the model.
training_set = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    seed=SEED)

# shuffle=False keeps the files in a fixed order, so any later predictions
# line up with test_set.classes / test_set.filenames.
test_set = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False)


# 4- Methodology

### (feature extraction and selection)

In [9]:

# Define the CNN model as one layer stack: two convolution + pooling stages
# followed by a small dense classifier head.
model = Sequential([
    # Stage 1: 32 filters of 3x3 over 64x64 inputs with 3 channels
    Conv2D(32, (3, 3), input_shape=(64, 64, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters of 3x3, again followed by 2x2 max pooling
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Collapse the feature maps into a vector for the dense layers
    Flatten(),

    # Fully connected layer, with dropout to limit overfitting
    Dense(units=128, activation='relu'),
    Dropout(rate=0.5),

    # Output layer: a single sigmoid unit for the two-class decision
    Dense(units=1, activation='sigmoid'),
])


In [10]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Print a layer-by-layer overview of the model (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 62, 62, 32)        896       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 31, 31, 32)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 14, 14, 64)       0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 12544)             0         
                                                                 
 dense_2 (Dense)             (None, 128)              

In [None]:
# Train the model
# Model.fit accepts generators directly; fit_generator is deprecated and no
# longer available in recent TensorFlow/Keras releases.
#
# steps_per_epoch / validation_steps count BATCHES, not images. The previous
# values (8000 / 2000) therefore requested 8000 * 32 = 256,000 training images
# and 2000 * 32 = 64,000 validation images per epoch.
# NOTE(review): 8000 / 2000 were presumably meant as image counts - confirm.
# len() of a directory iterator is its number of batches, so one epoch is now
# exactly one pass over each set.
model.fit(
    training_set,
    steps_per_epoch=len(training_set),
    epochs=25,
    validation_data=test_set,
    validation_steps=len(test_set))