In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# later cells can read the dataset archive from it and write the exported model to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/Dataset.zip -d /content/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9042.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9043.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9044.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9045.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9046.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9047.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9049.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9052.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9054.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9057.jpg  
  inflating: /content/Dataset/validation/Normal/KVASIR/dc221ccc65d34010_9059.jpg  
  inflating: /content/

In [None]:
# Summarise the dataset tree: per folder, how many sub-directories and files it holds
import os

# os.walk yields (folder, sub-directory names, file names) for every folder under "Dataset"
for folder, subdirs, files in os.walk("Dataset"):
  n_dirs, n_files = len(subdirs), len(files)
  print(f"There are {n_dirs} directories and {n_files} images in {folder}.")

There are 2 directories and 0 images in Dataset.
There are 10 directories and 2 images in Dataset/training.
There are 3 directories and 0 images in Dataset/training/Bleeding.
There are 0 directories and 312 images in Dataset/training/Bleeding/KVASIR.
There are 0 directories and 3 images in Dataset/training/Bleeding/KID.
There are 0 directories and 519 images in Dataset/training/Bleeding/SEE-AI.
There are 3 directories and 0 images in Dataset/training/Erosion.
There are 0 directories and 354 images in Dataset/training/Erosion/KVASIR.
There are 0 directories and 0 images in Dataset/training/Erosion/KID.
There are 0 directories and 2340 images in Dataset/training/Erosion/SEE-AI.
There are 3 directories and 0 images in Dataset/training/Erythema.
There are 0 directories and 111 images in Dataset/training/Erythema/KVASIR.
There are 0 directories and 0 images in Dataset/training/Erythema/KID.
There are 0 directories and 580 images in Dataset/training/Erythema/SEE-AI.
There are 3 directories a

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
from collections import Counter
from sklearn.utils import shuffle
from tensorflow.keras.utils import Sequence

class BalancedImageDataGenerator:
    """Directory image loader that resamples every class to a fixed size.

    Wraps a Keras ``ImageDataGenerator`` (built from ``**kwargs``) and exposes a
    ``flow_from_directory`` that returns a ``Sequence`` in which every non-empty
    class contributes exactly ``target_samples`` images: larger classes are
    randomly undersampled, smaller ones are repeated (oversampled).

    Args:
        target_samples: Number of images drawn per class.
        **kwargs: Forwarded unchanged to ``ImageDataGenerator``.
    """

    def __init__(self, target_samples=5000, **kwargs):
        self.target_samples = target_samples
        self.image_data_generator = ImageDataGenerator(**kwargs)

    def flow_from_directory(self, directory, **kwargs):
        """Index ``directory`` and return a class-balanced batch ``Sequence``.

        Args:
            directory: Root folder containing one sub-folder per class.
            **kwargs: Forwarded to ``ImageDataGenerator.flow_from_directory``.
                ``batch_size`` (default 32), ``target_size`` (default
                ``(224, 224)``) and ``shuffle`` (default ``True``) are also
                read by the returned sequence.

        Returns:
            A ``Sequence`` yielding ``(images, one_hot_labels)`` batches.
        """
        # The base generator is only used to index the directory, so its own
        # shuffling is always off. A caller-supplied ``shuffle`` is removed from
        # kwargs first: forwarding it would clash with ``shuffle=False`` below.
        shuffle_batches = kwargs.pop('shuffle', True)

        base_generator = self.image_data_generator.flow_from_directory(
            directory,
            shuffle=False,
            **kwargs
        )

        # Get all filenames and their corresponding labels
        filenames = base_generator.filenames
        labels = base_generator.classes
        class_indices = base_generator.class_indices
        n_classes = len(class_indices)

        # Create balanced dataset
        balanced_filenames = []
        balanced_labels = []

        for class_idx in range(n_classes):
            class_files = [f for f, l in zip(filenames, labels) if l == class_idx]
            class_count = len(class_files)

            if class_count == 0:
                # An empty class folder has nothing to resample; skipping it
                # avoids a ZeroDivisionError in the oversampling arithmetic.
                continue

            if class_count >= self.target_samples:
                # Undersample: random subset of size target_samples
                selected_files = shuffle(class_files)[:self.target_samples]
            else:
                # Oversample: whole copies plus a random partial copy
                multiplier = self.target_samples // class_count
                remainder = self.target_samples % class_count
                selected_files = class_files * multiplier + shuffle(class_files)[:remainder]

            balanced_filenames.extend(selected_files)
            # Tie the label count to the files actually selected so the two
            # lists can never drift out of alignment.
            balanced_labels.extend([class_idx] * len(selected_files))

        # Shuffle the balanced dataset (files and labels in unison)
        balanced_filenames, balanced_labels = shuffle(balanced_filenames, balanced_labels)

        class BalancedGenerator(Sequence):
            """Batch loader over the resampled file list."""

            def __init__(self, parent, filenames, labels, directory, **kwargs):
                # Initialise the Keras base class; omitting this call makes
                # Keras emit its "super not called" warning during fit().
                super().__init__()
                self.parent = parent
                self.filenames = filenames
                self.labels = labels
                self.directory = directory
                self.n = len(filenames)
                self.batch_size = kwargs.get('batch_size', 32)
                self.target_size = kwargs.get('target_size', (224, 224))
                self.shuffle = kwargs.get('shuffle', True)
                self.indices = np.arange(self.n)
                self.class_indices = class_indices

                if self.shuffle:
                    np.random.shuffle(self.indices)

            def __len__(self):
                # Number of batches per epoch; the last batch may be partial.
                return int(np.ceil(self.n / float(self.batch_size)))

            def __getitem__(self, idx):
                start_idx = idx * self.batch_size
                end_idx = min((idx + 1) * self.batch_size, self.n)
                batch_indices = self.indices[start_idx:end_idx]

                batch_files = [os.path.join(self.directory, self.filenames[i]) for i in batch_indices]
                batch_labels = [self.labels[i] for i in batch_indices]

                # Load and preprocess images
                batch_images = []
                for f in batch_files:
                    img = tf.keras.utils.load_img(f, target_size=self.target_size)
                    img_array = tf.keras.utils.img_to_array(img)
                    # Augment first, then standardize: the same order Keras'
                    # own ImageDataGenerator iterators use.
                    img_array = self.parent.image_data_generator.random_transform(img_array)
                    img_array = self.parent.image_data_generator.standardize(img_array)
                    batch_images.append(img_array)

                batch_images = np.array(batch_images)

                # Convert labels to categorical (one-hot over all classes)
                batch_labels = tf.keras.utils.to_categorical(batch_labels, n_classes)

                return batch_images, batch_labels

            def on_epoch_end(self):
                # Re-draw the batch order for the next epoch.
                if self.shuffle:
                    np.random.shuffle(self.indices)

        return BalancedGenerator(
            self,
            balanced_filenames,
            balanced_labels,
            directory,
            shuffle=shuffle_batches,
            **kwargs
        )

# Usage: build the training and validation pipelines.
# Note: both generators resample every class to 5000 images in this cell,
# so the validation set is balanced here as well.
train_dir = '/content/Dataset/training'
val_dir = '/content/Dataset/validation'

# Augmentation applied to training images (validation only gets the rescale)
_train_augmentation = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

balanced_train_datagen = BalancedImageDataGenerator(target_samples=5000, **_train_augmentation)
balanced_val_datagen = BalancedImageDataGenerator(target_samples=5000, rescale=1./255)

# Loading options shared by both directory flows
_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

train_generator = balanced_train_datagen.flow_from_directory(train_dir, **_flow_options)
validation_generator = balanced_val_datagen.flow_from_directory(val_dir, **_flow_options)

Found 37607 images belonging to 10 classes.
Found 16132 images belonging to 10 classes.


**BUILD RESNET101 WITH SQUEEZE AND EXCITATION BLOCKS**

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet101

# Squeeze-and-Excitation Block
def se_block(input_tensor, ratio=16):
    """Apply a Squeeze-and-Excitation channel gate to a feature map.

    The input is globally average-pooled, passed through a two-layer
    bottleneck (ReLU then sigmoid) and the resulting per-channel weights are
    multiplied back onto the input.

    Args:
        input_tensor: Channels-last feature map; the channel count is read
            from axis 3.
        ratio: Reduction factor for the bottleneck width.

    Returns:
        ``input_tensor`` multiplied element-wise by the per-channel gates.
    """
    channel_axis = 3  # Channels last format
    filters = input_tensor.shape[channel_axis]

    # Keep at least one bottleneck unit: ``filters // ratio`` is 0 whenever the
    # input has fewer channels than ``ratio``, which is not a valid Dense width.
    bottleneck_units = max(filters // ratio, 1)

    se = layers.GlobalAveragePooling2D()(input_tensor)
    se = layers.Reshape((1, 1, filters))(se)  # (1, 1, C) so it broadcasts over H and W
    se = layers.Dense(bottleneck_units, activation='relu')(se)
    se = layers.Dense(filters, activation='sigmoid')(se)

    return layers.multiply([input_tensor, se])

# Adding SE blocks to ResNet101
def build_resnet101_se(input_shape=(224, 224, 3), num_classes=10):
    """Build an ImageNet-pretrained ResNet101 with an SE gate and a new head.

    Args:
        input_shape: Shape of the input images (height, width, channels).
        num_classes: Number of softmax outputs.

    Returns:
        An uncompiled Keras ``Model`` mapping images to class probabilities.
    """
    base_model = ResNet101(include_top=False, input_shape=input_shape, weights='imagenet')

    # Recalibrate the backbone's final feature map with one SE block.
    # The previous loop called se_block() on every Conv2D output but kept only
    # the last result, so the head was wired to the last convolution's output
    # (skipping every backbone layer after it) and all other SE blocks were
    # left disconnected from the model. Inserting SE inside each residual
    # stage would require rebuilding the backbone graph and is not done here.
    x = se_block(base_model.output)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)  # Fully connected layer
    x = layers.Dropout(0.5)(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=base_model.input, outputs=output)

    return model

# Instantiate the network with its default input shape and class count
model = build_resnet101_se()

# Compile for one-hot multi-class training with Adam at a learning rate of 1e-5
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
from collections import Counter
from sklearn.utils import shuffle
from tensorflow.keras.utils import Sequence

class BalancedImageDataGenerator:
    """Directory image loader with optional per-class resampling.

    Wraps a Keras ``ImageDataGenerator`` (built from ``**kwargs``). When
    ``balance_classes`` is true, ``flow_from_directory`` returns a ``Sequence``
    in which every non-empty class contributes exactly ``target_samples``
    images (random undersampling or repetition); otherwise the directory's
    files are used as-is.

    Args:
        target_samples: Number of images drawn per class when balancing.
        balance_classes: Whether to resample classes to ``target_samples``.
        **kwargs: Forwarded unchanged to ``ImageDataGenerator``.
    """

    def __init__(self, target_samples=5000, balance_classes=True, **kwargs):
        self.target_samples = target_samples
        self.balance_classes = balance_classes
        self.image_data_generator = ImageDataGenerator(**kwargs)

    def flow_from_directory(self, directory, **kwargs):
        """Index ``directory`` and return a batch ``Sequence`` over its images.

        Args:
            directory: Root folder containing one sub-folder per class.
            **kwargs: Forwarded to ``ImageDataGenerator.flow_from_directory``.
                ``batch_size`` (default 32), ``target_size`` (default
                ``(224, 224)``) and ``shuffle`` (default ``True``) are also
                read by the returned sequence.

        Returns:
            A ``Sequence`` yielding ``(images, one_hot_labels)`` batches.
        """
        # The base generator is only used to index the directory, so its own
        # shuffling is always off. A caller-supplied ``shuffle`` is removed from
        # kwargs first: forwarding it would clash with ``shuffle=False`` below.
        shuffle_batches = kwargs.pop('shuffle', True)

        base_generator = self.image_data_generator.flow_from_directory(
            directory,
            shuffle=False,
            **kwargs
        )

        # Get all filenames and their corresponding labels
        filenames = base_generator.filenames
        labels = base_generator.classes
        class_indices = base_generator.class_indices
        n_classes = len(class_indices)

        if not self.balance_classes:
            # If not balancing, just use original filenames and labels
            balanced_filenames = filenames
            balanced_labels = labels
        else:
            # Create balanced dataset
            balanced_filenames = []
            balanced_labels = []

            for class_idx in range(n_classes):
                class_files = [f for f, l in zip(filenames, labels) if l == class_idx]
                class_count = len(class_files)

                if class_count == 0:
                    # An empty class folder has nothing to resample; skipping
                    # it avoids a ZeroDivisionError in the arithmetic below.
                    continue

                if class_count >= self.target_samples:
                    # Undersample: random subset of size target_samples
                    selected_files = shuffle(class_files)[:self.target_samples]
                else:
                    # Oversample: whole copies plus a random partial copy
                    multiplier = self.target_samples // class_count
                    remainder = self.target_samples % class_count
                    selected_files = class_files * multiplier + shuffle(class_files)[:remainder]

                balanced_filenames.extend(selected_files)
                balanced_labels.extend([class_idx] * len(selected_files))

        # Shuffle the dataset (files and labels in unison)
        balanced_filenames, balanced_labels = shuffle(balanced_filenames, balanced_labels)

        class DataGenerator(Sequence):
            """Batch loader over the prepared file list."""

            def __init__(self, parent, filenames, labels, directory, **kwargs):
                # Initialise the Keras base class; omitting this call makes
                # Keras emit its "super not called" warning during fit().
                super().__init__()
                self.parent = parent
                self.filenames = filenames
                self.labels = labels
                self.directory = directory
                self.n = len(filenames)
                self.batch_size = kwargs.get('batch_size', 32)
                self.target_size = kwargs.get('target_size', (224, 224))
                self.shuffle = kwargs.get('shuffle', True)
                self.indices = np.arange(self.n)
                self.class_indices = class_indices

                if self.shuffle:
                    np.random.shuffle(self.indices)

            def __len__(self):
                # Number of batches per epoch; the last batch may be partial.
                return int(np.ceil(self.n / float(self.batch_size)))

            def __getitem__(self, idx):
                start_idx = idx * self.batch_size
                end_idx = min((idx + 1) * self.batch_size, self.n)
                batch_indices = self.indices[start_idx:end_idx]

                batch_files = [os.path.join(self.directory, self.filenames[i]) for i in batch_indices]
                batch_labels = [self.labels[i] for i in batch_indices]

                # Load and preprocess images
                batch_images = []
                for f in batch_files:
                    img = tf.keras.utils.load_img(f, target_size=self.target_size)
                    img_array = tf.keras.utils.img_to_array(img)
                    # Augment first, then standardize: the same order Keras'
                    # own ImageDataGenerator iterators use.
                    img_array = self.parent.image_data_generator.random_transform(img_array)
                    img_array = self.parent.image_data_generator.standardize(img_array)
                    batch_images.append(img_array)

                batch_images = np.array(batch_images)

                # Convert labels to categorical (one-hot over all classes)
                batch_labels = tf.keras.utils.to_categorical(batch_labels, n_classes)

                return batch_images, batch_labels

            def on_epoch_end(self):
                # Re-draw the batch order for the next epoch.
                if self.shuffle:
                    np.random.shuffle(self.indices)

        return DataGenerator(
            self,
            balanced_filenames,
            balanced_labels,
            directory,
            shuffle=shuffle_batches,
            **kwargs
        )

# Usage: balanced + augmented training pipeline, untouched validation pipeline.
# train_dir and val_dir are the dataset paths assigned in an earlier cell.

# Augmentation applied to training images only
_train_augmentation = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training: every class resampled to 5000 images
balanced_train_datagen = BalancedImageDataGenerator(
    target_samples=5000,
    balance_classes=True,
    **_train_augmentation
)

# Validation: original class distribution, rescale only
val_datagen = BalancedImageDataGenerator(balance_classes=False, rescale=1./255)

# Loading options shared by both directory flows
_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

train_generator = balanced_train_datagen.flow_from_directory(train_dir, **_flow_options)
validation_generator = val_datagen.flow_from_directory(val_dir, **_flow_options)

Found 37607 images belonging to 10 classes.
Found 16132 images belonging to 10 classes.


In [None]:
# Training callbacks: early stopping that restores the best weights (patience 5)
# and a learning-rate reduction by a factor of 0.2 (patience 3).
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(factor=0.2, patience=3)

# Train for up to 20 epochs, validating on validation_generator after each one
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/20


  self._warn_if_super_not_called()


[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1149s[0m 664ms/step - accuracy: 0.4917 - loss: 1.5709 - val_accuracy: 0.8354 - val_loss: 0.5144 - learning_rate: 1.0000e-05
Epoch 2/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m992s[0m 621ms/step - accuracy: 0.8429 - loss: 0.4803 - val_accuracy: 0.8740 - val_loss: 0.4114 - learning_rate: 1.0000e-05
Epoch 3/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m965s[0m 616ms/step - accuracy: 0.8998 - loss: 0.3019 - val_accuracy: 0.8937 - val_loss: 0.3677 - learning_rate: 1.0000e-05
Epoch 4/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m961s[0m 613ms/step - accuracy: 0.9298 - loss: 0.2154 - val_accuracy: 0.8905 - val_loss: 0.4034 - learning_rate: 1.0000e-05
Epoch 5/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m966s[0m 616ms/step - accuracy: 0.9492 - loss: 0.1658 - val_accuracy: 0.9020 - val_loss: 0.3745 - learning_rate: 1.0000e-05
Epoch 6/20
[1m1563/1563[0m 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import balanced_accuracy_score,f1_score


# Set the limit on the number of batches to process
num_batches = 505

# Step 1: Generate predictions for a limited number of batches
val_predictions = []
val_true_classes = []

# Index the Sequence directly instead of iterating over it, so the loop is
# bounded by both the configured limit and the generator's real length.
n_eval_batches = min(num_batches, len(validation_generator))

for i in range(n_eval_batches):
    x, y = validation_generator[i]
    # Score the model trained above; the previous name `resnet_model` is not
    # defined anywhere in this notebook and failed on a fresh kernel.
    preds = model.predict(x, verbose=0)
    # A model re-loaded from an exported artifact may return a dict of
    # outputs; the in-memory Keras model returns the probability array itself.
    if isinstance(preds, dict):
        preds = preds['output_0']
    val_predictions.append(np.argmax(preds, axis=1))
    val_true_classes.append(np.argmax(y, axis=1))

# Step 2: Concatenate predictions and true labels across batches
val_pred_classes = np.concatenate(val_predictions)
val_true_classes = np.concatenate(val_true_classes)

print(val_true_classes.shape,val_pred_classes.shape)

# Evaluation function
def evaluate_predictions(y_true, y_pred):
    """Score predicted class indices against the true class indices.

    Args:
        y_true: 1-D sequence of true integer class labels.
        y_pred: 1-D sequence of predicted integer class labels (same length).

    Returns:
        Tuple ``(balanced_accuracy, weighted_f1)``.
    """
    # Use the arguments directly: the previous body referenced
    # `y_true_classes` / `y_pred_classes`, which were never defined
    # (their argmax conversions were commented out), raising NameError.
    bal_acc = balanced_accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    return bal_acc, f1

# Score the collected validation predictions and report both metrics
bal_acc, f1 = evaluate_predictions(val_true_classes, val_pred_classes)
print(f"Ensemble Model - Balanced Accuracy: {bal_acc:.4f}, F1-Score: {f1:.4f}")

# Step 3: Confusion matrix of true vs. predicted classes
conf_matrix = confusion_matrix(val_true_classes, val_pred_classes)

# Step 4: Draw the matrix on an explicit figure/axes pair
fig, ax = plt.subplots()
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.set_title("Confusion Matrix (Limited to 505 Batches)")
plt.show()


In [None]:
# Save the model in SavedModel format.
# The target lives under /content/drive, so Google Drive must already be
# mounted (first cell) for the artifact to be written there.
model.export('/content/drive/MyDrive/models/resnet_modified_Kaist')

Saved artifact at '/content/drive/MyDrive/models/resnet_modified_Kaist'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_869')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  138895824802160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138895824801632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138895824808672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138895824800928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138895824800224: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138895824801280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138896248273232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138896248266368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138896251722160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138896251723392: TensorSpec(shape=(), dtype=