In [1]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

In [10]:
data_dir = '/content/drive/MyDrive/train'  # Folder containing class subfolders
image_size = (128, 128)
batch_size = 32
epochs = 25

In [11]:
images = []
labels = []

In [12]:
# Get class names from subfolders and assign a class index
class_names = os.listdir(data_dir)
class_indices = {class_name: idx for idx, class_name in enumerate(class_names)}

In [13]:
for class_label, idx in class_indices.items():
    class_path = os.path.join(data_dir, class_label)
    if os.path.isdir(class_path):
        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)
            try:
                # Load image with target size
                img = load_img(img_path, target_size=image_size)
                img_array = img_to_array(img)  # Convert image to array
                images.append(img_array)
                labels.append(idx)  # Use class index as label
            except UnidentifiedImageError:
                print(f"Skipped unreadable image file: {img_path}")
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")

In [14]:
# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

In [15]:
# Split data into training, validation, and test sets (e.g., 70% train, 15% validation, 15% test)
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.3, random_state=42
)
val_images, test_images, val_labels, test_labels = train_test_split(
    test_images, test_labels, test_size=0.5, random_state=42
)

In [16]:
# Define ImageDataGenerators with normalization and augmentation for training set
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Create data generators
train_generator = train_datagen.flow(
    train_images, train_labels, batch_size=batch_size
)

val_generator = val_datagen.flow(
    val_images, val_labels, batch_size=batch_size
)

test_generator = test_datagen.flow(
    test_images, test_labels, batch_size=batch_size, shuffle=False
)

In [17]:
# Define the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(image_size[0], image_size[1], 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # Dropout to prevent overfitting
    Dense(1, activation='sigmoid')  # Binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Compile the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

In [23]:
# Debugging: Check train and validation generator sizes
print(f"Number of training batches: {len(train_generator)}")
print(f"Number of validation batches: {len(val_generator)}")

# Adjust validation_steps based on val_generator size
validation_steps = len(val_generator) if len(val_generator) > 0 else 1

# Train the model with adjusted validation steps
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=1,
    validation_data=val_generator,
    validation_steps=validation_steps  # Adjusted validation steps
)


Number of training batches: 1
Number of validation batches: 1
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - accuracy: 0.9091 - loss: 0.3042 - val_accuracy: 1.0000 - val_loss: 0.1589


In [24]:
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_generator, verbose=1)
print(f"Test Accuracy: {test_acc:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - accuracy: 0.3333 - loss: 0.8161
Test Accuracy: 0.3333


  self._warn_if_super_not_called()


In [27]:
model.save('lymph detector.h5')

