In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import ResNet152V2, MobileNet
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import glob

In [None]:
# Check TensorFlow and GPU setup
print("TensorFlow version:", tf.__version__)
print("GPUs available:", tf.config.list_physical_devices('GPU'))

In [None]:
# Dataset Paths
CANCER_PATH = '/kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER'
NON_CANCER_PATH = '/kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/NON CANCER'

In [None]:
# Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [None]:
# Safe Data Generator
class SafeDataGenerator(Sequence):
    def __init__(self, file_paths, labels, batch_size, target_size=(224, 224)):
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.target_size = target_size

    def __len__(self):
        return len(self.file_paths) // self.batch_size

    def __getitem__(self, idx):
        batch_paths = self.file_paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_labels = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        images, labels = [], []
        for path, label in zip(batch_paths, batch_labels):
            try:
                img = load_img(path, target_size=self.target_size)
                img_array = img_to_array(img) / 255.0  # Normalize
                images.append(img_array)
                labels.append(label)
            except Exception as e:
                print(f"Skipping corrupted file: {path}, Error: {e}")
        return np.array(images), np.array(labels)

In [None]:
# Collect file paths
cancer_files = glob.glob(f"{CANCER_PATH}/*.jpeg")
non_cancer_files = glob.glob(f"{NON_CANCER_PATH}/*.jpeg")

file_paths = cancer_files + non_cancer_files
labels = [1] * len(cancer_files) + [0] * len(non_cancer_files)

In [None]:
# Shuffle dataset
file_paths, labels = shuffle(file_paths, labels, random_state=42)

In [None]:
# Split into training and validation
split_idx = int(len(file_paths) * 0.8)
train_paths, val_paths = file_paths[:split_idx], file_paths[split_idx:]
train_labels, val_labels = labels[:split_idx], labels[split_idx:]

In [None]:
# Create generators
train_generator = SafeDataGenerator(train_paths, train_labels, batch_size=BATCH_SIZE, target_size=IMG_SIZE)
val_generator = SafeDataGenerator(val_paths, val_labels, batch_size=BATCH_SIZE, target_size=IMG_SIZE)

In [None]:
# Define ResNet152V2 Model
def build_resnet152v2(input_shape=(224, 224, 3)):
    base_model = ResNet152V2(weights='imagenet', include_top=False, input_shape=input_shape)
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(128, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)  # Binary classification
    model = Model(inputs=base_model.input, outputs=x)

    # Freeze base model layers
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Define MobileNet Model
def build_mobilenet(input_shape=(224, 224, 3)):
    base_model = MobileNet(weights='imagenet', include_top=False, input_shape=input_shape)
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(128, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)  # Binary classification
    model = Model(inputs=base_model.input, outputs=x)

    # Freeze base model layers
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Build Models
resnet_model = build_resnet152v2()
mobilenet_model = build_mobilenet()

In [None]:
# Train ResNet152V2
history_resnet = resnet_model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator)
)

# Save ResNet152V2 Model
resnet_model.save("/kaggle/working/resnet152v2_oral_cancer.h5")

In [None]:
# Train MobileNet
history_mobilenet = mobilenet_model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator)
)

# Save MobileNet Model
mobilenet_model.save("/kaggle/working/mobilenet_oral_cancer.h5")

In [1]:
# Results

TensorFlow version: 2.17.1
GPUs available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m234545216/234545216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10


  self._warn_if_super_not_called()


Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/273.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x78892883e2f0>
Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/101.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x78892883e570>
Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/407.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x78892883e340>
Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/481.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x7889289dfc40>
[1m 1/23[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12:28[0m 34s/step - accuracy: 0.3871 - loss: 0.9560Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/200.jpeg



Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/485.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x7888f0777560>
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 2s/step - accuracy: 0.7399 - loss: 0.5586 - val_accuracy: 0.8868 - val_loss: 0.2889
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/101.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x7888f0617650>
Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/328.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x7888f0617830>
Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/407.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x7888f0617970>
Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/481.jpeg, Error: cannot identify image file <_io.BytesIO object at 0x7888f0617830>
[1m 2/23[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m8s[0m 421ms/step - accuracy: 0.9439 - loss: 0.1772Skipping corrupted file: /kaggle/input/oral-cancer-dataset/Oral cancer Dataset 2.0/OC Dataset kaggle new/CANCER/

In [None]:
# Custom Prediction Function
def predict_custom_image(model_path, image_path):
    model = tf.keras.models.load_model(model_path)
    image = load_img(image_path, target_size=IMG_SIZE)
    image_array = img_to_array(image) / 255.0
    image_array = np.expand_dims(image_array, axis=0)  # Add batch dimension

    prediction = model.predict(image_array)
    class_name = 'Cancer' if prediction[0][0] > 0.5 else 'Non-Cancer'
    print(f"Prediction: {class_name}, Confidence: {prediction[0][0]:.2f}")

# Example Usage 
predict_custom_image("/kaggle/working/resnet152v2_oral_cancer.h5", "/path/to/custom/image.jpg")
predict_custom_image("/kaggle/working/mobilenet_oral_cancer.h5", "/path/to/custom/image.jpg")