In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python


import numpy as np 
import pandas as pd 
import os
# Print the full path of every file found beneath the Kaggle input directory.
input_tree = os.walk('/kaggle/input')
for folder, _, entries in input_tree:
    full_paths = [os.path.join(folder, entry) for entry in entries]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.utils import shuffle
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import load_model



# List the GPU devices TensorFlow can see; an empty list means no GPU is visible to it.
print("GPU:", tf.config.list_physical_devices('GPU'))
# IPython shell escape: run the NVIDIA command-line tool to show the GPU status.
!nvidia-smi

In [None]:
import os, cv2, numpy as np
from tqdm import tqdm

dataset_path = "/kaggle/input/breakhis/Breakhis"
im_size      = 260   # side length (pixels) every image is resized to

images, labels = [], []
total_seen = good_resized = skipped_notimg = skipped_unread = 0

# Keep only class sub-directories and sort them, so that the position of each
# name is deterministic and does not depend on os.listdir's arbitrary order.
categories = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)   # e.g. ['benign', 'malignant']

for category in categories:
    class_path = os.path.join(dataset_path, category)

    # Sorted listing -> the sample order (and therefore any seeded split made
    # from it) is reproducible across sessions.
    for fname in tqdm(sorted(os.listdir(class_path)), desc=f"Loading {category}"):
        total_seen += 1

        if not fname.lower().endswith(('.png', '.jpg', '.jpeg')):
            skipped_notimg += 1
            continue

        fpath = os.path.join(class_path, fname)
        if not os.path.isfile(fpath):
            skipped_notimg += 1
            continue

        img = cv2.imread(fpath)
        if img is None:             # cv2.imread signals failure by returning None
            skipped_unread += 1
            continue

        # OpenCV decodes to BGR; the ImageNet-pretrained network (and the
        # single-image prediction helper in this notebook) work on RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (im_size, im_size))
        good_resized += 1

        images.append(img)
        labels.append(category)

if not images:
    raise RuntimeError(f"No readable images found under {dataset_path!r}")

# Converting to numpy arrays.
# Pixel values are deliberately left in [0, 255]: Keras' EfficientNet models
# contain their own rescaling/normalisation layers and expect raw pixel
# values, so dividing by 255 here would feed them near-black images.
images = np.asarray(images, dtype=np.float32)           # (N, 260, 260, 3), RGB
labels = np.array(labels).ravel()                       # (N,)

print("\n===== Loader Summary =====")
print(f"Total entries seen   : {total_seen}")
print(f"Successfully resized : {good_resized}")
print(f"Skipped (not images) : {skipped_notimg}")
print(f"Skipped (unreadable) : {skipped_unread}")
print(f"images shape         : {images.shape}")
print(f"labels raw shape     : {labels.shape}")

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Step 1: turn the class-name strings into integer codes.
label_encoder = LabelEncoder()
label_encoder.fit(labels)
labels_int = label_encoder.transform(labels)                # 1-D, shape (N,)

# Step 2: expand the integer codes into one indicator column per class.
one_hot_encoder = OneHotEncoder(sparse_output=False)
labels_column = labels_int.reshape(-1, 1)                   # encoder needs a 2-D column
labels_onehot = one_hot_encoder.fit(labels_column).transform(labels_column)  # (N, C)

print("\nEncoded shapes:")
for tag, encoded in (("labels_int   :", labels_int), ("labels_onehot:", labels_onehot)):
    print(tag, encoded.shape)

In [None]:
# Mixed precision: make 'mixed_float16' the global Keras dtype policy so that
# layers created after this point compute in FP16 (faster training on GPU).
from tensorflow.keras import mixed_precision
fp16_policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(fp16_policy)

In [None]:
# EfficientNet Model
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB2

# Provisional class count: one class per entry found in the dataset folder.
num_classes = len(categories)

# Random image perturbations placed in front of the network:
# horizontal mirroring, a small rotation and a small zoom.
augmentation_steps = [
    layers.RandomFlip(mode='horizontal'),
    layers.RandomRotation(factor=0.1),
    layers.RandomZoom(height_factor=0.1),
]
data_augmentation = tf.keras.Sequential(augmentation_steps)

In [None]:
# Split Dataset
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Stratified 80 / 10 / 10 train / validation / test split.
#
# The split is done on *indices* so that the stratification labels are always
# aligned with the samples they describe (shuffling `images`/`labels_onehot`
# separately from `labels_int` would silently break the stratification), and
# so that the inputs are not rebound — re-running this cell gives the same
# result. train_test_split shuffles by default, so no extra shuffle is needed.
assert len(images) == len(labels_int) == len(labels_onehot), \
    "images and labels are out of sync"

sample_idx = np.arange(len(labels_int))

# 1) Hold out 10% of all samples as the final test set.
trainval_idx, test_idx = train_test_split(
    sample_idx,
    test_size=0.1,
    random_state=42,
    stratify=labels_int,
)

# 2) Carve a validation set of the same size out of the remainder
#    (an integer test_size is an absolute number of samples).
train_idx, val_idx = train_test_split(
    trainval_idx,
    test_size=len(test_idx),
    random_state=42,
    stratify=labels_int[trainval_idx],
)

train_x, val_x, test_x = images[train_idx], images[val_idx], images[test_idx]

train_y = labels_onehot[train_idx].astype('float32')
val_y   = labels_onehot[val_idx].astype('float32')
test_y  = labels_onehot[test_idx].astype('float32')
num_classes = train_y.shape[1]   # authoritative class count: one-hot width

In [None]:
# Model Training
from tensorflow.keras import models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Pretrained base with transfer learning (ImageNet weights, no classifier top)
base_model = EfficientNetB2(weights='imagenet', include_top=False, input_shape=(260, 260, 3))

# Freeze base: only the new classification head is trained in the first phase
base_model.trainable = False

# Building classification head
inputs = tf.keras.Input(shape=(260, 260, 3))
x = data_augmentation(inputs)  # augmentation pipeline
# training=False keeps the base in inference mode (e.g. its BatchNorm
# statistics are not updated), also later when layers are unfrozen.
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.4)(x)  # dropout to reduce overfitting
# Under a mixed_float16 policy a softmax computed in float16 can be
# numerically unstable, so produce the logits with the global policy and
# apply the softmax in a separate float32 layer.
x = layers.Dense(num_classes)(x)
outputs = layers.Activation('softmax', dtype='float32')(x)
model = models.Model(inputs, outputs)

# Compiling with a lower LR
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks, all driven by the validation loss:
#   - keep the best model seen so far on disk (working directory),
#   - stop after 7 epochs without improvement and roll back to the best weights,
#   - halve the learning rate after 3 epochs without improvement.
checkpoint_cb = ModelCheckpoint(
    '/kaggle/working/best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)
early_stop_cb = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
lr_decay_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
callbacks = [checkpoint_cb, early_stop_cb, lr_decay_cb]

# Phase 1: train the model while the pretrained base is frozen.
initial_epochs = 32
history = model.fit(
    x=train_x,
    y=train_y,
    validation_data=(val_x, val_y),
    batch_size=16,
    epochs=initial_epochs,
    callbacks=callbacks,
)

# Unfreeze deeper layers for fine-tuning
base_model.trainable = True
# ...but keep the first 100 layers of the base frozen
for layer in base_model.layers[:100]:
    layer.trainable = False

# Lower LR again for fine-tuning; recompiling is required for the
# trainable-flag changes above to take effect.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Fine-tuning phase
fine_tune_epochs = 32
# Continue the epoch numbering from where phase 1 actually stopped. Phase 1
# may have ended early (EarlyStopping), so the end epoch is derived from the
# real starting point — this guarantees exactly `fine_tune_epochs` epochs of
# fine-tuning instead of "whatever is left up to 64".
fine_tune_start = history.epoch[-1] + 1 if history.epoch else 0
total_epochs = fine_tune_start + fine_tune_epochs

history_fine = model.fit(
    train_x, train_y,
    epochs=total_epochs,
    initial_epoch=fine_tune_start,  # start from last epoch
    batch_size=16,
    validation_data=(val_x, val_y),
    callbacks=callbacks  # same callbacks to keep saving best model
)

In [None]:
import matplotlib.pyplot as plt

# Concatenate each metric of the first training run with the same metric of
# the fine-tuning run, giving one continuous curve per metric.
acc, val_acc, loss, val_loss = (
    history.history[key] + history_fine.history[key]
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

epochs = range(1, len(acc) + 1)

# One figure per metric: accuracy first, then loss.
curve_specs = [
    {
        'train': acc, 'val': val_acc,
        'train_label': 'Training Accuracy', 'val_label': 'Validation Accuracy',
        'title': 'Training vs. Validation Accuracy', 'ylabel': 'Accuracy',
    },
    {
        'train': loss, 'val': val_loss,
        'train_label': 'Training Loss', 'val_label': 'Validation Loss',
        'title': 'Training vs. Validation Loss', 'ylabel': 'Loss',
    },
]

for spec in curve_specs:
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(epochs, spec['train'], label=spec['train_label'])
    ax.plot(epochs, spec['val'], label=spec['val_label'])
    ax.set_title(spec['title'])
    ax.set_xlabel('Epoch')
    ax.set_ylabel(spec['ylabel'])
    ax.legend()
    ax.grid(True)
    plt.show()


In [None]:
# Evaluation on Test Set
# Load the checkpoint from the exact absolute path ModelCheckpoint writes to,
# so this cell does not depend on the notebook's current working directory.
best_model = tf.keras.models.load_model('/kaggle/working/best_model.keras')

# NOTE(review): requires `test_x` / `test_y` (held-out images and one-hot
# labels) to have been defined by an earlier cell.
test_loss, test_acc = best_model.evaluate(test_x, test_y)
print(f"\nTest Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Class probabilities
y_pred_probs = best_model.predict(test_x)

# Converting one-hot encoded labels to class indices
y_true = np.argmax(test_y, axis=1)
y_pred = np.argmax(y_pred_probs, axis=1)

# Class names in the order of the integer codes actually used for training
# (taken from the fitted LabelEncoder rather than from the directory listing,
# whose order and length are not guaranteed to match the encoded labels).
class_names = [str(name) for name in label_encoder.classes_]
class_ids = list(range(len(class_names)))

# Compute confusion matrix; passing `labels` fixes its shape to C x C even if
# some class happens to be absent from y_true / y_pred.
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("Confusion Matrix:")
print(cm)

# Binary classification
if cm.shape == (2, 2):
    # Row = true class, column = predicted class; class index 1 is treated as
    # the positive class. NOTE(review): presumably 'malignant' — confirm
    # against label_encoder.classes_.
    TN, FP, FN, TP = cm.ravel()

    # Guard against an empty positive / negative group in the test set.
    sensitivity = TP / (TP + FN) if (TP + FN) else float('nan')  # Recall / TPR
    specificity = TN / (TN + FP) if (TN + FP) else float('nan')  # TNR

    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")

else:
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred,
                                labels=class_ids,
                                target_names=class_names))

In [None]:

def upload_image():
    """Open the Colab file-upload dialog and return the first uploaded filename.

    Returns ``None`` when the upload result contains no files.
    """
    from google.colab import files

    uploaded = files.upload()
    # Only the first entry is used; any further uploaded files are ignored.
    return next(iter(uploaded.keys()), None)

# Prediction function with proper error handling
def predict_unseen_image(model, img_path=None):
    """Load, preprocess and classify a single image with ``model``.

    Parameters
    ----------
    model
        Object exposing ``predict(batch)``; called with one 260x260 RGB image
        wrapped in a batch of size 1.
    img_path : str, optional
        Path of an image file already on disk. When omitted (the default) the
        image is obtained interactively through ``upload_image()``, which
        only works inside Google Colab.

    Returns
    -------
    The raw output of ``model.predict`` for the image, or ``None`` if any
    step failed (the error message is printed instead of being raised).
    """
    try:
        if img_path is None:
            # Upload the image
            print("Please upload an image file:")
            img_path = upload_image()

        if not img_path:
            raise FileNotFoundError("No image was uploaded")

        # Load the image (cv2.imread returns None instead of raising)
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image at path: {img_path}")

        # Preprocess
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        img = cv2.resize(img, (260, 260))  # Resize to B2 model's input size

        # Display the image
        plt.figure(figsize=(6, 6))
        plt.imshow(img)
        plt.axis("off")
        plt.title("Input Image")
        plt.show()

        # Prepare for model prediction
        x = np.expand_dims(img, axis=0)  # Add batch dimension
        x = tf.keras.applications.efficientnet.preprocess_input(x)  # Use correct preprocessing

        # Make prediction
        prediction = model.predict(x)
        predicted_class = np.argmax(prediction)  # index of the highest score
        confidence = np.max(prediction)          # the highest score itself

        print("\nPrediction Results:")
        print(f"Raw Prediction Output: {prediction}")
        print(f"Predicted Class: {predicted_class}")
        print(f"Confidence: {confidence:.2%}")

        return prediction

    except Exception as e:
        # Deliberate best-effort: report the problem and let the notebook continue.
        print(f"\nError: {e}")
        return None

# Run the prediction with the model reloaded from the saved checkpoint.
# As the last expression of the cell, the returned value (the prediction, or
# None on failure) is also shown as the cell's output.
predict_unseen_image(best_model)