In [1]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.0 (from gradio)
  Downloading gradio_client-1.10.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6

In [2]:
# Mount Google Drive at /content/drive. `google.colab` is only importable
# inside a Colab runtime.
# NOTE(review): the model path used later in this notebook is a bare relative
# filename, so nothing visible here reads from the mount — confirm whether
# this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [3]:
import gradio as gr
import tensorflow as tf
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont
import os
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Load your CNN model
def load_cnn_model(model_path):
    """Load a Keras model from ``model_path`` and report the outcome on stdout.

    Args:
        model_path: Path handed to ``tf.keras.models.load_model``.

    Returns:
        The loaded model, or ``None`` when loading raised any exception
        (the error text is printed instead of being propagated).
    """
    try:
        loaded = tf.keras.models.load_model(model_path)
        print(f"Model loaded successfully from {model_path}")
    except Exception as exc:
        # Best-effort load: callers check for None rather than catching.
        print(f"Error loading model: {exc}")
        loaded = None
    return loaded

# Trained model file, resolved relative to the working directory
model_path = "best_cnn_model.h5"

# Load once at notebook start; `model` is None when loading failed
model = load_cnn_model(model_path)

# Class labels, indexed by position in the model's score vector
CLASS_NAMES = [
    "ball",
    "circle cage",
    "cube",
    "cylinder",
    "human body",
    "metal bucket",
    "plane",
    "rov",
    "square cage",
    "tyre",
]

# Define preprocessing function to match training
def preprocess_image(img, target_size=(64, 64), grayscale=False):
    """Turn an input image into a batched float32 array for the model.

    Args:
        img: PIL image (or array-like) holding RGB, RGBA or single-channel
            pixel data.
        target_size: ``(width, height)`` handed to ``cv2.resize``.
        grayscale: When True, collapse the resized image to one channel and
            return shape ``(1, H, W, 1)``. The default (False) keeps three
            channels, which is what this function has always fed the model.

    Returns:
        ``np.float32`` array of shape ``(1, H, W, C)`` whose values are the
        pixel values divided by 255.
    """
    pixels = np.array(img)

    # Normalise every supported layout to 3-channel BGR so the output channel
    # count does not depend on the upload's colour mode.
    if pixels.ndim == 2 or (pixels.ndim == 3 and pixels.shape[2] == 1):
        pixels = cv2.cvtColor(pixels, cv2.COLOR_GRAY2BGR)
    elif pixels.ndim == 3 and pixels.shape[2] == 4:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGBA2BGR)
    elif pixels.ndim == 3 and pixels.shape[2] == 3:
        # PIL delivers RGB; swap to OpenCV's BGR channel order.
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

    resized = cv2.resize(pixels, target_size, interpolation=cv2.INTER_AREA)

    if grayscale:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        resized = np.expand_dims(resized, axis=-1)  # restore channel axis

    normalized = resized.astype(np.float32) / 255.0
    return np.expand_dims(normalized, axis=0)  # add batch axis

# Function to draw detection results on the image
def draw_results(img, predicted_class, confidence, top_3_classes=None):
    """Return a copy of ``img`` annotated with the prediction.

    A fixed, centred rectangle (60% of the image in each dimension) is drawn
    in a colour keyed to ``confidence``; the class label sits just above it
    and, when supplied, the top-3 list is stacked below it.

    Args:
        img: PIL image to annotate; it is copied, not modified.
        predicted_class: Label text for the winning class.
        confidence: Score for the winning class, compared against 0.3 / 0.7
            to choose red / yellow / green.
        top_3_classes: Optional iterable of ``(label, score)`` pairs.

    Returns:
        The annotated PIL image.
    """
    draw_img = img.copy()
    draw = ImageDraw.Draw(draw_img)
    width, height = img.size

    # Colour mapping based on confidence
    if confidence < 0.3:
        color = "red"  # Low confidence
    elif confidence < 0.7:
        color = "yellow"  # Medium confidence
    else:
        color = "green"  # High confidence

    # Centred box covering 60% of each dimension
    rect_width = width * 0.6
    rect_height = height * 0.6
    left = (width - rect_width) / 2
    top = (height - rect_height) / 2
    right = left + rect_width
    bottom = top + rect_height

    # Border thickness scales with image size, never thinner than 3px
    line_width = max(3, int(min(width, height) / 100))
    draw.rectangle([left, top, right, bottom], outline=color, width=line_width)

    try:
        font = ImageFont.truetype("Arial.ttf", 20)
    except OSError:
        # Font file not installed on this machine
        font = ImageFont.load_default()

    def measure(text):
        """Return (width, height) of ``text`` rendered in ``font``."""
        try:
            # textbbox replaces textsize, which newer Pillow releases removed.
            _, _, text_right, text_bottom = draw.textbbox((0, 0), text, font=font)
            return text_right, text_bottom
        except (AttributeError, ValueError):
            # Older Pillow: textbbox missing, or unsupported for bitmap fonts.
            pass
        if hasattr(draw, "textsize"):
            return draw.textsize(text, font=font)
        return len(text) * 10, 20

    label = f"{predicted_class}: {confidence:.1%}"
    text_width, text_height = measure(label)

    # Keep the label on the canvas when the box sits close to the top edge.
    label_top = max(0, top - text_height - 10)

    # Background panel for the label. ImageDraw paints rather than blends,
    # so on an RGB image the alpha component has no visible effect.
    draw.rectangle([left, label_top, left + text_width + 10, label_top + text_height + 10],
                   fill=(0, 0, 0, 128))
    draw.text((left + 5, label_top + 5), label, fill=color, font=font)

    # If top 3 classes are provided, list them under the box
    if top_3_classes:
        y_pos = bottom + 10
        for cls, conf in top_3_classes:
            text = f"{cls}: {conf:.1%}"
            # Size each row's panel to its own text, not the main label's.
            row_width, row_height = measure(text)
            row_height = max(row_height, text_height)
            draw.rectangle([left, y_pos, left + row_width + 10, y_pos + row_height + 5],
                           fill=(0, 0, 0, 128))
            draw.text((left + 5, y_pos), text, fill="white", font=font)
            y_pos += row_height + 10

    return draw_img

# Function to apply data augmentation for better visualization
def visualize_augmentations(img, predicted_class):
    """Build a list of perturbed copies of ``img`` for side-by-side viewing.

    Args:
        img: PIL image to perturb.
        predicted_class: Unused; kept so existing callers keep working.

    Returns:
        List of ``(name, PIL image)`` pairs in this order: down/up-sampled
        original, darker, brighter, noisy, blurred.
    """
    base = np.array(img)
    aug_results = []

    # Round-trip through the model's input resolution to show what survives.
    target_size = (64, 64)
    shrunk = cv2.resize(base, target_size)
    restored = cv2.resize(shrunk, (img.width, img.height))
    aug_results.append(("Original (Resized)", Image.fromarray(restored)))

    # Brightness: scale in float, then clip back into the 8-bit range.
    for name, gain in (("Darker", 0.8), ("Brighter", 1.2)):
        scaled = np.clip(base * gain, 0, 255).astype(np.uint8)
        aug_results.append((name, Image.fromarray(scaled)))

    # Gaussian noise: add in float and clip BEFORE casting. Casting the noise
    # to uint8 first would wrap negative samples and overflow the addition.
    noise = np.random.normal(0, 15, base.shape)
    noisy = np.clip(base.astype(np.float32) + noise, 0, 255).astype(np.uint8)
    aug_results.append(("Noisy", Image.fromarray(noisy)))

    blurred = cv2.GaussianBlur(base, (5, 5), 0)
    aug_results.append(("Blurred", Image.fromarray(blurred)))

    return aug_results

# Define prediction function with better error handling and debugging
def predict(img):
    """Classify ``img`` and build the three outputs shown in the UI.

    Args:
        img: PIL image from the input component, or None when nothing was
            supplied.

    Returns:
        ``(annotated_image, augmentation_grid, details)``. When no image was
        given or the model is unavailable the result is
        ``(None, None, {"error": ...})``; any other failure returns
        ``(img, None, {"error": ...})`` and prints the traceback.
    """
    if img is None:
        return None, None, {"error": "Please provide an image"}

    if model is None:
        return None, None, {"error": "Model could not be loaded. Please check the model path and format."}

    try:
        # Keep an untouched copy for drawing and augmentation
        original_img = img.copy()

        processed_img = preprocess_image(img)

        # Debug info about processed image
        print(f"Processed image shape: {processed_img.shape}")
        print(f"Processed image min: {np.min(processed_img)}, max: {np.max(processed_img)}")

        prediction = model.predict(processed_img)

        # Debug info about prediction
        print(f"Prediction type: {type(prediction)}")
        if isinstance(prediction, list):
            for i, p in enumerate(prediction):
                print(f"Prediction[{i}] shape: {p.shape}")
        else:
            print(f"Prediction shape: {prediction.shape}")

        class_pred = _extract_class_scores(prediction)
        if class_pred is None:
            return img, None, {"error": f"Unexpected prediction type: {type(prediction)}"}

        # Print raw predictions for debugging
        print("Raw class predictions:")
        for name, score in zip(CLASS_NAMES, class_pred):
            print(f"{name}: {score:.4f}")

        # Winning class and its score
        predicted_class_idx = int(np.argmax(class_pred))
        predicted_class = CLASS_NAMES[predicted_class_idx]
        confidence = float(class_pred[predicted_class_idx])

        # Three highest scores, best first
        top_indices = np.argsort(class_pred)[-3:][::-1]
        top_3_classes = [(CLASS_NAMES[i], float(class_pred[i])) for i in top_indices]

        result_json = {
            "Predicted Class": predicted_class,
            "Confidence": f"{confidence:.2%}",
        }
        for cls, conf in top_3_classes:
            result_json[f"{cls}"] = f"{conf:.2%}"

        aug_results = visualize_augmentations(original_img, predicted_class)
        img_with_detection = draw_results(original_img, predicted_class, confidence, top_3_classes)
        aug_grid = _compose_augmentation_grid(
            img_with_detection, aug_results, original_img.width, original_img.height
        )

        return img_with_detection, aug_grid, result_json

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Detailed error: {error_details}")
        return img, None, {"error": f"Prediction failed: {str(e)}"}


def _extract_class_scores(prediction):
    """Pull the per-class score vector for the first batch item out of ``prediction``.

    Returns None when ``prediction`` is neither a list nor an ndarray.
    """
    if isinstance(prediction, list):
        if len(prediction) >= 2:
            # Multi-output model: assumed layout is [bbox, class scores]
            scores = prediction[1][0]
        else:
            scores = prediction[0][0]
    elif isinstance(prediction, np.ndarray):
        if len(prediction.shape) == 2:
            scores = prediction[0]  # first batch item
        else:
            # Unusual output shape: flatten and take the leading entries
            scores = prediction.flatten()
    else:
        return None

    scores = np.asarray(scores)
    # Never let argmax / top-k point past the label list.
    if len(scores) > len(CLASS_NAMES):
        print(f"Warning: model returned {len(scores)} scores for "
              f"{len(CLASS_NAMES)} class names; ignoring the extras")
        scores = scores[:len(CLASS_NAMES)]
    return scores


def _compose_augmentation_grid(annotated_img, aug_results, cell_width, cell_height):
    """Lay the annotated image plus up to five augmentations on a 3x2 grid."""
    grid = Image.new('RGB', (cell_width * 3, cell_height * 2))
    grid.paste(annotated_img, (0, 0))
    # Slot 0 holds the annotated image, so augmentations start at slot 1.
    for slot, (_, aug_img) in enumerate(aug_results[:5], start=1):
        row, col = divmod(slot, 3)
        grid.paste(aug_img, (col * cell_width, row * cell_height))
    return grid

# Gradio UI: one image in; annotated image, augmentation grid and JSON out
image_input = gr.Image(type="pil", label="Input Image")
result_outputs = [
    gr.Image(type="pil", label="Detection Result"),
    gr.Image(type="pil", label="Augmentation Tests"),
    gr.JSON(label="Prediction Details"),
]

demo = gr.Interface(
    fn=predict,
    inputs=image_input,
    outputs=result_outputs,
    title="Underwater Object Detection DL Model",
    description="""
    Upload an underwater sonar image to detect objects using the CNN model.

    The detection shows:
    1. The predicted object class with confidence score
    2. A bounding box highlighting the detected object
    3. Alternative augmentations to test model robustness

    The model can detect: ball, circle cage, cube, cylinder, human body, metal bucket, plane, ROV, square cage, tyre
    """,
)

# Start the app; share=True requests a public share link
if __name__ == "__main__":
    demo.launch(share=True)



Model loaded successfully from best_cnn_model.h5
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d3e0923eea52a141ec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
