In [None]:
import tensorflow as tf, tensorflow_hub as hub, numpy as np
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Faster R-CNN detector with an Inception-ResNet-v2 backbone (640x640
# variation, version 1), as named by the Kaggle Models handle below.
module_handle = "https://kaggle.com/models/tensorflow/faster-rcnn-inception-resnet-v2/frameworks/TensorFlow2/variations/640x640/versions/1"
# hub.load resolves the handle and returns the loaded model object
# (presumably downloading and caching it on first use -- confirm for your setup).
model = hub.load(module_handle)

# Display the names of the signatures this model exposes; one of them is
# selected by name in the next cell.
model.signatures.keys()

In [None]:
# Select the 'serving_default' signature; `detector` is the callable used to
# run inference on a batched image tensor in the cells below.
detector = model.signatures['serving_default']

In [None]:
img_path = "./dublin014.jpeg"

# 1. Read and decode the JPEG into an (H, W, 3) tf.uint8 tensor.
image_bytes = tf.io.read_file(img_path)
image_tensor_uint8 = tf.image.decode_jpeg(image_bytes, channels=3)

# 2. Convert to float32 before resizing.
# convert_image_dtype rescales integer pixel values from [0, 255] to [0.0, 1.0].
image_tensor_float = tf.image.convert_image_dtype(image_tensor_uint8, tf.float32)

# 3. Resize to 256x256 (output: tf.float32).
# NOTE: this fixed target size does not preserve the source aspect ratio.
# NOTE: Lanczos resampling can ring, i.e. produce values slightly below 0.0
# or above 1.0 next to sharp edges.
resized_image_float = tf.image.resize(
    image_tensor_float,
    [256, 256],
    method='lanczos3'
)

# 4. Convert back to tf.uint8 (0-255), the dtype this notebook feeds to the
# detector.
# saturate=True clips the Lanczos overshoot before the cast; with the default
# saturate=False, out-of-range values overflow in the float -> uint8 cast and
# show up as wrapped (speckled) pixels.
final_image_uint8 = tf.image.convert_image_dtype(
    resized_image_float, tf.uint8, saturate=True
)

# 5. Run the detector on a batch of one image: [tf.newaxis, ...] adds the
# leading batch dimension.
results = detector(final_image_uint8[tf.newaxis, ...])
# Convert every output tensor to a numpy array for post-processing.
result = {key: value.numpy() for key, value in results.items()}


In [None]:
# ----------------------------------------------------------------------
# COCO Label Map (The model is trained on a COCO-like dataset)
# We only need the ID-to-name mapping.
# ----------------------------------------------------------------------
# NOTE: The list is indexed DIRECTLY by class ID -- there is no "ID - 1"
# offset. Index 0 holds the '__background__' placeholder, so that
# COCO_CLASSES[class_id] is the name for that ID (e.g. COCO_CLASSES[1] is
# 'person'). '__skip__' entries are placeholders for IDs that have no class
# name in this map; they keep the remaining names aligned with their IDs.
# The list has 91 entries (IDs 0-90).
COCO_CLASSES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 
    'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 
    '__skip__', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 
    '__skip__', 'backpack', 'umbrella', '__skip__', '__skip__', 'handbag', 
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 
    'tennis racket', 'bottle', '__skip__', 'wine glass', 'cup', 'fork', 
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 
    'couch', 'potted plant', 'bed', '__skip__', 'dining table', '__skip__', 
    '__skip__', 'toilet', '__skip__', 'tv', 'laptop', 'mouse', 'remote', 
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 
    'refrigerator', '__skip__', 'book', 'clock', 'vase', 'scissors', 
    'teddy bear', 'hair drier', 'toothbrush'
]


def visualize_detections(image_tensor, result_dict, score_threshold=0.5, max_detections=10):
    """
    Draw bounding boxes and class labels for the top-scoring detections.

    Args:
        image_tensor (tf.Tensor or np.ndarray): Image to draw on, laid out as
            (H, W, 3). Anything `np.asarray` can convert is accepted. Boxes
            are scaled to this image's height and width, so it does not have
            to be the same size as the image given to the detector.
        result_dict (dict): Detector outputs as numpy arrays, each with a
            leading batch axis (only batch element 0 is used). Keys read:
            'detection_boxes' (rows of [ymin, xmin, ymax, xmax], treated as
            fractions of image height/width), 'detection_scores' and
            'detection_classes'.
        score_threshold (float): Minimum score to display a detection.
        max_detections (int): Maximum number of detections to display.

    Returns:
        None. The figure is rendered with `plt.show()`.
    """

    # --- 1. Filter and Prepare Results ---

    # Drop the batch dimension (axis 0) from every output.
    boxes = result_dict['detection_boxes'][0]
    scores = result_dict['detection_scores'][0]
    classes = result_dict['detection_classes'][0].astype(int)

    # Keep detections at or above the threshold, order them by descending
    # score, then cap the count at max_detections.
    valid_indices = np.where(scores >= score_threshold)[0]
    ranked_indices = valid_indices[np.argsort(scores[valid_indices])[::-1]]
    top_indices = ranked_indices[:max_detections]

    final_boxes = boxes[top_indices]
    final_scores = scores[top_indices]
    final_classes = classes[top_indices]

    # --- 2. Setup Plotting ---

    # Matplotlib needs a numpy array; np.asarray handles both eager tensors
    # and arrays that are already numpy.
    image_array = np.asarray(image_tensor)
    height, width = image_array.shape[:2]

    fig, ax = plt.subplots(1, figsize=(10, 10), dpi=300)
    ax.imshow(image_array)

    # --- 3. Draw Bounding Boxes ---

    for (ymin, xmin, ymax, xmax), score, class_id in zip(final_boxes, final_scores, final_classes):
        # Convert normalized coordinates (0-1) to absolute pixel values.
        x = xmin * width
        y = ymin * height
        w = (xmax - xmin) * width
        h = (ymax - ymin) * height

        # COCO_CLASSES is indexed directly by class ID (index 0 is the
        # background placeholder), so no "ID - 1" offset is applied. IDs
        # outside the table get a generic label instead of raising
        # IndexError (or silently wrapping around for negative values).
        if 0 <= class_id < len(COCO_CLASSES):
            class_name = COCO_CLASSES[class_id]
        else:
            class_name = f"class {class_id}"
        label = f"{class_name}: {score:.2f}"

        # Green, unfilled rectangle outlining the detection.
        rect = plt.Rectangle((x, y), w, h,
                             fill=False,
                             edgecolor='g',
                             linewidth=1)
        ax.add_patch(rect)

        # Label text just above the top-left corner of the box.
        ax.text(x, y - 5, label,
                color='white',
                fontsize=5,
                bbox=dict(facecolor='g', alpha=0.6, pad=2))

    ax.set_title(f"Object Detection Results (Top {len(final_boxes)} Detections)")
    ax.axis('off')
    plt.show()

# ----------------------------------------------------------------------
# Execute Visualization
# ----------------------------------------------------------------------
# Decode the source file again at its native resolution. The detection
# boxes are normalized, and visualize_detections scales them by the width
# and height of whatever image it is given, so the unresized image is used
# for display.
original_image_bytes = tf.io.read_file(img_path)
original_image_tensor = tf.image.decode_jpeg(original_image_bytes, channels=3)

# Overlay the detections on the full-resolution image.
visualize_detections(
    image_tensor=original_image_tensor,
    result_dict=result,
    score_threshold=0.5,
    max_detections=10,
)