In [None]:
# --- 1. Imports and Setup ---
import base64
import json
import os
import sys
# !pip install -r requirements.txt # You only need to run this once

# TF_CPP_MIN_LOG_LEVEL must be in the environment *before* TensorFlow is
# imported: the native library already logs while it is being loaded, so
# assigning the variable after the import is too late to silence that output.
# '3' filters out INFO, WARNING and ERROR messages from the C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Third-party imports deliberately follow the environment tweak above.
import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402
import tensorflow as tf  # noqa: E402
from PIL import Image  # noqa: E402
from PIL import ImageColor  # noqa: E402
from PIL import ImageDraw  # noqa: E402
from PIL import ImageFont  # noqa: E402

# --- 2. Load the Model ---
# Relative path of the SavedModel directory that is loaded below.
model_dir = 'models/openimages_v4_ssd_mobilenet_v2_1'
# Loaded once at module/cell execution time; detect() reuses it on every call.
saved_model = tf.saved_model.load(model_dir)
# The signature registered under the name 'default' is the callable that
# detect() invokes as detector(...).
detector = saved_model.signatures['default']


# --- 3. All Function Definitions ---

def detect(img):
    """Run the module-level ``detector`` on one encoded JPEG image.

    Args:
        img: JPEG-encoded image data, passed to ``tf.image.decode_jpeg``.

    Returns:
        A dict holding every detector output converted to plain Python lists,
        plus 'num_detections'. The 'detection_boxes', 'detection_scores' and
        'detection_class_entities' entries have their leading batch axis
        removed; any other outputs keep it.
    """
    decoded = tf.image.decode_jpeg(img, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    # The detector is fed a batch, so add a leading axis of size one.
    batch = as_float[tf.newaxis, ...]
    raw = detector(batch)

    # EagerTensors are turned into nested lists so callers get plain Python data.
    output_dict = {}
    for name, tensor in raw.items():
        output_dict[name] = tensor.numpy().tolist()

    # Number of detections reported for the single image in the batch.
    output_dict['num_detections'] = len(raw["detection_scores"][0])

    # Strip the batch axis from the fields that downstream code consumes.
    for name in ('detection_boxes',
                 'detection_scores',
                 'detection_class_entities'):
        output_dict[name] = output_dict[name][0]

    return output_dict


def clean_detections(detections, max_boxes=10):
    """Convert un-batched detector output into a list of per-detection dicts.

    Args:
        detections: Mapping with 'num_detections' (int) and the parallel
            sequences 'detection_boxes' (``[yMin, xMin, yMax, xMax]`` per
            entry), 'detection_class_entities' (``bytes`` or ``str`` labels)
            and 'detection_scores'.
        max_boxes: Upper bound on the number of detections returned.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A list, in input order, of at most ``max_boxes`` dicts with the keys
        'box' (dict of 'yMin', 'xMin', 'yMax', 'xMax'), 'class', 'label'
        (same text as 'class') and 'score'.
    """
    # Clamp at zero so a negative count behaves like "no detections".
    limit = max(0, min(detections['num_detections'], max_boxes))
    boxes = detections['detection_boxes'][:limit]
    entities = detections['detection_class_entities'][:limit]
    scores = detections['detection_scores'][:limit]

    cleaned = []
    # zip() stops at the shortest sequence, so a 'num_detections' value that
    # overstates the available data can no longer raise IndexError.
    for box, entity, score in zip(boxes, entities, scores):
        y_min, x_min, y_max, x_max = box[:4]
        # Labels arrive as bytes from the model; tolerate already-decoded text.
        name = entity.decode('utf-8') if isinstance(entity, bytes) else entity
        cleaned.append({
            'box': {
                'yMin': y_min,
                'xMin': x_min,
                'yMax': y_max,
                'xMax': x_max,
            },
            'class': name,
            'label': name,
            'score': score,
        })

    return cleaned


def predict(body):
    """Run detection on a base64-encoded image carried in a request body.

    Args:
        body: Mapping whose 'image' entry holds the base64-encoded image,
            either as ``str`` or as ``bytes``.

    Returns:
        ``{'detections': [...]}`` where the list is what
        ``clean_detections`` produces for the decoded image.

    Raises:
        ValueError: If 'image' is missing or empty, or if the payload is not
            valid base64 (``binascii.Error`` is a ``ValueError`` subclass).
    """
    base64img = body.get('image')
    # Fail early with a clear message instead of an AttributeError on None.
    if not base64img:
        raise ValueError("request body must contain a non-empty base64 'image' field")
    if isinstance(base64img, str):
        base64img = base64img.encode()

    img_bytes = base64.decodebytes(base64img)
    detections = detect(img_bytes)
    cleaned = clean_detections(detections)

    return {'detections': cleaned}


def display_image(image):
    """Show ``image`` in a new 10x10 inch matplotlib figure without grid lines.

    Args:
        image: Image data in any form accepted by ``plt.imshow``.
    """
    # The figure handle is not needed afterwards, so it is not kept.
    plt.figure(figsize=(10, 10))
    plt.grid(False)
    plt.imshow(image)


def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
    """Draw one box outline and its stacked text labels onto a PIL image.

    The image is modified in place. Box coordinates are fractions of the
    image size and are scaled by the image width and height before drawing.

    Args:
        image: PIL image to draw on.
        ymin, xmin, ymax, xmax: Box edges as fractions of the image size.
        color: Fill color for the outline and the label backgrounds.
        font: Font object providing ``getbbox``; used to measure and draw text.
        thickness: Line width of the box outline.
        display_str_list: Strings to show; the last one is drawn lowest.
    """
    canvas = ImageDraw.Draw(image)
    width_px, height_px = image.size

    # Scale the fractional coordinates to pixel positions.
    left = xmin * width_px
    right = xmax * width_px
    top = ymin * height_px
    bottom = ymax * height_px

    # Closed polyline around the box.
    outline = [(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)]
    canvas.line(outline, width=thickness, fill=color)

    # Measure every label's height from its bounding box (bottom - top).
    label_heights = []
    for label in display_str_list:
        _, y0, _, y1 = font.getbbox(label)
        label_heights.append(y1 - y0)

    # Total height of the label stack, allowing a 5% margin above and below.
    if label_heights:
        stack_height = (1 + 2 * 0.05) * sum(label_heights)
    else:
        stack_height = 0.0

    # Put the labels above the box when they fit, otherwise start them
    # inside the box just below its top edge.
    text_bottom = top if top > stack_height else top + stack_height

    # Walk the labels bottom-up, moving the baseline upwards after each one.
    for label in display_str_list[::-1]:
        x0, y0, x1, y1 = font.getbbox(label)
        label_width = x1 - x0
        label_height = y1 - y0
        pad = np.ceil(0.05 * label_height)

        background = [(left, text_bottom - label_height - 2 * pad),
                      (left + label_width, text_bottom)]
        canvas.rectangle(background, fill=color)

        text_origin = (left + pad, text_bottom - label_height - pad)
        canvas.text(text_origin, label, fill="black", font=font)

        text_bottom -= (label_height + 2 * pad)


def draw_boxes(image, detections_list, max_boxes=10, min_score=0.1):
    """Overlay labeled boxes on an image with formatted scores and label names.

    Args:
        image: Array-like image data; it is converted to ``uint8`` and then to
            an RGB PIL image for drawing.
        detections_list: List of detection dicts, each with 'box' (dict of
            'yMin', 'xMin', 'yMax', 'xMax' as fractions of the image size),
            'class' and 'score'.
        max_boxes: Only the first ``max_boxes`` entries are considered.
        min_score: Entries scoring below this value are not drawn.

    Returns:
        The annotated image. When ``image`` is a writable NumPy array that
        already has the RGB result's shape, it is updated in place and
        returned. Otherwise (grayscale, RGBA, read-only or non-array input)
        a new RGB array is returned and ``image`` is left untouched.
    """
    colors = list(ImageColor.colormap.values())
    font = ImageFont.load_default()

    # Convert to a PIL image once; all boxes are drawn onto this one object.
    image_pil = Image.fromarray(np.uint8(image)).convert("RGB")

    # max(0, ...) keeps a negative max_boxes meaning "draw nothing".
    for detection in detections_list[:max(0, max_boxes)]:
        if detection['score'] < min_score:
            continue

        box = detection['box']
        display_str = "{}: {}%".format(detection['class'],
                                       int(100 * detection['score']))
        # NOTE: hash() of a str is salted per process, so a class can get a
        # different color from one run to the next.
        color = colors[hash(detection['class']) % len(colors)]

        draw_bounding_box_on_image(
            image_pil,
            box['yMin'],
            box['xMin'],
            box['yMax'],
            box['xMax'],
            color,
            font,
            display_str_list=[display_str])

    annotated = np.array(image_pil)

    # Copying back only works when the caller's buffer has the RGB result's
    # shape and is writable; otherwise np.copyto would raise, so hand back
    # the freshly built array instead.
    if (isinstance(image, np.ndarray)
            and image.shape == annotated.shape
            and image.flags.writeable):
        np.copyto(image, annotated)
        return image
    return annotated


# --- 4. Main Execution ---

# Path of the image file to analyse (relative to the working directory)
my_image = 'twodogs.jpg'

# --- Run Prediction ---
# predict() reads a base64 string from the 'image' key of its argument, so the
# raw file bytes are base64-encoded and wrapped in a dict first.
with open(my_image, "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
content = {"image": encoded_image}

# result is {'detections': [...]} as returned by predict()
result = predict(content)
# print(result['detections']) # Uncomment this if you want to see the JSON output


# --- Draw and Display ---
# The same image file is opened again, this time as a NumPy array to draw on;
# the boxes come from the 'result' of the prediction above.
print("Drawing boxes on image...")
image_np = np.array(Image.open(my_image))

# Call the drawing function with the image and the detections list
image_with_boxes = draw_boxes(image_np, result['detections'])

# Display the final image
display_image(image_with_boxes)

print("Done.")