In [179]:
from ultralytics import YOLO, RTDETR
from pathlib import Path 
import cv2
from dotenv import load_dotenv
import os
import requests
import urllib
from io import BytesIO
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets

%matplotlib inline

# Work from the project root so that relative paths used by later cells
# (e.g. './weights/...') resolve. Set the SURVEILLANCE_DIR environment variable
# to run the notebook from a different checkout; the fallback is the original
# hard-coded location.
os.chdir(os.environ.get('SURVEILLANCE_DIR', '/home/camille/code/surveillance/'))

# Load variables from a .env file into the environment (presumably including the
# API key read by get_sv_img -- confirm against the project's .env).
# Assigned to `_` so the call's return value is not echoed as cell output.
_ = load_dotenv()


# Re-surveilling surveillance

This is a demonstration of using object detection models to locate surveillance cameras in Google Street View imagery. There are two models: **YOLO**, which runs relatively quickly but is less accurate, and **RT-DETR**, a newer model type that is slower but better at finding cameras.

Enter a location to look up and adjust the Street View camera parameters (image size, heading, pitch, and field of view). After clicking the **Get image** button, any surveillance cameras detected by each model will be highlighted.

In [180]:
# Load both detectors from their local weight files (paths are relative to the
# current working directory).
yolo = YOLO('./weights/yolo_tile_best.pt')
detr = RTDETR('./weights/detr_tile_frz_best.pt')

# Initial values shared by the get_sv_img defaults and the control widgets.
DEFAULTS = dict(size=640, heading=160, pitch=10, fov=50, model='detr')

# Layout grid: row 0 holds the controls, rows 1+ hold the three image panels.
rows, cols = 4, 3
grid = widgets.GridspecLayout(
    rows,
    cols,
    min_height='400px',
    grid_gap='8px',
    merge=False,
)


In [181]:
def get_sv_img(
    location: str,
    key_name: str = 'GOOGLE_KEY',
    size: int = DEFAULTS['size'],
    heading: int = DEFAULTS['heading'],
    pitch: int = DEFAULTS['pitch'],
    fov: int = DEFAULTS['fov'],
    timeout: float = 30.0,
) -> Image.Image:
    """Fetch one image from the Google Street View Static API.

    Args:
        location: Address or place description to look up.
        key_name: Name of the environment variable holding the API key.
        size: Edge length in pixels; a square ``size`` x ``size`` image is requested.
        heading: Value sent as the API's ``heading`` parameter.
        pitch: Value sent as the API's ``pitch`` parameter.
        fov: Value sent as the API's ``fov`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body opened as a PIL image.

    Raises:
        RuntimeError: If the environment variable ``key_name`` is unset or empty.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request does not complete within ``timeout``.
    """
    api_key = os.getenv(key_name)
    if not api_key:
        # Fail early with a clear message instead of sending key=None to the API.
        raise RuntimeError(
            f'Environment variable {key_name!r} is not set; cannot call the Street View API'
        )

    img_params = {
        'location': location,
        'size': f'{size}x{size}',
        'heading': heading,
        'pitch': pitch,
        'fov': fov,
        'key': api_key,
    }
    # requests URL-encodes a params dict itself, so no manual urlencode is needed.
    r = requests.get(
        'https://maps.googleapis.com/maps/api/streetview',
        params=img_params,
        timeout=timeout,
    )
    # Surface HTTP errors here rather than letting Image.open choke on an error body.
    r.raise_for_status()
    img = Image.open(BytesIO(r.content))
    return img

def make_img_widget(img: Image.Image, label: str, size: int = 480) -> widgets.VBox:
    """Build a captioned image widget from a PIL image.

    Args:
        img: Image to show; it is encoded as PNG for the widget.
        label: Caption text placed above the image.
        size: Value used for both the widget's width and height.

    Returns:
        A ``VBox`` containing a ``Label`` with the caption and the image widget.
    """
    # Encode to PNG in memory; the bytes are copied out before the buffer closes.
    with BytesIO() as buff:
        img.save(buff, format='PNG')
        png_bytes = buff.getvalue()
    img_widget = widgets.Image(value=png_bytes, width=size, height=size)
    return widgets.VBox([widgets.Label(label), img_widget])

def label_img(img, model, device: str = 'cpu') -> Image.Image:
    """Run a detection model on an image and return the annotated result.

    Args:
        img: Input image passed straight to ``model.predict``.
        model: Detector exposing ``predict`` (called here with the ``yolo`` and
            ``detr`` objects).
        device: Device string forwarded to ``model.predict``; defaults to ``'cpu'``.

    Returns:
        A PIL image of the first prediction result, plotted without text labels.
    """
    # Only one image is passed in, so only the first result is used.
    result = model.predict(img, device=device)[0]
    annotated_bgr = result.plot(labels=False)
    # The plotted array is converted from BGR to RGB channel order before
    # being handed to PIL.
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(annotated_rgb)

In [182]:
# Shared appearance for every control: a wide description column and 60% width.
lbl_style = {'description_width': '200px'}
layout = widgets.Layout(width='60%')

# Free-text place to look up.
location = widgets.Text(value='33rd & Loch Raven Baltimore MD',
                        description='Location',
                        layout=layout,
                        style=lbl_style)
# Edge length of the requested square image. The Street View Static API
# documents 640x640 as the largest image it returns, so the slider is capped there.
size = widgets.IntSlider(value=DEFAULTS['size'],
                         min=100,
                         max=640,
                         layout=layout,
                         style=lbl_style,
                         description='Image size')
heading = widgets.IntSlider(value=DEFAULTS['heading'],
                            min=0,
                            max=360,
                            layout=layout,
                            style=lbl_style,
                            description='Heading (rotation)')
pitch = widgets.IntSlider(value=DEFAULTS['pitch'],
                          min=0,
                          max=40,
                          layout=layout,
                          style=lbl_style,
                          description='Pitch (tilt)')
fov = widgets.IntSlider(value=DEFAULTS['fov'],
                        min=10,
                        max=120,
                        layout=layout,
                        style=lbl_style,
                        description='Field of view (zoom)')

button = widgets.Button(description='Get image', button_style = 'primary')

# NOTE(review): this dropdown is created but, in the cells visible here, it is
# neither placed in the grid nor read by the click handler -- confirm whether
# it is still needed.
mod_choice = widgets.Dropdown(value=DEFAULTS['model'],
                              options=[('RT-DETR', 'detr'), ('YOLOv8', 'yolo')],
                              layout=layout,
                              style=lbl_style,
                              description='Type of model')

# The controls span the whole first row of the grid.
grid[0, :] = widgets.VBox([location, size, heading, pitch, fov, button],
                          layout = widgets.Layout(height = 'auto'))


def button_click(b):
    """Click handler: fetch the Street View image and show it with both models' detections.

    Reads the current values of the control widgets, requests the image, then
    fills grid rows 1+ with three panels: the original image (column 0), the
    YOLO predictions (column 1) and the RT-DETR predictions (column 2).

    Args:
        b: The button instance passed in by ``on_click``; not used.
    """
    controls = {
        'location': location,
        'size': size,
        'heading': heading,
        'pitch': pitch,
        'fov': fov,
    }
    street_view = get_sv_img(**{name: ctrl.value for name, ctrl in controls.items()})
    if street_view is None:
        return

    grid[1:, 0] = make_img_widget(street_view, 'Original image')
    # Each panel is placed as soon as its model finishes, YOLO first.
    grid[1:, 1] = make_img_widget(label_img(street_view, yolo), 'YOLO predictions')
    grid[1:, 2] = make_img_widget(label_img(street_view, detr), 'RT-DETR predictions')


# Run the handler whenever the "Get image" button is clicked.
button.on_click(button_click)



0: 640x640 1 surveillance, 262.8ms
Speed: 1.9ms preprocess, 262.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 surveillances, 988.3ms
Speed: 1.8ms preprocess, 988.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)


In [184]:
# Render the grid: the controls sit in the first row, and the image panels in
# the rows below are filled in by button_click after the button is pressed.
display(grid)

GridspecLayout(children=(VBox(children=(Text(value='33rd & Loch Raven Baltimore MD', description='Location', l…