In [5]:
#!/usr/bin/python3

# ****************************************************************************
# Copyright(c) 2017 Intel Corporation.
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# DIY smart security camera PoC using Raspberry Pi Camera and
# Intel® Movidius™ Neural Compute Stick (NCS)

import os
import sys
import numpy
import select
import ntpath
import argparse
import picamera
import picamera.array

import mvnc.mvncapi as mvnc

from PIL import Image
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont

from time import localtime, strftime

# "Class of interest" - Display detections only if they match this class ID
CLASS_PERSON = 15

# Detection threshold: minimum confidence required to tag a detection as valid
CONFIDANCE_THRESHOLD = 0.60  # 60% confident

# Variable to store commandline arguments
ARGS = None

In [3]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help deserialize the output list from
# Intel® Movidius™ Neural Compute Stick (NCS) 

# ---- Deserialize the output from an SSD based network ----
# @param output The NCS returns a list/array in this structure:
# First float16: Number of detections
# Next 6 values: Unused
# Next consecutive batch of 7 values: Detection values
#   0: Image ID (always 0)
#   1: Class ID (index into labels.txt)
#   2: Detection score
#   3: Box left coordinate (x1) - scaled value between 0 & 1
#   4: Box top coordinate (y1) - scaled value between 0 & 1
#   5: Box right coordinate (x2) - scaled value between 0 & 1
#   6: Box bottom coordinate (y2) - scaled value between 0 & 1
#
# @return output_dict A Python dictionary with the following keys:
# output_dict['num_detections'] = Total number of valid detections
# output_dict['detection_classes_<X>'] = Class ID of the detected object
# output_dict['detection_scores_<X>'] = Confidence as an integer percentage
# output_dict['detection_boxes_<X>'] = A list of 2 tuples [(y1, x1), (y2, x2)]
# Where <X> is a zero-based index smaller than num_detections


def ssd(output, confidance_threshold, shape):
    """Deserialize the raw output of an SSD based network into a dictionary.

    Args:
        output: Flat sequence returned by the NCS. ``output[0]`` is the
            number of detections, the next 6 values are skipped, and every
            following group of 7 values describes one detection as
            (image ID, class ID, score, x1, y1, x2, y2), with the box
            coordinates scaled between 0 and 1.
        confidance_threshold: A detection is kept only when its score is
            strictly greater than this value.
        shape: Shape of the original image. Only the first two entries
            (height, width) are used, so ``(h, w)`` and ``(h, w, c)`` are
            both accepted.

    Returns:
        A dictionary with ``'num_detections'`` (number of detections kept)
        and, for each kept detection ``<X>`` counted from zero:
        ``'detection_classes_<X>'`` (int class ID),
        ``'detection_scores_<X>'`` (score as an integer percentage) and
        ``'detection_boxes_<X>'`` (``[(y1, x1), (y2, x2)]`` in pixels).
    """
    # Dictionary where the deserialized output will be stored
    output_dict = {'num_detections': 0}

    # Nothing to decode in an empty buffer
    if len(output) == 0:
        return output_dict

    # Only height and width are needed; slicing instead of unpacking three
    # values keeps 2-D (grayscale) shapes from raising ValueError.
    height, width = shape[:2]

    # int() cannot convert NaN/inf, so a non-finite count means "no results"
    reported = int(output[0]) if numpy.isfinite(output[0]) else 0

    # Never read past the end of the buffer, even when the header reports
    # more detections than the buffer actually holds.
    available = max(0, (len(output) - 7) // 7)

    # Variable to track number of valid detections
    valid_detections = 0

    for detection in range(min(reported, available)):

        # Skip the first 7 values, and point to the next batch of 7 values
        base_index = 7 + (7 * detection)
        record = output[base_index:base_index + 7]

        # Records holding NaN/inf would make the int() conversions below raise
        if not numpy.all(numpy.isfinite(record)):
            continue

        # Record only those detections whose confidence exceeds our threshold
        score = record[2]
        if score > confidance_threshold:
            suffix = str(valid_detections)

            output_dict['detection_classes_' + suffix] = int(record[1])
            output_dict['detection_scores_' + suffix] = int(score * 100)

            x = [int(record[3] * width), int(record[5] * width)]
            y = [int(record[4] * height), int(record[6] * height)]

            # Boxes are stored row-first: [(y1, x1), (y2, x2)]
            output_dict['detection_boxes_' + suffix] = list(zip(y, x))

            valid_detections += 1

    # Update total number of detections to valid detections
    output_dict['num_detections'] = valid_detections

    return output_dict

In [4]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help visualize the output from
# Intel® Movidius™ Neural Compute Stick (NCS)

def draw_bounding_box(y1, x1, y2, x2,
                      img,
                      thickness=4,
                      color=(255, 255, 0),
                      display_str=()):
    """Draw a bounding box, and optionally a text label, onto an image.

    Inputs
    (x1, y1)    = Top left corner of the bounding box
    (x2, y2)    = Bottom right corner of the bounding box
    img         = Image/frame represented as numpy array
    thickness   = Thickness of the bounding box's outline
    color       = Color of the bounding box's outline
    display_str = Text drawn at the top left corner of the box; nothing is
                  drawn when it is empty (the default)

    Returns the annotated image as a numpy array.
    """
    canvas = PIL.Image.fromarray(img)
    draw = PIL.ImageDraw.Draw(canvas)

    # Each pass draws a one-pixel outline one step further out, so the
    # outline thickens around the box instead of drifting towards the
    # top-left corner.
    for offset in range(thickness):
        draw.rectangle([(x1 - offset, y1 - offset),
                        (x2 + offset, y2 + offset)],
                       outline=color)

    # Only render a label when there is one; the default is an empty tuple
    if display_str:
        font = PIL.ImageFont.load_default()
        draw.text((x1, y1), display_str, font=font)

    return numpy.array(canvas)

## Step 1: Open the enumerated device and get a handle to it

In [6]:
def open_ncs_device():
    """Open the first enumerated NCS device and return a handle to it.

    Returns:
        The opened ``mvnc.Device``.

    Raises:
        SystemExit: With exit status 1 when no NCS device is enumerated.
    """
    # Look for enumerated NCS device(s); quit program if none found.
    devices = mvnc.EnumerateDevices()
    if len(devices) == 0:
        print('No devices found')
        # sys.exit() rather than quit(): quit() is injected by the `site`
        # module for interactive use and reports success (status 0).
        sys.exit(1)

    # Get a handle to the first enumerated device and open it
    device = mvnc.Device(devices[0])
    device.OpenDevice()

    return device

## Step 2: Load a graph file onto the NCS device

In [7]:
def load_graph(device, graph_name):
    """Read a compiled graph file and allocate it on the given NCS device.

    Args:
        device: Device handle exposing ``AllocateGraph``.
        graph_name: Path of the graph file; it is read in binary mode.

    Returns:
        Whatever ``device.AllocateGraph`` returns for the file's contents.
    """
    # Pull the whole graph file into memory
    with open(graph_name, mode='rb') as graph_file:
        graph_buffer = graph_file.read()

    # Hand the buffer to the NCS and return the resulting graph handle
    return device.AllocateGraph(graph_buffer)

## Step 3: Pre-process the images

In [8]:
def pre_process_image(frame, dim, mean, scale):
    """Resize a frame and normalise it into a half-precision tensor.

    Args:
        frame: Image as a numpy array, converted through ``Image.fromarray``.
        dim: Target size handed to ``Image.resize``
            [image size is defined by the chosen network, during training].
        mean: Value(s) subtracted from every pixel after conversion to
            float16.
        scale: Factor the mean-subtracted pixels are multiplied by.

    Returns:
        The resized, mean-subtracted and scaled image as a numpy array.
    """
    # Resize through PIL, then go back to a numpy array
    resized = numpy.array(Image.fromarray(frame).resize(dim))

    # Mean subtraction & scaling [a common technique used to center the data]
    half_precision = resized.astype(numpy.float16)
    return (half_precision - numpy.float16(mean)) * scale

## Step 4: Read & print inference results from the NCS

In [9]:
def infer_image(graph, img, frame, labels=None):
    """Run one inference on the NCS and save a snapshot per detected person.

    Args:
        graph: Graph handle exposing ``LoadTensor`` and ``GetResult``.
        img: Pre-processed input tensor for the network.
        frame: Original frame as a numpy array; its ``shape`` is used to
            scale the boxes, and boxes/labels are drawn on it through
            ``draw_bounding_box``.
        labels: Optional mapping/sequence from class ID to class name. When
            omitted, a module-level ``labels`` is used if one exists;
            otherwise the numeric class ID is shown.

    Side effects:
        Prints a message and writes ``captures/photo_<time>.jpg`` for every
        detection whose class is ``CLASS_PERSON``. When ``DISPLAY`` is set
        in the environment the (possibly annotated) frame is shown.
    """
    # Load the image as a half-precision floating point array
    graph.LoadTensor(img, 'user object')

    # Get the results from NCS
    output, _userobj = graph.GetResult()

    # Deserialize the output into a python dictionary
    output_dict = ssd(output, CONFIDANCE_THRESHOLD, frame.shape)

    # Fall back to a module-level `labels` when the caller passed none
    if labels is None:
        labels = globals().get('labels')

    # Snapshots live next to the script; a notebook kernel defines no
    # __file__, so use the current working directory there.
    script_path = globals().get('__file__')
    if script_path:
        base_dir = os.path.dirname(os.path.realpath(script_path))
    else:
        base_dir = os.getcwd()
    capture_dir = os.path.join(base_dir, 'captures')

    # Print the results (each image/frame may have multiple objects)
    for i in range(output_dict['num_detections']):
        suffix = str(i)
        class_id = output_dict.get('detection_classes_' + suffix)

        # Filter a specific class/category
        if class_id != CLASS_PERSON:
            continue

        cur_time = strftime('%Y_%m_%d_%H_%M_%S', localtime())
        print('Person detected on ' + cur_time)

        # Extract top-left & bottom-right coordinates of detected objects
        (y1, x1), (y2, x2) = output_dict.get('detection_boxes_' + suffix)

        # Use the class name when available, else the numeric class ID
        try:
            class_name = labels[class_id]
        except (TypeError, IndexError, KeyError):
            class_name = str(class_id)

        # Prep string to overlay on the image
        display_str = (
            class_name
            + ': '
            + str(output_dict.get('detection_scores_' + suffix))
            + '%')

        # Overlay bounding boxes, detection class and scores
        frame = draw_bounding_box(
                    y1, x1, y2, x2,
                    frame,
                    thickness=4,
                    color=(255, 255, 0),
                    display_str=display_str)

        # Capture snapshots (create the target directory on first use)
        os.makedirs(capture_dir, exist_ok=True)
        photo = os.path.join(capture_dir, 'photo_' + cur_time + '.jpg')
        Image.fromarray(frame).save(photo)

    # If a display is available, show image on which inference was performed.
    # Built from `frame` so this also works when nobody was detected.
    if 'DISPLAY' in os.environ:
        Image.fromarray(frame).show()

In [None]:
class image_classifier():
    """Person detector pairing a Raspberry Pi camera with an NCS device.

    Typical use: construct, call ``take_picture()``, then ``infer_image()``,
    and finally ``close_ncs_device()`` to release the hardware.
    """

    def __init__(self,
                 graph_name='graph',
                 label_names='labels.txt',
                 mean=[127.5, 127.5, 127.5],
                 scale=0.00789,
                 dim=[300, 300],
                 colourmode='bgr'):
        """Load the labels, open the NCS device and allocate the graph.

        Args:
            graph_name: Path to the compiled graph file.
            label_names: Path to the labels file, one class name per line;
                a line reading exactly ``classes`` is skipped.
            mean: Per-channel mean passed to ``pre_process_image``.
            scale: Scaling factor passed to ``pre_process_image``.
            dim: Network input size passed to ``pre_process_image``.
            colourmode: Capture format handed to ``camera.capture``.
        """
        # The module-level infer_image() reads a global named `labels`, so
        # the loaded list is published there as well as on the instance.
        global labels

        self.mean = mean
        self.scale = scale
        # Stored as a tuple because PIL documents the resize size as a 2-tuple
        self.dim = tuple(dim)
        self.colourmode = colourmode
        # No frame has been captured yet
        self.frame = None

        # Load the labels file first: it is plain file I/O, so a bad path
        # fails before any hardware is touched.
        with open(label_names) as label_file:
            self.labels = [line.rstrip('\n') for line in label_file
                           if line != 'classes\n']
        labels = self.labels

        self.device = open_ncs_device()
        try:
            self.graph = load_graph(self.device, graph_name)
        except Exception:
            # Do not keep the device open when the graph cannot be loaded
            self.device.CloseDevice()
            raise

    def take_picture(self):
        """Capture one 640x480 frame from the Pi camera.

        Returns:
            The captured frame as a numpy array; it is also kept in
            ``self.frame`` for a later ``infer_image()`` call.
        """
        with picamera.PiCamera() as camera:
            camera.resolution = (640, 480)
            with picamera.array.PiRGBArray(camera) as stream:
                camera.capture(stream, self.colourmode, use_video_port=True)
                # Keep the array itself: the stream object is closed when
                # this with-block exits.
                self.frame = stream.array
        return self.frame

    def infer_image(self):
        """Pre-process the last captured frame and run inference on it.

        Raises:
            RuntimeError: If ``take_picture()`` has not been called yet.
        """
        if self.frame is None:
            raise RuntimeError(
                'take_picture() must be called before infer_image()')
        img = pre_process_image(self.frame, self.dim, self.mean, self.scale)
        # Inside a method body this name resolves to the module-level function
        infer_image(self.graph, img, self.frame)

    def close_ncs_device(self):
        """Deallocate the graph and close the NCS device."""
        self.graph.DeallocateGraph()
        self.device.CloseDevice()
    
    
    
# ---- Define 'main' function as the entry point for this script -------------

if __name__ == '__main__':
    # Build the command-line parser the add_argument() calls below act on
    parser = argparse.ArgumentParser(
        description='DIY smart security camera PoC using Raspberry Pi '
                    'Camera and Intel Movidius Neural Compute Stick (NCS)')

    parser.add_argument('-g', '--graph', type=str,
                        default='../../caffe/SSD_MobileNet/graph',
                        help="Absolute path to the neural network graph file.")

    parser.add_argument('-l', '--labels', type=str,
                        default='../../caffe/SSD_MobileNet/labels.txt',
                        help="Absolute path to labels file.")

    # Store the parsed options in the module-level ARGS. parse_known_args()
    # ignores unrecognised argv entries (a Jupyter kernel is started with
    # extra arguments) instead of aborting.
    ARGS, _unknown_args = parser.parse_known_args()
# ==== End of file ===========================================================
