# Image Classifier

Contains code for the `ImageClassifier` class.

Much of the code comes from the Movidius GitHub repo

In [1]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation.
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# DIY smart security camera PoC using Raspberry Pi Camera and
# Intel® Movidius™ Neural Compute Stick (NCS)

import os
#import sys
import numpy as np
#import select
#import ntpath
import picamera
import picamera.array
import mvnc.mvncapi as mvnc
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont

from time import localtime, strftime

ModuleNotFoundError: No module named 'picamera'

## Open the NCS device and load the graph file

In [6]:
def open_ncs_device():
    """Open the first enumerated NCS device and return its handle.

    If mvnc.EnumerateDevices() reports no devices, prints
    'No NCS devices found' and calls quit().
    """
    found = mvnc.EnumerateDevices()

    # No stick plugged in: report it and stop the program.
    if not len(found):
        print('No NCS devices found')
        quit()

    # Only the first enumerated device is ever used.
    ncs = mvnc.Device(found[0])
    ncs.OpenDevice()
    return ncs

def load_graph(device, graph_file_name):
    """Read a graph file from disk and allocate it on an NCS device.

    device          -- object exposing AllocateGraph(blob)
    graph_file_name -- path of the graph file; it is read in binary mode

    Returns whatever device.AllocateGraph() returns.
    """
    with open(graph_file_name, mode='rb') as graph_file:
        graph_bytes = graph_file.read()

    # Hand the raw bytes to the device, which builds the graph handle
    return device.AllocateGraph(graph_bytes)

## Pre-process the image (resizing, scaling, and mean subtraction)

In [8]:
def pre_process_image(frame, dim, mean, scale):
    """Resize a frame and normalise it into a float16 array.

    frame -- image data accepted by PIL.Image.fromarray
    dim   -- target size handed to PIL's resize(); any 2-item sequence
             (the image size is defined by the chosen network, during training)
    mean  -- value(s) subtracted from the resized image, cast to float16
    scale -- factor applied after the mean subtraction, cast to float16

    Returns the resized image as a numpy float16 array, computed as
    (image - mean) * scale.
    """
    # PIL documents resize() as taking a 2-tuple, but callers in this
    # notebook pass a list such as [300, 300]; normalise it here.
    target_size = tuple(dim)

    img = PIL.Image.fromarray(frame).resize(target_size)
    img = np.array(img, dtype=np.float16)

    # Mean subtraction & scaling [a common technique used to center the data]
    img = (img - np.float16(mean)) * np.float16(scale)

    return img

## Get the inference result

In [9]:
def get_inference_result(graph, shape):
    """Collect the pending result from an NCS graph and deserialize it.

    graph -- graph handle exposing GetResult() and GetGraphOption()
    shape -- forwarded unchanged to ssd() as the original image's shape

    Returns (output_dict, inference_time): the dictionary built by ssd()
    and the value of the graph's TIME_TAKEN option.
    """
    # The user object returned alongside the output is not needed here
    raw_output, _user_object = graph.GetResult()

    # Execution time as reported by the graph itself
    elapsed = graph.GetGraphOption(mvnc.GraphOption.TIME_TAKEN)

    return ssd(raw_output, shape), elapsed

In [None]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help deserialize the output list from
# Intel® Movidius™ Neural Compute Stick (NCS)
def ssd(output, shape, confidance_threshold=0.6):
    """---- Deserialize the output from an SSD based network ----

    @param output The NCS returns a list/array in this structure:
        First float16: Number of detections
        Next 6 values: Unused
        Next consecutive batch of 7 values: Detection values
          0: Image ID (always 0)
          1: Class ID (index into labels.txt)
          2: Detection score
          3: Box left coordinate (x1) - scaled value between 0 & 1
          4: Box top coordinate (y1) - scaled value between 0 & 1
          5: Box right coordinate (x2) - scaled value between 0 & 1
          6: Box bottom coordinate (y2) - scaled value between 0 & 1

    @param shape (height, width, channel) of the original image; the
        scaled box coordinates are multiplied by width/height.

    @param confidance_threshold Only detections whose score is strictly
        greater than this value are kept.

    @return output_dict A Python dictionary with the following keys:
        output_dict['num_detections'] = Total number of valid detections
        output_dict['detection_classes_<X>'] = Class ID of the detected object
        output_dict['detection_scores_<X>'] = Confidence as an integer percentage
        output_dict['detection_boxes_<X>'] = A list of 2 tuples [(y1, x1), (y2, x2)]
            in pixels of the original image (note the y-before-x order)
        Where <X> is a zero-index count of num_detections

    Detections containing NaN/inf values are skipped, a non-finite
    detection count is treated as zero, and a count larger than the
    buffer actually holds is truncated to the records present.
    """

    # Dictionary where the deserialized output will be stored
    output_dict = {}

    # Extract the original image's shape (the channel count is not used)
    height, width, _channel = shape

    # Total number of detections; int() cannot convert NaN/inf, so a
    # non-finite count is reported as "nothing detected"
    reported = output[0]
    output_dict['num_detections'] = int(reported) if np.isfinite(reported) else 0

    # Variable to track number of valid detections
    valid_detections = 0

    for detection in range(output_dict['num_detections']):

        # Skip the first 7 values, and point to the next batch of 7 values
        base_index = 7 + (7 * detection)
        values = output[base_index:base_index + 7]

        # The count claims more records than the buffer contains
        if len(values) < 7:
            break

        # A single NaN/inf field would make int() raise below
        if not np.all(np.isfinite(values)):
            continue

        # Record only those detections whose confidence meets our threshold
        score = values[2]
        if score > confidance_threshold:
            suffix = str(valid_detections)

            output_dict['detection_classes_' + suffix] = int(values[1])
            output_dict['detection_scores_' + suffix] = int(score * 100)

            x = [int(values[3] * width), int(values[5] * width)]
            y = [int(values[4] * height), int(values[6] * height)]

            # Pairs are (y, x): callers unpack them in that order
            output_dict['detection_boxes_' + suffix] = list(zip(y, x))

            valid_detections += 1

    # Update total number of detections to valid detections
    output_dict['num_detections'] = int(valid_detections)

    return output_dict

## The ImageClassifier class

In [None]:
class ImageClassifier():
    """ class to make the whole process of taking pictures etc easy

    Ties together the Pi camera, the NCS device/graph and the SSD output
    deserialisation defined elsewhere in this notebook.

    methods:
        ImageClassifier.__init__(...)
        ImageClassifier.take_picture_and_start_inference()
        ImageClassifier.get_inference_result()
        ImageClassifier.close_ncs_device()
    """
    def __init__(self,
                 graph_file='graph',
                 label_file='categories.txt',
                 mean=[127.5, 127.5, 127.5],
                 scale=0.00789, # 1/127
                 dim=[300, 300],
                 colourmode='bgr',
                 camera_resolution=(640, 480),
                 class_of_interest='person'):  # could also be 'dog', 'cat', etc (see categories.txt file)
        """ open the NCS device, load the graph and set up the camera

        graph_file        -- path of the graph file given to load_graph()
        label_file        -- text file with one label per line; a line that
                             is exactly 'classes' is ignored
        mean, scale, dim  -- forwarded to pre_process_image() for every frame
                             (the list defaults are stored, never mutated)
        colourmode        -- capture format given to PiCamera.capture()
        camera_resolution -- assigned to the camera's resolution attribute
        class_of_interest -- label name; raises ValueError if it is not
                             present in label_file
        """
        self.mean = mean
        self.scale = scale
        self.dim = dim
        self.colourmode = colourmode

        # Load the labels file and get the index (=ID) of the class of interest
        with open(label_file) as label_lines:
            self.labels = [line.rstrip('\n') for line in label_lines
                           if line != 'classes\n']
        self.class_of_interest = self.labels.index(class_of_interest)

        # sort out the NCS stuff
        self.device = open_ncs_device()
        self.graph = load_graph(self.device, graph_file)

        # initialise the PiCamera
        self.camera = picamera.PiCamera()
        self.camera.resolution = camera_resolution


    def take_picture_and_start_inference(self):
        """ takes a picture and starts the inference, but doesn't wait for
        the result; call get_inference_result() afterwards to collect it
        """
        self.frame = picamera.array.PiRGBArray(self.camera)
        self.camera.capture(self.frame, self.colourmode, use_video_port=True)

        # PiRGBArray is a capture target; the pixel data lives in .array
        preprocessed_img = pre_process_image(self.frame.array, self.dim, self.mean, self.scale)

        # Load the image as a half-precision floating point array
        self.graph.LoadTensor(preprocessed_img, 'user object')


    def get_inference_result(self):
        """ collects the result of the inference started for the last picture

        Returns (output_dict, bb, inference_time) for the first detection
        whose class is the class of interest, where
            output_dict['detection_classes_<X>'] = Class ID of the detected object
            output_dict['detection_scores_<X>'] = Percentage of the confidence
            output_dict['detection_boxes_<X>'] = A list of 2 tuples [(y1, x1), (y2, x2)]
            bb = ((x1 - 0.5, y1 - 0.5), (x2 - 0.5, y2 - 0.5))
        or (-1, -1, -1) when the class of interest was not detected.
        """
        output_dict, inference_time = get_inference_result(self.graph, self.frame.array.shape)

        if 'DISPLAY' in os.environ:
            display_image(output_dict, self.class_of_interest, self.frame.array)

        for i in range(output_dict['num_detections']):
            if (output_dict.get('detection_classes_%i' % i) == self.class_of_interest):
                boxes = output_dict.get('detection_boxes_' + str(i))
                (y1, x1) = boxes[0]
                (y2, x2) = boxes[1]
                bb = (x1 - 0.5, y1 - 0.5), (x2 - 0.5, y2 - 0.5)  # TODO: scaling??
                return output_dict, bb, inference_time

        # couldn't find the class of interest
        return -1, -1, -1

    def close_ncs_device(self):
        """ deallocates the graph and closes the NCS device """
        self.graph.DeallocateGraph()
        self.device.CloseDevice()

## Functions for displaying images

In [None]:
def display_image(output_dict, class_of_interest, frame, capture_screenshots=False, labels=None):
    """ annotate a frame with the detections of one class, then save/show it

    output_dict         -- dictionary in the format produced by ssd()
    class_of_interest   -- class ID; only detections of this class are drawn
    frame               -- image as a numpy array (what draw_bounding_box expects)
    capture_screenshots -- when True, save the annotated image as
                           'captures/photo_<timestamp>.jpg'
    labels              -- optional sequence mapping class ID -> label name.
                           When omitted, a module-level `labels` is used if
                           one exists; otherwise the numeric class ID is shown.

    The annotated image is shown only when the DISPLAY environment variable
    is set. Returns None.
    """
    cur_time = strftime('%Y_%m_%d_%H_%M_%S', localtime())
    print('Detections for ' + cur_time)

    # Older callers relied on a global `labels`; keep honouring it if present
    if labels is None:
        labels = globals().get('labels')

    # Print the results (each image/frame may have multiple objects)
    for i in range(0, output_dict['num_detections']):
        class_id = output_dict.get('detection_classes_' + str(i))

        # Filter a specific class/category
        if class_id != class_of_interest:
            continue

        # Extract top-left & bottom-right coordinates of detected objects
        boxes = output_dict.get('detection_boxes_' + str(i))
        (y1, x1) = boxes[0]
        (y2, x2) = boxes[1]

        # Prep string to overlay on the image
        label_text = labels[class_id] if labels is not None else str(class_id)
        display_str = (
            label_text
            + ': %s%%' % output_dict.get('detection_scores_%i' % i))

        # Overlay bounding boxes, detection class and scores
        frame = draw_bounding_box(
                    y1, x1, y2, x2,
                    frame,
                    thickness=4,
                    color=(255, 255, 0),
                    display_str=display_str)

    show_image = 'DISPLAY' in os.environ
    if not (capture_screenshots or show_image):
        return

    # Build the PIL image once so that both saving and showing can use it
    img = PIL.Image.fromarray(frame)

    if capture_screenshots:
        os.makedirs('captures', exist_ok=True)
        img.save('captures/photo_%s.jpg' % cur_time)

    # If a display is available, show image on which inference was performed
    if show_image:
        img.show()

In [None]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help visualize the output from
# Intel® Movidius™ Neural Compute Stick (NCS)

def draw_bounding_box(y1, x1, y2, x2,
                      img,
                      thickness=4,
                      color=(255, 255, 0),
                      display_str=()):
    """ draw a bounding box on an image to help visualise the nn output

    Inputs
        (x1, y1)    = Top left corner of the bounding box
        (x2, y2)    = Bottom right corner of the bounding box
        img         = Image/frame represented as numpy array
        thickness   = Thickness of the bounding box's outline
        color       = Color of the bounding box's outline
        display_str = Text drawn at the top left corner of the box

    Returns
        A new numpy array holding the annotated image.
    """

    img = PIL.Image.fromarray(img)
    draw = PIL.ImageDraw.Draw(img)

    # One rectangle per unit of thickness, each shifted one more pixel
    # up and to the left than the previous one
    for offset in range(0, thickness):
        draw.rectangle([(x1 - offset, y1 - offset), (x2 - offset, y2 - offset)],
                       outline=color)

    font = PIL.ImageFont.load_default()
    draw.text((x1, y1), display_str, font=font)

    # The notebook imports numpy as `np`; the bare name `numpy` is undefined
    return np.array(img)