# Image Classifier

Contains code for the `ImageClassifier` class

Much of the code comes from the Movidius GitHub repo

I find development using a notebook to be quite a bit easier than developing using a regular python file. Unfortunately, you can't import a `.ipynb` as a module. So, here's the workflow:
1. Use this file to understand the code and make changes
2. Click `Kernel > Restart and Clear Output` to remove your outputs and make the file a bit smaller (shows up as fewer lines in the git commit)
3. Uncomment and run the line below to generate a `.py` file which can be imported as a module. Just make sure to remove your debugging code beforehand!

Main workflow when importing:
1. `import image_classifier`
2. `IC = image_classifier.ImageClassifier()`
3. Repeat:
    1. `img_array = IC.take_picture_and_start_inference()`
    2. `output_dict, bb, bb_scaled, inference_time_ms = IC.get_inference_result()`
    3. `if bb == -1:`
       `    img = PIL.Image.fromarray(img_array)`
       `else:`
       `    (x1, y1), (x2, y2) = bb  # do stuff`

In [None]:
# Generate a .py file, which can be imported as a module
# !jupyter nbconvert --to=python image_classifier.ipynb

In [None]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation.
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# DIY smart security camera PoC using Raspberry Pi Camera and
# Intel® Movidius™ Neural Compute Stick (NCS)

import os
import numpy as np
import picamera
import picamera.array
import mvnc.mvncapi as mvnc
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont

from time import localtime, strftime

## Open the NCS device and load the graph file

In [None]:
def open_ncs_device():
    """Open the first enumerated NCS device and return its handle.

    Prints a message and quits the program when no device is found.
    """
    found = mvnc.enumerate_devices()

    # Nothing enumerated: report it and leave the program.
    if not found:
        print('No NCS devices found')
        quit()

    # Only the first enumerated device is used.
    ncs = mvnc.Device(found[0])
    ncs.open()
    return ncs

def load_graph(device, graph_file_name):
    """Read a graph file from disk and allocate it on an opened device.

    Inputs
        device          = Device handle, as returned by open_ncs_device()
        graph_file_name = Path of the graph file; also used as the graph's name

    Returns the tuple (graph, fifo_in, fifo_out).
    """
    # Pull the whole graph file into memory as raw bytes
    with open(graph_file_name, mode='rb') as graph_file:
        graph_buffer = graph_file.read()

    # Hand the buffer to the device; this also creates the two FIFOs
    ncs_graph = mvnc.Graph(graph_file_name)
    input_fifo, output_fifo = ncs_graph.allocate_with_fifos(device, graph_buffer)

    return ncs_graph, input_fifo, output_fifo

## Pre-process the image (resizing, scaling, and mean subtraction)

In [None]:
def pre_process_image(frame, dim, mean, scale):
    """Resize a captured frame and normalise it for the network.

    Inputs
        frame = Object whose `.array` attribute holds the image data
        dim   = Target size handed to PIL's resize()
        mean  = Value(s) subtracted from every pixel
        scale = Factor applied after the mean subtraction

    Returns a float32 numpy array.
    """
    # Resize to the size the chosen network was trained on
    resized = PIL.Image.fromarray(frame.array).resize(dim)
    pixels = np.array(resized, dtype=np.float32)

    # Centre the data (mean subtraction), then scale it
    centred = pixels - np.float32(mean)
    return centred * np.float32(scale)

## Get the inference result

In [None]:
def get_inference_result(graph, shape, fifo_in, fifo_out):
    """Read one result from the output FIFO and deserialize it.

    Inputs
        graph    = Graph the inference was queued on
        shape    = Shape of the original image, forwarded to ssd()
        fifo_in  = Input FIFO (not used here)
        fifo_out = Output FIFO the result is read from

    Returns the tuple (output_dict, inference_time).
    """
    # Fetch the next element from the NCS; the user object is not needed
    output, _userobj = fifo_out.read_elem()

    # Sum whatever the graph reports for its time-taken option
    time_taken = graph.get_option(mvnc.GraphOption.RO_TIME_TAKEN)
    inference_time = np.sum(time_taken)

    # Turn the raw output into a python dictionary
    return ssd(output, shape), inference_time

In [None]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help deserialize the output list from
# Intel® Movidius™ Neural Compute Stick (NCS)
def ssd(output, shape, confidance_threshold=0.6):
    """---- Deserialize the output from an SSD based network ----

    @param output The NCS returns a list/array in this structure:
        First float16: Number of detections
        Next 6 values: Unused
        Next consecutive batch of 7 values: Detection values
          0: Image ID (always 0)
          1: Class ID (index into labels.txt)
          2: Detection score
          3: Box left coordinate (x1) - scaled value between 0 & 1
          4: Box top coordinate (y1) - scaled value between 0 & 1
          5: Box right coordinate (x2) - scaled value between 0 & 1
          6: Box bottom coordinate (y2) - scaled value between 0 & 1

    @param shape Shape of the original image; only the first two entries
        (height, width) are used, so (h, w) and (h, w, channels) both work.

    @param confidance_threshold Detections whose score is not strictly
        greater than this value are dropped.

    @return output_dict A Python dictionary with the following keys:
        output_dict['num_detections'] = Total number of valid detections
        output_dict['detection_classes_<X>'] = Class ID of the detected object
        output_dict['detection_scores_<X>'] = Percentage of the confidance
        output_dict['detection_boxes_<X>'] = A list of 2 tuples [(y1, x1), (y2, x2)]
        Where <X> is a zero-index count of num_detections

    Detections containing NaN/inf values, and detections the buffer is too
    short to hold, are skipped instead of raising.
    """

    # Dictionary where the deserialized output will be stored
    output_dict = {}

    # Extract the original image's height and width (channels are not needed)
    height, width = shape[0], shape[1]

    # Number of detections the device claims to have produced; a non-finite
    # count cannot be trusted, so treat it as "nothing detected"
    reported = output[0]
    num_reported = int(reported) if np.isfinite(reported) else 0

    # Number of complete 7-value records that actually fit after the header
    available = max((len(output) - 7) // 7, 0)

    # Total number of detections (overwritten with the valid count below)
    output_dict['num_detections'] = num_reported

    # Variable to track number of valid detections
    valid_detections = 0

    for detection in range(min(num_reported, available)):

        # Skip the first 7 values, and point to the next batch of 7 values
        base_index = 7 + (7 * detection)
        record = output[base_index + 1:base_index + 7]

        # int() cannot convert NaN/inf, so drop such records up front
        if not np.isfinite(record).all():
            continue

        class_id, score, left, top, right, bottom = record

        # Record only those detections whose confidance meets our threshold
        if score > confidance_threshold:
            suffix = str(valid_detections)

            output_dict['detection_classes_' + suffix] = int(class_id)
            output_dict['detection_scores_' + suffix] = int(score * 100)

            x = [int(left * width), int(right * width)]
            y = [int(top * height), int(bottom * height)]

            # Stored as (y, x) pairs: [(y1, x1), (y2, x2)]
            output_dict['detection_boxes_' + suffix] = list(zip(y, x))

            valid_detections += 1

    # Update total number of detections to valid detections
    output_dict['num_detections'] = int(valid_detections)

    return output_dict

## Functions for displaying images

In [None]:
def display_image(output_dict, class_of_interest, frame, labels, capture_screenshots=False):
    """Overlay detections of one class on a frame, then save and/or show it.

    Inputs
        output_dict         = Dictionary in the format produced by ssd()
        class_of_interest   = Class ID to draw; other classes are ignored
        frame               = Image represented as numpy array
        labels              = Sequence mapping class ID -> label text
        capture_screenshots = When True, save the annotated image as
                              'captures/photo_<timestamp>.jpg'

    The image is shown only when the DISPLAY environment variable is set.
    Returns None.
    """
    # Draw the results (each image/frame may have multiple objects)
    for i in range(output_dict['num_detections']):

        # Filter a specific class/category
        if output_dict.get('detection_classes_%i' % i) != class_of_interest:
            continue

        # Extract top-left & bottom-right coordinates of detected objects
        (y1, x1), (y2, x2) = output_dict.get('detection_boxes_%i' % i)

        # Prep string to overlay on the image
        display_str = (
            labels[output_dict.get('detection_classes_%i' % i)]
            + ': %s%%' % output_dict.get('detection_scores_%i' % i))

        # Overlay bounding boxes, detection class and scores
        frame = draw_bounding_box(
                    y1, x1, y2, x2,
                    frame,
                    thickness=4,
                    color=(255, 255, 0),
                    display_str=display_str)

    # If a display is available, show image on which inference was performed
    show_on_screen = 'DISPLAY' in os.environ
    if not (capture_screenshots or show_on_screen):
        return

    # Build the image once; both the save and the show path need it
    img = PIL.Image.fromarray(frame)

    if capture_screenshots:
        # Timestamp makes each capture's file name unique to the second
        cur_time = strftime('%Y_%m_%d_%H_%M_%S', localtime())
        os.makedirs('captures', exist_ok=True)
        img.save('captures/photo_%s.jpg' % cur_time)

    if show_on_screen:
        img.show()

In [None]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help visualize the output from
# Intel® Movidius™ Neural Compute Stick (NCS)

def draw_bounding_box(y1, x1, y2, x2,
                      img,
                      thickness=4,
                      color=(255, 255, 0),
                      display_str=()):
    """ draw a bounding box on an image to help visualise the nn output

    Inputs
        (x1, y1)    = Top left corner of the bounding box
        (x2, y2)    = Bottom right corner of the bounding box
        img         = Image/frame represented as numpy array
        thickness   = Thickness of the bounding box's outline
        color       = Color of the bounding box's outline
        display_str = Text drawn at (x1, y1); nothing is drawn when empty

    Returns
        The annotated image as a new numpy array
    """

    img = PIL.Image.fromarray(img)
    draw = PIL.ImageDraw.Draw(img)

    # One 1px rectangle per unit of thickness, each shifted up-left by 1px
    for offset in range(thickness):
        draw.rectangle([(x1 - offset, y1 - offset), (x2 - offset, y2 - offset)],
                       outline=color)

    # The default display_str is an empty tuple, which is not valid text for
    # draw.text(); only draw when there is actually something to write
    if display_str:
        font = PIL.ImageFont.load_default()
        draw.text((x1, y1), display_str, font=font)

    return np.array(img)

## The ImageClassifier class

In [None]:
class ImageClassifier():
    """ class to make the whole process of taking pictures etc easy

    methods:
        ImageClassifier.__init__(...)
        ImageClassifier.take_picture_and_start_inference()
        ImageClassifier.get_inference_result()
        ImageClassifier.close_ncs_device()
    """
    def __init__(self,
                 graph_file='graph',
                 label_file='categories.txt',
                 mean=[127.5, 127.5, 127.5],
                 scale=0.00789, # 1/127
                 dim=[300, 300],
                 colourmode='rgb',
                 camera_resolution=(640, 480),
                 class_of_interest='person'):  # could also be 'dog', 'cat', etc (see categories.txt file)
        """ load the labels, open the NCS device and load the graph onto it

        Inputs
            graph_file        = Path of the graph file to load onto the NCS
            label_file        = Text file with one label per line
            mean, scale       = Normalisation applied by pre_process_image()
            dim               = Size the picture is resized to before inference
            colourmode        = Format passed to the camera's capture()
            camera_resolution = (width, height) the camera is opened with
            class_of_interest = Label whose bounding box is reported

        Raises ValueError when class_of_interest is not in label_file.
        """
        # Copy list arguments so the shared default lists can never be
        # modified through this instance
        self.mean = list(mean) if isinstance(mean, list) else mean
        self.scale = scale
        self.dim = list(dim) if isinstance(dim, list) else dim
        self.colourmode = colourmode
        self.camera_resolution = camera_resolution

        # Load the labels file and get the index (=ID) of the class of interest
        with open(label_file) as label_lines:
            self.labels = [line.rstrip('\n') for line in label_lines if line != 'classes\n']
        self.class_of_interest = self.labels.index(class_of_interest)

        # sort out the NCS stuff
        self.device = open_ncs_device()
        self.graph, self.fifo_in, self.fifo_out = load_graph(self.device, graph_file)


    def take_picture_and_start_inference(self):
        """ takes a picture and starts the inference, but doesn't wait for the result

        Returns the captured picture as a numpy array. Call
        get_inference_result() afterwards to collect the result.
        """
        with picamera.PiCamera(resolution=self.camera_resolution) as camera:
            self.frame = picamera.array.PiRGBArray(camera)
            camera.capture(self.frame, self.colourmode)

            preprocessed_img = pre_process_image(self.frame, self.dim, self.mean, self.scale)

            # Queue the pre-processed image for inference on the NCS
            self.graph.queue_inference_with_fifo_elem(self.fifo_in, self.fifo_out, preprocessed_img, None)

            return self.frame.array


    def get_inference_result(self):
        """ collects the result of the inference started most recently

        Returns the tuple (output_dict, bb, bb_scaled, inference_time_ms):
            output_dict = Dictionary in the format produced by ssd()
            bb          = ((x1, y1), (x2, y2)) of the first detection of the
                          class of interest, in pixels; -1 when there is none
            bb_scaled   = Same corners divided by the camera resolution and
                          shifted by -0.5; -1 when there is none
            inference_time_ms = Time value reported for the inference
        """
        # Inside a method this name refers to the module-level function
        output_dict, inference_time_ms = get_inference_result(self.graph,
                                                           self.frame.array.shape,
                                                           self.fifo_in,
                                                           self.fifo_out)

        w, h = self.camera_resolution

        for i in range(output_dict['num_detections']):
            if output_dict.get('detection_classes_%i' % i) != self.class_of_interest:
                continue

            # Boxes are stored as [(y1, x1), (y2, x2)]
            (y1, x1), (y2, x2) = output_dict.get('detection_boxes_%i' % i)
            bb = (x1, y1), (x2, y2)
            bb_scaled = ((x1/w) - 0.5, (y1/h) - 0.5), ((x2/w) - 0.5, (y2/h) - 0.5)
            return output_dict, bb, bb_scaled, inference_time_ms

        # couldn't find the class of interest
        return output_dict, -1, -1, inference_time_ms


    def close_ncs_device(self):
        """ releases the FIFOs, the graph and the device

        Safe to call more than once, and safe to call on an instance whose
        __init__ did not get as far as opening the device.
        """
        teardown = (('fifo_in', ('destroy',)),
                    ('fifo_out', ('destroy',)),
                    ('graph', ('destroy',)),
                    ('device', ('close', 'destroy')))

        for attr_name, method_names in teardown:
            resource = getattr(self, attr_name, None)
            if resource is None:
                continue

            # Forget the handle first so it is never released twice
            setattr(self, attr_name, None)
            for method_name in method_names:
                getattr(resource, method_name)()


    def __del__(self):
        self.close_ncs_device()

In [None]:
# IC = ImageClassifier(graph_file='../Models/MobileNet_SSD_caffe/graph',
#                      label_file='../Models/MobileNet_SSD_caffe/categories.txt')

In [None]:
# img_array = IC.take_picture_and_start_inference()
# output_dict, bb, bb_scaled, inference_time_ms = IC.get_inference_result()

# print(output_dict)
# print(bb)
# print(inference_time_ms)

In [None]:
# if bb == -1:
#     img = PIL.Image.fromarray(img_array)
# else:
#     (x1, y1), (x2, y2) = bb
#     img = PIL.Image.fromarray(img_array)
#     img = PIL.Image.fromarray(draw_bounding_box(y1, x1, y2, x2, img_array))

# img

In [None]:
# del IC

In [None]:
# import timeit

In [None]:
# timeit.timeit(my_func, number=10)/10

In [None]:
# def my_func():
#     with picamera.PiCamera(resolution=(300,300)) as camera:
#         frame = picamera.array.PiRGBArray(camera)
#         camera.capture(frame, 'rgb')

In [None]:
# my_func()