# Image Classifier

Contains code for the `ImageClassifer` class, which loads a neural network on the Movidius NCS, takes pictures using a pi cam, passes the (preprocessed) pics through the stick and decodes the result into a bounding box around the category of your choice.

Main workflow when importing:
1. `import image_classifier`
2. `IC = image_classifier.ImageClassifier()`
3. Repeat:
    - `img_array = IC.take_picture_and_start_inference()`
    - `output_dict, bb, inference_time_ms = IC.get_inference_result()`
    - `if bb == -1:`
        - `    img = PIL.Image.fromarray(img_array)`
    - `else:`
        - `    (x1, y1), (x2, y2) = bb  # do stuff`

---

TODO: Much of the code comes from the Movidius GitHub repo, and should be attributed properly!

---

I find development using a notebook to be quite a bit easier than developing using a regular python file. Unfortunately, you can't import a `.ipynb` as a module. So, here's the workflow:
1. Use this file to understand the code and make changes
2. When you want to commit a change, click `Kernal > Restart and Clear Output` to remove your outputs + make the file a bit smaller (shows up as fewer lines in the git commit)
3. Run the command `jupyter nbconvert --to=python image_classifier.ipynb` to generate a `.py` file which can be imported as a module. Just make sure to remove your debugging code beforehand!

In [1]:
import os, time
import numpy as np
import picamera, picamera.array
import mvnc.mvncapi as mvnc
import PIL.Image, PIL.ImageDraw, PIL.ImageFont
from apscheduler.schedulers.background import BackgroundScheduler

## Open the NCS device and load the graph file

In [2]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation.
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# DIY smart security camera PoC using Raspberry Pi Camera and
# Intel® Movidius™ Neural Compute Stick (NCS)

def open_ncs_device():

    # Look for enumerated NCS device(s); quit program if none found.
    devices = mvnc.enumerate_devices()
    if len(devices) == 0:
        print('No NCS devices found')
        exit()

    # Get a handle to the first enumerated device and open it
    device = mvnc.Device(devices[0])
    device.open()

    return device

def load_graph(device, graph_file_name):
    # Read the graph file into a buffer
    with open(graph_file_name, mode='rb') as f:
        blob = f.read()

    # Load the graph buffer into the NCS
    graph = mvnc.Graph(graph_file_name)
    fifo_in, fifo_out = graph.allocate_with_fifos(device, blob)

    return graph, fifo_in, fifo_out

## Pre-process the image (resizing, scaling, and mean subtraction)

In [3]:
def pre_process_image(img, dim, mean, scale):
#     img = PIL.Image.fromarray(frame.array)#.resize(dim)
#     img = np.array(img, dtype=np.float32)
#     if img.shape != (dim[0], dim[1], 3):
#         img = img[:dim[0], :dim[1], :]
#         print('resizing in preprocess function')

    # Mean subtraction & scaling [A common technique used to center the data]
    img = (img - np.float32(mean)) * np.float32(scale)

    return img

## Get the inference result

In [4]:
def get_inference_result(graph, shape, fifo_in, fifo_out, confidance_threshold=0.6):
    # Get the results from NCS
    output, userobj = fifo_out.read_elem()

    # Get execution time
    inference_time = np.sum(graph.get_option(mvnc.GraphOption.RO_TIME_TAKEN))

    # Deserialize the output into a python dictionary
    output_dict = deserialize_ssd(output, shape, confidance_threshold)
    
    return output_dict, inference_time

In [5]:
# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help deserialize the output list from
# Intel® Movidius™ Neural Compute Stick (NCS)
def deserialize_ssd(output, shape, confidance_threshold):
    """---- Deserialize the output from an SSD based network ----
    
    @param output The NCS returns a list/array in this structure:
        First float16: Number of detections
        Next 6 values: Unused
        Next consecutive batch of 7 values: Detection values
          0: Image ID (always 0)
          1: Class ID (index into labels.txt)
          2: Detection score
          3: Box left coordinate (x1) - scaled value between 0 & 1
          4: Box top coordinate (y1) - scaled value between 0 & 1
          5: Box right coordinate (x2) - scaled value between 0 & 1
          6: Box bottom coordinate (y2) - scaled value between 0 & 1

    @return output_dict A Python dictionary with the following keys:
        output_dict['num_detections'] = Total number of valid detections
        output_dict['detection_classes_<X>'] = Class ID of the detected object
        output_dict['detection_scores_<X>'] = Percentage of the confidance
        output_dict['detection_boxes_<X>'] = A list of 2 tuples [(x1, y1) (x2, y2)]
        Where <X> is a zero-index count of num_detections
    """

    output_dict = {}                # Dictionary where the deserialized output will be stored
    height, width, channel = shape  # Extract the original image's shape
    output_dict['num_detections'] = int(output[0])  # Total number of detections
    num_valid_detections = 0

    for detection in range(output_dict['num_detections']):
        base_index = 7 + (7 * detection)  # Skip the first 7 values

        if (output[base_index + 2] > confidance_threshold):
            output_dict['detection_classes_' + str(num_valid_detections)] = int(output[base_index + 1])
            output_dict['detection_scores_' + str(num_valid_detections)] = int(output[base_index + 2] * 100)

            x = [int(output[base_index + 3] * width), int(output[base_index + 5] * width)]
            y = [int(output[base_index + 4] * height), int(output[base_index + 6] * height)]

            output_dict['detection_boxes_' + str(num_valid_detections)] = list(zip(y, x))

            num_valid_detections += 1

    # Update total number of detections to valid detections
    output_dict['num_detections'] = int(num_valid_detections)

    return output_dict

In [6]:
def output_dict_to_bb_and_angles(output_dict, class_of_interest, camera_resolution, camera_FOV):
    for i in range(output_dict['num_detections']):
        if (output_dict.get('detection_classes_%i' % i) == class_of_interest):
            (y1, x1) = output_dict.get('detection_boxes_' + str(i))[0]
            (y2, x2) = output_dict.get('detection_boxes_' + str(i))[1]
            bb = (x1, y1), (x2, y2)

            w, h = camera_resolution
            cam_width = camera_FOV[0]
            cam_height = camera_FOV[1]
            x1_angle = pixel_to_angle(x1, w, cam_width)
            x2_angle = pixel_to_angle(x2, w, cam_width)
            y1_angle = pixel_to_angle(y1, h, cam_height)
            y2_angle = pixel_to_angle(y2, h, cam_height)
            bb_angles = ((x1_angle, y1_angle), (x2_angle, y2_angle))

            return bb, bb_angles

## Functions for displaying images

In [7]:
def display_image(output_dict, class_of_interest, frame, labels, capture_screenshots=False):
    # Print the results (each image/frame may have multiple objects)
    for i in range(output_dict['num_detections']):
        
        if (output_dict.get('detection_classes_' + str(i)) == class_of_interest):
            
            # Extract top-left & bottom-right coordinates of detected objects
            (y1, x1) = output_dict.get('detection_boxes_' + str(i))[0]
            (y2, x2) = output_dict.get('detection_boxes_' + str(i))[1]

            # Prep string to overlay on the image
            display_str = (labels[output_dict.get('detection_classes_%i' % i)]
                           + ': %s%%' % output_dict.get('detection_scores_%i' % i))

            # Overlay bounding boxes, detection class and scores
            frame = draw_bounding_box( 
                        y1, x1, y2, x2,
                        frame, display_str=display_str)

    if capture_screenshots:
        img = PIL.Image.fromarray(frame)
        img.save('captures/photo_%s.jpg' % cur_time)

    # If a display is available, show image on which inference was performed
    if 'DISPLAY' in os.environ:
        img.show()

# ****************************************************************************
# Copyright(c) 2017 Intel Corporation. 
# License: MIT See LICENSE file in root directory.
# ****************************************************************************

# Utilities to help visualize the output from
# Intel® Movidius™ Neural Compute Stick (NCS)
def draw_bounding_box(y1, x1, y2, x2, 
                      img, 
                      thickness=4, 
                      color=(255, 255, 0),
                      display_str=()):
    """ draw a bounding box on an image to help visualise the nn output
    
    Inputs
        (x1, y1)  = Top left corner of the bounding box
        (x2, y2)  = Bottom right corner of the bounding box
        img       = Image/frame represented as numpy array
        thickness = Thickness of the bounding box's outline
        color     = Color of the bounding box's outline
    """
    img = PIL.Image.fromarray(img)
    draw = PIL.ImageDraw.Draw(img)

    for x in range(0, thickness):
        draw.rectangle([(x1-x, y1-x), (x2-x, y2-x)], outline=color)

    font = PIL.ImageFont.load_default()
    draw.text((x1, y1), display_str, font=font)

    return np.array(img)

## Utils

In [8]:
def pixel_to_angle(pixel, img_size, cam_angle_deg):
    """ convert a pixel value to an angle in degrees
    inputs:
        pixel:              a value from 0 to img_size
        img_size:           the total number of pixels along that axis of the image
        cam_angle_deg:      the angular width of camera in degrees
        
    output:
        angle:      0deg = when pixel is directly ahead, positive/negative = right/left of center
    """
    norm_pixel = (2*pixel/img_size) - 1  # now in range [-1, 1]
    angle = np.arctan(norm_pixel * np.tan(np.deg2rad(cam_angle_deg/2)))
    return np.rad2deg(angle)

## The ImageClassifier class

In [9]:
class ImageClassifier():
    """ class to make the whole process of taking pictures etc easy
    
    methods:
        ImageClassifier.__init__(...)
        ImageClassifier.take_picture_and_start_inference()
        ImageClassifier.get_inference_result()
        ImageClassifier.__del__()
    """
    def __init__(self,
                 graph_file='graph',
                 label_file='categories.txt',  # must correspond to the specific network
                 mean=(127.5, 127.5, 127.5),   # depends on the colourmode
                 scale=0.00789,                # = 1/127
                 nn_dim=(300, 300),            # (width, height)
                 colourmode='rgb',
                 camera_resolution=(1640, 922),# (width, height)
                 camera_FOV=(62.2, 48.8),      # (width, height)
                 class_of_interest='person',   # could also be 'dog', 'cat', etc (see categories.txt file)
                 debug=False):
        
        ##### COPY ARGS TO OBJECT #####
        self.mean = mean
        self.scale = scale
        self.dim = nn_dim
        self.colourmode = colourmode
        self.camera_resolution = camera_resolution
        self.camera_FOV = camera_FOV
        self.debug = debug
        
        ##### LOAD THE LABELS FILE #####
        self.labels = [line.rstrip('\n') for line in open(label_file) if line != 'classes\n']
        self.class_of_interest = self.labels.index(class_of_interest) # note conversion from string to ID (int)
        
        ##### OPEN AND INIT THE NCS #####
        self.device = open_ncs_device()
        self.graph, self.fifo_in, self.fifo_out = load_graph(self.device, graph_file)
        
        ##### OPEN THE CAMERA AND START STREAMING #####
        self.camera = picamera.PiCamera(resolution=self.camera_resolution, framerate=90)
        self.frame = picamera.array.PiRGBArray(self.camera, size=nn_dim)
        self.cont_capture = self.camera.capture_continuous(self.frame, colourmode, resize=nn_dim, use_video_port=True)# use GPU for resizing
        
        ##### MULTITHREADING #####
        self.scheduler = BackgroundScheduler()
        
        self.job_take_pics            = self.scheduler.add_job(self.take_pics, 'interval', seconds=10, max_instances=1) #8 FPS
        self.job_preprocess_and_queue = self.scheduler.add_job(self.preprocess_and_queue, 'interval', seconds=10, max_instances=1)
        self.job_get_results          = self.scheduler.add_job(self.get_inference_result, 'interval', seconds=10, max_instances=1)
        
        self.take_pics_done = False
        self.preprocess_and_queue_done = False
        self.results_are_ready = False
        
        self.scheduler.start()
        
        
    def take_pics(self):
        """ launches every 0.125 seconds = 8 Hz"""
        while True:
            self.frame.seek(0)
            self.frame.truncate(0)
            next(self.cont_capture)  # next frame in the continuous capture

            self.take_pics_done = True
    
    
    def preprocess_and_queue(self, debug=False):
        while True:
            if self.take_pics_done is True:
                preprocessed_img = pre_process_image(self.frame.array, self.dim, self.mean, self.scale)
                self.graph.queue_inference_with_fifo_elem(self.fifo_in, self.fifo_out, preprocessed_img, None)

                self.take_pics_done = False
                self.preprocess_and_queue_done = True


    def get_inference_result(self):
        while True:
            if self.preprocess_and_queue_done is True:
                self.output_dict, inference_time_ms = get_inference_result(self.graph,
                                                                           self.frame.array.shape,
                                                                           self.fifo_in,
                                                                           self.fifo_out)

                self.bb, self.bb_angles = output_dict_to_bb_and_angles(output_dict,
                                                             self.class_of_interest,
                                                             self.camera_resolution,
                                                             self.camera_FOV)

                self.new_results_are_ready = True
    
    
    def get_results(self):
        if self.new_results_are_ready is True:
            self.new_results_are_ready = False
            return self.output_dict, self.bb, self.bb_angles
        else:
            return -1, -1, -1

    def close(self): self.__del__()
    
    def __del__(self):
        print('Closing threads...')
        self.job_take_pics.close()
        self.job_preprocess_and_queue.close()
        self.job_get_results.close()
        self.scheduler.close()
        
        print('Closing NCS...')
        self.fifo_in.destroy()
        self.fifo_out.destroy()
        self.graph.destroy()
        self.device.close()
        self.device.destroy()
        
        print('Closing PiCam...')
        self.camera.close()
        
        print('Closed all successfully')

In [10]:
# IC.__del__()

In [11]:
IC = ImageClassifier(graph_file='../Models/MobileNet_SSD_caffe/graph',
                     label_file='../Models/MobileNet_SSD_caffe/categories.txt',
                     camera_resolution=(1280,720))#(320,240))#(1640, 922)(640,480)(1280,720)

In [None]:
t = time.time()
im = IC.take_picture_and_start_inference(debug=True)
print('pic:::', time.time() - t)

IC.get_inference_result(debug=True)
print('total:::', time.time() - t)

In [None]:
# print(im.array.shape)
# PIL.Image.fromarray(im.array)

In [12]:
IC.get_results()

AttributeError: 'ImageClassifier' object has no attribute 'new_results_are_ready'

In [None]:
# import time
# import numpy as np
# from apscheduler.schedulers.background import BackgroundScheduler

# import logging
# logging.getLogger('apscheduler.executors.default').setLevel(logging.CRITICAL)

In [None]:
# class Stest:
#     def __init__(self):
#         self.scheduler = BackgroundScheduler()
#         self.job1 = self.scheduler.add_job(self.make_data, 'interval', seconds=1, max_instances=1)
#         self.job2 = self.scheduler.add_job(self.modify_data, 'interval', seconds=1, max_instances=1)
#         self.job3 = self.scheduler.add_job(self.output_data, 'interval', seconds=1, max_instances=1)
#         self.num = 1
#         self.thing1done = False
#         self.thing2done = False
#         self.scheduler.start()

#     def make_data(self):
#         print('thing1 running')
#         self.my_arr = np.empty(shape=(3000, 3000))
#         self.thing1done = True
    
#     def modify_data(self):
#         if self.thing1done:
#             print('modifying data')
#             self.my_arr2 = self.my_arr @ self.my_arr
#             self.thing1done = False
#             self.thing2done = True

#     def output_data(self):
#         if self.thing2done:
#             print('outputting data')
#             self.my_arr3 = self.my_arr2 @ self.my_arr2
#             self.thing2done = False
    
#     def close(self):
#         self.job1.remove()
#         self.job2.remove()
#         self.job3.remove()
#         self.scheduler.shutdown()

In [None]:
# s = Stest()
# time.sleep(10)
# s.close()

In [None]:
# import time
# import numpy as np
# from apscheduler.schedulers.background import BackgroundScheduler

# class Stest:
#     def __init__(self):
#         self.scheduler = BackgroundScheduler()
#         self.job1 = self.scheduler.add_job(self.make_data, 'interval', seconds=1, max_instances=1)
#         self.job2 = self.scheduler.add_job(self.modify_data, 'interval', seconds=0.001, max_instances=1)
#         self.job3 = self.scheduler.add_job(self.output_data, 'interval', seconds=0.001, max_instances=1)
#         self.num = 1
#         self.thing1done = False
#         self.thing2done = False
#         self.scheduler.start()

#     def make_data(self):
#         print('thing1 running')
#         self.my_arr = np.array([self.num, self.num + 1])
#         self.num += 1
#         self.thing1done = True
#         time.sleep(2)
    
#     def modify_data(self):
#         if self.thing1done:
#             print('modifying data')
#             self.my_arr[0] += 10
#             self.thing1done = False
#             self.thing2done = True

#     def output_data(self):
#         if self.thing2done:
#             print(self.my_arr)
#             self.thing2done = False
    
#     def close(self):
#         self.job1.remove()
#         self.job2.remove()
#         self.job3.remove()
#         self.scheduler.shutdown()

In [None]:
# # import the necessary packages
# from threading import Thread
# import cv2
# import imutils

# class WebcamVideoStream:
#     def __init__(self, src=0):
#         # initialize the video camera stream and read the first frame
#         # from the stream
#         self.camera_resolution=(1280,920)
#         self.nn_dim = (300,300)
#         self.colourmode = 'bgr'
#         self.camera = picamera.PiCamera(resolution=self.camera_resolution, framerate=90)
#         self.frame = picamera.array.PiRGBArray(self.camera, size=nn_dim)  #
#         self.cont_capture = self.camera.capture_continuous(self.frame, self.colourmode, resize=nn_dim, use_video_port=True)# #use GPU to resize to dim

#         # initialize the variable used to indicate if the thread should
#         # be stopped
#         self.stopped = False

#     def start(self):
#         # start the thread to read frames from the video stream
#         Thread(target=self.update, args=()).start()
#         return self

#     def update(self):
#         # keep looping infinitely until the thread is stopped
#         while True:
#             # if the thread indicator variable is set, stop the thread
#             if self.stopped:
#                 return

#             # otherwise, read the next frame from the stream
#             (self.grabbed, self.frame) = self.stream.read()

#     def read(self):
#         # return the frame most recently read
#         return self.frame

#     def stop(self):
#         # indicate that the thread should be stopped
#         self.stopped = True

# vs = WebcamVideoStream(src=0).start()
# vs

# frame = vs.read()
# frame