# Image Classifier

Contains code for the `ImageClassifier` class, which loads a neural network on the Movidius NCS, takes pictures using a Pi camera, passes the (preprocessed) pictures through the stick and decodes the result into a bounding box around the category of your choice.

https://picamera.readthedocs.io/en/latest/fov.html

Main workflow when importing:
1. `import image_classifier`
2. `IC = image_classifier.ImageClassifier()`
3. Repeat:
    - `bb, bb_angles, photo_time = IC.get_result()`
    - `if bb == -1:`
        - `    pass`
    - `else:`
        - `    (x1, y1), (x2, y2) = bb  # do stuff`

---

TODO: Some of the code comes from the Movidius GitHub repo, and should be attributed properly!

---

I find development using a notebook to be quite a bit easier than developing using a regular python file. Unfortunately, you can't import a `.ipynb` as a module. So, here's the workflow:
1. Use this file to understand the code and make changes
2. When you want to commit a change, click `Kernel > Restart and Clear Output` to remove your outputs and make the file a bit smaller (it shows up as fewer lines in the git commit)
3. Run the command `jupyter nbconvert --to=python image_classifier.ipynb` to generate a `.py` file which can be imported as a module. Just make sure that any debugging code doesn't run if this is imported as a module into another file!

In [None]:
import os, time, multiprocessing, numpy as np, PIL.Image
import picamera, picamera.array
import utils

## Save (some) photos to disk

In [None]:
class PhotoSaver():
    """Save every `n`-th photo passed to it as a `.npy` file in a fresh directory.

    On construction a new output directory is created. If the requested
    directory already exists, a numeric suffix (`0`, `1`, ...) is appended
    until an unused name is found, so earlier logs are never overwritten.

    Args:
        every_n_photos: `maybe_save_photo` writes one photo out of every
            `every_n_photos` calls (1 means every photo is saved).
        output_dir: Requested directory name; a trailing '/' is optional.

    Attributes:
        output_dir: The directory actually used, always ending in '/'.
        n: Number of photos seen by `maybe_save_photo` since the last save.
        i: Index used in the file name of the next photo to be written.
    """
    def __init__(self, every_n_photos=1, output_dir='logged_photos/'):
        self.every_n_photos = every_n_photos
        self.n = 0
        self.i = 0

        # Only strip real trailing slashes (slicing off the last character
        # would mangle a name that was passed without one).
        base_dir = output_dir.rstrip('/')

        # os.path.exists works for nested/absolute paths too, unlike a lookup
        # in the listing of the current working directory.
        candidate = base_dir
        suffix = 0
        while os.path.exists(candidate):
            candidate = base_dir + str(suffix)
            suffix += 1

        os.makedirs(candidate)
        self.output_dir = candidate + '/'

    def save_photo(self, arr):
        """Unconditionally write `arr` to `photo_<i>.npy` and advance the file index."""
        np.save(self.output_dir + 'photo_%i.npy' % self.i, arr)
        self.i += 1

    def maybe_save_photo(self, arr):
        """Count this photo and save it if it is the `every_n_photos`-th since the last save."""
        self.n += 1
        if self.n >= self.every_n_photos:
            self.n = 0
            self.save_photo(arr)

## The ImageClassifier class

In [None]:
def picam_streamer(image_queue, e, # for multiprocessing
                  camera_resolution, colourmode,
                  nn_shape, scale, mean,
                  debug):
    """Capture frames from the Pi camera, preprocess them and feed them to `image_queue`.

    Intended to run as the target of a `multiprocessing.Process`. Loops until
    the event `e` is set.

    For every frame that is queued, TWO items are put on `image_queue`, in
    this order: the preprocessed image, then the `time.time()` timestamp taken
    right after capture + resize. Consumers must read them in pairs.

    Args:
        image_queue: Queue receiving (image, timestamp) pairs as consecutive items.
        e: Shutdown event; the loop exits once `e.is_set()` is true.
        camera_resolution: Passed to `picamera.PiCamera(resolution=...)`.
        colourmode: Capture format passed to `capture_continuous` (e.g. 'rgb').
        nn_shape: Target size handed to `PIL.Image.resize`.
        scale: Multiplier applied after mean subtraction.
        mean: Value(s) subtracted from the image before scaling.
        debug: When truthy, print timing information for each loop iteration.
    """
    # Intermediate size produced by the camera pipeline; the original author
    # picked it as the closest supported size to an nn_shape of (300, 300).
    capture_size = (320, 304)

    # Loop-invariant conversions, hoisted out of the capture loop.
    mean_f32 = np.float32(mean)
    scale_f32 = np.float32(scale)

    # `Image.ANTIALIAS` was an alias of LANCZOS and has been removed in
    # Pillow 10; `Image.LANCZOS` is available in old and new releases alike.
    resample_filter = PIL.Image.LANCZOS

    if debug: print('picam_streamer: initialising camera')
    with picamera.PiCamera(resolution=camera_resolution, framerate=40, sensor_mode=5) as camera:
        frame = picamera.array.PiRGBArray(camera, size=capture_size)
        cont_capture = camera.capture_continuous(frame, colourmode,
                                                 resize=capture_size, # use GPU for resizing - will resize to nn_shape later
                                                 use_video_port=True)

        next(cont_capture)  # grab a first frame before entering the loop; it is never queued

        while True:
            if debug: t = time.time()

            if e.is_set():
                print('picam_streamer: shutting down')
                del cont_capture
                del frame
                break

            # Rewind and empty the capture buffer so the next frame overwrites it.
            frame.seek(0)
            frame.truncate(0)
            next(cont_capture)
            img = np.array(PIL.Image.fromarray(frame.array).resize(nn_shape, resample_filter))
            photo_time = time.time()
            if debug: print('picam_streamer: time to capture + resize photo: %d [ms]' % ((photo_time-t)*1000))

            if debug: _t = time.time()
            preprocessed_img = (img - mean_f32) * scale_f32
            if debug: print('picam_streamer: time to preprocess image: %d [ms]' % ((time.time()-_t)*1000))

            # Drop the frame when the consumer is lagging, rather than letting
            # stale frames pile up in the queue.
            if image_queue.qsize() <= 2:
                image_queue.put(preprocessed_img)
                image_queue.put(photo_time)
            elif debug:
                print('picam_streamer: skipping adding photo.  image_queue.qsize() =', image_queue.qsize())

            if debug: print('picam_streamer: total loop time: %d [ms]' % ((time.time()-t)*1000))

In [None]:
def nn_infer(image_queue, dict_queue, e, # for multiprocessing
             graph_filename,
             nn_shape,
             confidance_threshold,
             photo_logging_params,
             debug):
    """Run inference on the Movidius NCS for every image taken from `image_queue`.

    Intended to run as the target of a `multiprocessing.Process`. Items are
    read from `image_queue` in pairs (preprocessed image, then capture
    timestamp). Each image is run through the graph, the raw output is
    deserialised with `utils.deserialize_ssd`, the timestamp is stored under
    the key 'photo_time', and the resulting dict is put on `dict_queue`.

    The loop ends when the event `e` is set. On any exit after the device was
    opened (normal or via an exception) the NCS resources are released and `e`
    is set, so that the camera process does not keep producing frames nobody
    will consume.

    Args:
        image_queue: Queue of alternating (image, timestamp) items.
        dict_queue: Queue receiving one result dict per processed image.
        e: Shutdown event shared with the other processes.
        graph_filename: Path of the compiled graph file to load onto the NCS.
        nn_shape: Passed through to `utils.deserialize_ssd`.
        confidance_threshold: Passed through to `utils.deserialize_ssd`.
        photo_logging_params: `(every_n_photos, output_dir)` for `PhotoSaver`.
        debug: When truthy, print timing information for each step.
    """
    import queue  # local import: only needed for the Empty exception raised by get(timeout=...)

    if debug: print('nn_infer: importing MVNC library and opening NCS device')
    import mvnc.mvncapi as mvnc  # have to import the library here to get the thing to work, for some reason

    # How long to wait for an image before re-checking the shutdown event.
    # Without a timeout this process could block forever on an empty queue
    # after the camera process has already shut down.
    poll_timeout_s = 1

    ## Start of Movidius code:
    devices = mvnc.enumerate_devices()
    if len(devices) == 0:
        print('nn_infer: no NCS devices found. Shutting down.')
        e.set()
        return

    # Get a handle to the first enumerated device and open it
    device = mvnc.Device(devices[0])
    device.open()

    graph = None
    fifo_in = None
    fifo_out = None
    try:
        # Read the graph file into a buffer
        with open(graph_filename, mode='rb') as f:
            blob = f.read()

        # Load the graph buffer into the NCS
        graph = mvnc.Graph(graph_filename)
        fifo_in, fifo_out = graph.allocate_with_fifos(device, blob)
        ## End of Movidius code

        # Logs one out of every photo_logging_params[0] images into photo_logging_params[1]
        PS = PhotoSaver(photo_logging_params[0], photo_logging_params[1])

        while True:
            if debug: t = time.time()

            if e.is_set():
                print('nn_infer: shutting down')
                break

            try:
                preprocessed_img = image_queue.get(timeout=poll_timeout_s)
            except queue.Empty:
                continue  # nothing to do yet; go back and re-check the shutdown event

            try:
                # The producer puts the timestamp immediately after the image.
                photo_time = image_queue.get(timeout=poll_timeout_s)
            except queue.Empty:
                # The pair was never completed, so the queue can no longer be
                # trusted to alternate image/timestamp. Stop rather than risk
                # treating a timestamp as an image.
                print('nn_infer: shutting down')
                break

            if debug: _t = time.time()
            graph.queue_inference_with_fifo_elem(fifo_in, fifo_out, preprocessed_img, None)
            if debug: print('nn_infer: queueing time: %d [ms]' % ((time.time()-_t)*1000))

            # Saving happens while the NCS is busy with the inference queued above.
            if debug: _t = time.time()
            PS.maybe_save_photo(preprocessed_img)
            if debug: print('nn_infer: save photo time: %d [ms]' % ((time.time()-_t)*1000))

            ## Start of Movidius code:
            if debug: _t = time.time()
            output, userobj = fifo_out.read_elem()
            if debug: print('nn_infer: result retrieval time: %d [ms]' % ((time.time()-_t)*1000))

            # Deserialize the output into a python dictionary
            output_dict = utils.deserialize_ssd(output, nn_shape, confidance_threshold) # this takes very little time
            ## End of Movidius code

            output_dict['photo_time'] = photo_time
            dict_queue.put(output_dict)
            if debug: print('nn_infer: dict_queue.qsize() =', dict_queue.qsize())
            if debug: print('nn_infer: loop time: %d [ms]' % ((time.time()-t)*1000))
    finally:
        # Tell the other processes to stop too: once this loop is gone nobody
        # consumes image_queue any more.
        e.set()

        # Release the NCS resources in the reverse order of their creation.
        if fifo_in is not None:
            fifo_in.destroy()
        if fifo_out is not None:
            fifo_out.destroy()
        if graph is not None:
            graph.destroy()
        device.close()
        device.destroy()

In [None]:
class ImageClassifier():
    """Front end that runs camera capture and NCS inference in two child processes.

    Constructing an instance starts both processes immediately:
    `picam_streamer` fills `image_queue` and `nn_infer` turns those images
    into result dicts on `dict_queue`. Call `get_result()` repeatedly to
    consume results, and `close()` when done.
    """
    def __init__(self,
                 graph_filename='graph',
                 label_filename='categories.txt',  # must correspond to the specific network
                 class_of_interest='person',
                 colourmode='rgb',
                 camera_resolution=(1280, 720),# (width, height)
                 nn_shape=(300, 300),          # (width, height)
                 camera_FOV_deg=(62.2, 48.8),  # (width, height)
                 mean=(127.5, 127.5, 127.5),   # depends on the colourmode
                 scale=0.00789,                # = 1/127
                 confidance_threshold=0.4,     # note that this isn't really a percentage as we know it - just a number that
                                               # represents prediction confidence in some way
                 photo_logging_params=(10, 'photo_logging/'), # save every `10` photos in `photo_logging/` dir
                 debug=False):
        """Read the label file, then create and start the two worker processes.

        Raises:
            OSError: if `label_filename` cannot be opened.
            ValueError: if `class_of_interest` is not listed in the label file.
        """
        # Nothing needs tearing down until both child processes are running;
        # this also keeps __del__ safe if anything below raises.
        self._closed = True

        # One label per line; a 'classes' header line, if present, is skipped.
        with open(label_filename) as label_file:
            labels = [line.rstrip('\n') for line in label_file if line != 'classes\n']
        self.class_of_interest = labels.index(class_of_interest) # note conversion from string to ID (int)
        self.nn_shape = nn_shape
        self.camera_FOV_deg = camera_FOV_deg

        self.image_queue = multiprocessing.Queue()
        self.dict_queue = multiprocessing.Queue()
        self.e = multiprocessing.Event()  # set to ask the child processes to shut down

        self.infer_process = multiprocessing.Process(
                                                target=nn_infer,
                                                args=(self.image_queue, self.dict_queue, self.e,
                                                      graph_filename,
                                                      nn_shape,
                                                      confidance_threshold,
                                                      photo_logging_params,
                                                      debug))

        self.photo_process = multiprocessing.Process(
                                                target=picam_streamer,
                                                args=(self.image_queue, self.e,
                                                      camera_resolution, colourmode,
                                                      nn_shape, scale, mean,
                                                      debug))

        self.infer_process.start()
        self.photo_process.start()
        self._closed = False
        if debug: print('ImageClassifier: started child processes')

    def get_result(self, debug=False):
        """Block until the next inference result is available and return it.

        Returns:
            A 3-tuple `(bb, bb_angles, photo_time)`. `bb` and `bb_angles` come
            from `utils.output_dict_to_bb_and_angles`; `photo_time` is the
            capture timestamp stored in the result dict.
            NOTE(review): the usage notes at the top of this notebook compare
            `bb` against -1, presumably the "class not found" marker - confirm
            in `utils`.
        """
        if debug: print('ImageClassifier: getting output dict')
        output_dict = self.dict_queue.get()
        bb, bb_angles = utils.output_dict_to_bb_and_angles(output_dict,
                                                           self.class_of_interest,
                                                           self.nn_shape,
                                                           self.camera_FOV_deg)
        return bb, bb_angles, output_dict['photo_time']

    def close(self):
        """Signal the child processes to stop and release the queues.

        Safe to call more than once, and safe on an instance whose `__init__`
        did not complete; later calls do nothing.
        """
        if getattr(self, '_closed', True):
            return
        self._closed = True

        self.e.set()  # setting the flag signals the other processes to shut down

        # Give each child up to 2 seconds to finish. The timeout keeps close()
        # from hanging if a child is stuck.
        self.infer_process.join(timeout=2)
        self.photo_process.join(timeout=2)

        self.image_queue.close()
        self.dict_queue.close()

    def __del__(self):
        # Best-effort cleanup for callers that forgot to call close().
        self.close()