## Initialize

In [None]:
import os
import sys
import time
import threading
import cProfile

In [None]:
import cv2
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm

In [None]:
import tensorflow as tf

In [None]:
gpus = tf.config.list_physical_devices('GPU')

# Don't allocate huge memory unnecessarily: let TensorFlow grow its GPU memory
# on demand.  The setting is applied to every visible GPU rather than only the
# first one, because TensorFlow requires memory growth to be configured
# identically on all GPUs.  With no GPU the loop simply does nothing.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


## Define panoramasdk wrapper functions

In [None]:
import panoramasdk

# SDK node handle shared by the wrapper functions in this notebook: they read
# frames through node.inputs.video_in and write them through node.outputs.video_out.
node = panoramasdk.node()

In [None]:
# Media objects returned by the most recent node.inputs.video_in.get() call.
# getMediasFromCamera() refreshes this list in place and putMediasToHdmi()
# writes edited pixels back into these objects before outputting them.
latest_media_list = []

class CopiedMedia:
    """Plain attribute holder for a copy of one media object's fields.

    getMediasFromCamera() sets ``image``, ``is_cached``, ``stream_uri``,
    ``stream_id`` and ``time_stamp`` on each instance it creates.
    """

def getMediasFromCamera():
    """Fetch the current frames and return editable copies of them.

    The media objects returned by ``node.inputs.video_in.get()`` are stored in
    the module-level ``latest_media_list`` (replaced in place) so that
    putMediasToHdmi() can write into them later.

    Returns:
        A list with one CopiedMedia per media object.  ``image`` is a
        ``.copy()`` of the source image; the other fields are plain
        assignments of the source attributes.
    """

    def _snapshot(source):
        # Duplicate the pixel data so that drawing on the copy does not touch
        # the SDK-owned image until it is explicitly written back.
        duplicate = CopiedMedia()
        duplicate.image = source.image.copy()
        duplicate.is_cached = source.is_cached
        duplicate.stream_uri = source.stream_uri
        duplicate.stream_id = source.stream_id
        duplicate.time_stamp = source.time_stamp
        return duplicate

    frames = node.inputs.video_in.get()

    # Slice assignment keeps the same list object that putMediasToHdmi() reads.
    latest_media_list[:] = frames

    return [_snapshot(frame) for frame in frames]

def putMediasToHdmi(copied_media_list):
    """Write edited images back into the latest media objects and output them.

    Args:
        copied_media_list: objects with an ``image`` attribute, paired by
            position with ``latest_media_list``.  Pairing stops at the shorter
            of the two lists, so passing fewer items updates only the leading
            media objects.

    The whole ``latest_media_list`` is passed to ``node.outputs.video_out.put``
    regardless of how many images were written back.
    """
    pairs = zip(latest_media_list, copied_media_list)

    for sdk_media, edited_media in pairs:
        # Copy pixels into the existing image buffer instead of rebinding it.
        sdk_media.image[:] = edited_media.image

    node.outputs.video_out.put(latest_media_list)


## Confirm input from Camera and output to HDMI

In [None]:
# Grab one set of frames; the bare name below displays the list as the cell output.
media_list = getMediasFromCamera()

media_list

In [None]:
# Inspect the shape and dtype of the first frame's image.
media_list[0].image.shape, media_list[0].image.dtype

In [None]:
def previewImage( image ):
    """Display ``image`` in a 10x10 inch matplotlib figure.

    The image is converted with ``cv2.COLOR_BGR2RGB`` before display, i.e. it
    is treated as BGR (OpenCV channel order).
    """
    rgb_for_display = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    figure_size = (10, 10)
    plt.figure(figsize=figure_size)
    plt.imshow(rgb_for_display, interpolation='antialiased')

In [None]:
# Show the first captured frame inline.
previewImage(media_list[0].image)

In [None]:
# Write only the first frame back into its media object, then output the media list.
putMediasToHdmi(media_list[:1])

## Download and load model

In [None]:
# Download and unpack the model archive only when the extracted directory is
# not already present.  The lines starting with `!` are notebook shell commands.
if not os.path.exists( "./ssd_mobilenet_v2_320x320_coco17_tpu-8" ):
    !wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz
    !tar xvzf ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz

In [None]:
# Load the SavedModel from the extracted archive; the bare name displays it as the cell output.
model = tf.saved_model.load("./ssd_mobilenet_v2_320x320_coco17_tpu-8/saved_model/")
model

In [None]:
# The "serving_default" signature is the callable that preprocessAndDetect() invokes.
detector = model.signatures["serving_default"]
detector

## Detect objects

In [None]:
# Size passed to tf.image.resize() before detection.
# NOTE(review): the model directory name says 320x320 - confirm that 300x300
# is intentional.
input_resolution = ( 300, 300 )

def preprocessAndDetect( image_list ):
    """Run the detector on a list of images.

    Args:
        image_list: images that ``np.array`` can stack into a single array.
            The last axis is reversed before detection (BGR to RGB).

    Returns:
        Whatever ``detector`` returns for the prepared tensor; other functions
        in this file read its "detection_classes", "detection_scores" and
        "detection_boxes" entries.
    """
    stacked = tf.convert_to_tensor(np.array(image_list))

    resized = tf.image.resize(stacked, input_resolution)

    # BGR to RGB: flip the channel (last) axis.
    rgb = tf.reverse(resized, axis=[-1])

    # The tensor is cast to uint8 before being handed to the detector.
    return detector(tf.cast(rgb, dtype=tf.uint8))
    

In [None]:
# Run detection on the first frame; the bare name displays the raw result.
detection_result = preprocessAndDetect( [ media_list[0].image ] )

detection_result

In [None]:
score_threshold = 0.5
box_color = (255,0,0)
box_thickness = 2

def renderResult( image, detection_result ):
    """Draw a rectangle on ``image`` (in place) for each detected person.

    Only entries of the first batch item whose class equals 1 (person) and
    whose score is at least ``score_threshold`` are drawn.

    Args:
        image: 3-dimensional image array; its first two dimensions are used as
            height and width to scale the box coordinates.
        detection_result: mapping with "detection_classes", "detection_scores"
            and "detection_boxes" entries whose first item has ``.numpy()``.
    """
    height, width, _channels = image.shape

    classes = detection_result["detection_classes"][0].numpy()
    scores = detection_result["detection_scores"][0].numpy()
    boxes = detection_result["detection_boxes"][0].numpy()

    for klass, score, box in zip(classes, scores, boxes):
        is_confident_person = klass == 1 and score >= score_threshold
        if not is_confident_person:
            continue

        # Box entries 0 and 2 are scaled by the height (y), entries 1 and 3 by
        # the width (x); int() truncates the products to pixel coordinates.
        first_corner = (int(box[1].item() * width), int(box[0].item() * height))
        second_corner = (int(box[3].item() * width), int(box[2].item() * height))

        cv2.rectangle(
            image,
            first_corner,
            second_corner,
            color=box_color,
            thickness=box_thickness,
            lineType=cv2.LINE_8,
        )


In [None]:
# Draw the detection boxes onto the first frame (modifies it in place) and preview it.
renderResult( media_list[0].image, detection_result )

previewImage(media_list[0].image)

## Track positions

In [None]:
# Parallel lists: entry k of each list describes the same observation.
# Timestamps are appended in increasing order, so the oldest entries are first.
people_positions_x = []
people_positions_y = []
people_positions_timestamp = []

# Observations older than this many seconds are dropped.
forget_after = 30 # 30 sec
#forget_after = 5 * 60 # 5min
#forget_after = 60 * 60 # 1hour

def trackPeoplePositions( detection_result, min_score=0.5 ):
    """Record where people were detected and forget observations that are too old.

    For every entry of the first batch item whose class equals 1 (person) and
    whose score is at least ``min_score``, the bottom-center of its box is
    appended to the module-level position lists together with the current time.
    Afterwards all observations that are ``forget_after`` seconds old or older
    are removed.

    The lists are modified in place.  Pruning also works when the lists are
    empty or when every stored observation has expired.

    Args:
        detection_result: mapping with "detection_classes", "detection_scores"
            and "detection_boxes" entries whose first item has ``.numpy()``.
        min_score: minimum detection score for a person to be recorded.
    """
    t_now = time.time()

    detection_classes = detection_result["detection_classes"][0].numpy()
    detection_scores = detection_result["detection_scores"][0].numpy()
    detection_boxes = detection_result["detection_boxes"][0].numpy()

    # add detected positions (bottom-center of boxes)
    for klass, score, box in zip( detection_classes, detection_scores, detection_boxes ):
        if klass == 1 and score >= min_score: # person
            people_positions_x.append( ( box[1] + box[3] ) * 0.5 )
            people_positions_y.append( box[2] )
            people_positions_timestamp.append( t_now )

    # forget old positions: find the first observation that is still fresh.
    # When there is none (empty list, or everything expired) the default makes
    # the deletions below remove every entry.
    cutoff = t_now - forget_after
    first_kept = next(
        ( index for index, t in enumerate( people_positions_timestamp ) if t > cutoff ),
        len( people_positions_timestamp ),
    )

    del people_positions_x[:first_kept]
    del people_positions_y[:first_kept]
    del people_positions_timestamp[:first_kept]

    #print( "Number of data points :", len(people_positions_timestamp) )


In [None]:
# Record the positions from the detection result computed above.
trackPeoplePositions( detection_result )

## Render heatmap

In [None]:
# Number of histogram bins along (y, x).
heatmap_resolution = (90,160)
# Gaussian blur sigma, in histogram bins.
heatmap_sigma = 5

def renderHeatmap():
    """Render the tracked people positions as a heatmap image.

    The positions in ``people_positions_y`` / ``people_positions_x`` are
    binned into a 2-D histogram over the range [0, 1] x [0, 1], blurred,
    and drawn with the "jet" colormap on a 16x9 inch figure without axes.

    Returns:
        The rendered figure as a uint8 array of shape (height, width, 3) in
        BGR channel order (the order previewImage() and the OpenCV drawing
        calls in this file work with).
    """
    fig, ax1 = plt.subplots( nrows = 1, ncols = 1, figsize=( 16, 9 ) )

    # Close the figure even if rendering fails, so figures do not accumulate
    # when this is called once per frame.
    try:
        hist, _xedges, _yedges = np.histogram2d( people_positions_y, people_positions_x, bins=heatmap_resolution, range=((0,1),(0,1)) )

        # ksize (0,0) lets OpenCV derive the kernel size from sigma.  The border
        # mode is passed by keyword: the 4th positional argument of
        # GaussianBlur is `dst`, not `borderType`.
        hist = cv2.GaussianBlur( hist, (0,0), heatmap_sigma, borderType=cv2.BORDER_DEFAULT )

        ax1.axis("off")
        ax1.imshow(hist, cmap=matplotlib.cm.jet)

        fig.tight_layout( pad=0 )

        fig.canvas.draw()

        # buffer_rgba() exposes the drawn canvas as (height, width, 4) RGBA
        # bytes; it replaces tostring_rgb(), which newer Matplotlib no longer
        # provides.
        rgba = np.asarray( fig.canvas.buffer_rgba() )

        # The canvas is RGB(A); convert to BGR and drop the alpha channel.
        img = cv2.cvtColor( rgba, cv2.COLOR_RGBA2BGR )
    finally:
        plt.close(fig)

    return img


In [None]:
# Render a heatmap from the positions tracked so far and inspect its shape and dtype.
heatmap = renderHeatmap()

heatmap.shape, heatmap.dtype

In [None]:
# Show the rendered heatmap inline.
previewImage( heatmap )

In [None]:
def overlayHeatmap( dst_image, heatmap, weight=0.5 ):
    """Blend ``heatmap`` over ``dst_image`` in place.

    Args:
        dst_image: image that receives the blend; it is overwritten.
        heatmap: image to overlay; it is resized to the width and height of
            ``dst_image`` first.
        weight: share of the heatmap in the result; ``dst_image`` contributes
            ``1 - weight``.
    """
    target_size = (dst_image.shape[1], dst_image.shape[0])  # cv2.resize takes (width, height)
    scaled_heatmap = cv2.resize(heatmap, target_size)

    dst_image[:, :, :] = cv2.addWeighted(
        dst_image, 1 - weight,
        scaled_heatmap, weight,
        0.0,
    )

In [None]:
# Blend the heatmap over the first frame (modifies it in place) and preview the result.
overlayHeatmap( media_list[0].image, heatmap, 0.5 )

previewImage( media_list[0].image )

## Render demo title

In [None]:
text_color = (255,255,255)
text_shadow_color = (0,0,0)
text_thickness = 2
text_shadow_thickness = 2
text_scale = 2

def renderTitle( image, s ):
    """Draw the string ``s`` near the top-left corner of ``image``, in place.

    The text is drawn twice: first in ``text_shadow_color`` with its origin
    shifted by 2 pixels in x and y, then in ``text_color`` on top, which
    gives a drop shadow.

    Args:
        image: image to draw on (modified in place).
        s: text to render.
    """
    # Arguments common to the shadow pass and the foreground pass.
    shared_style = dict(
        fontFace=cv2.FONT_HERSHEY_PLAIN,
        fontScale=text_scale,
        lineType=cv2.LINE_AA,
    )

    cv2.putText( image, s, (22, 40+2), color=text_shadow_color, thickness=text_shadow_thickness, **shared_style )
    cv2.putText( image, s, (20, 40), color=text_color, thickness=text_thickness, **shared_style )


In [None]:
# Draw the demo title onto the first frame (modifies it in place) and preview the result.
renderTitle( media_list[0].image, "Retail - traffic analysis by heatmap" )

previewImage( media_list[0].image )

## Mainloop (single thread)

In [None]:
def mainLoop():
    """Single-threaded processing loop; runs until KeyboardInterrupt.

    Each iteration captures frames, runs detection on the first frame,
    updates the tracked positions, overlays a freshly rendered heatmap, draws
    the detection boxes and the title, and outputs the first frame.
    """
    title = "Retail - traffic analysis by heatmap"

    try:
        while True:
            frames = getMediasFromCamera()
            frame = frames[0].image

            result = preprocessAndDetect([frame])
            trackPeoplePositions(result)

            # All drawing below modifies the frame in place.
            current_heatmap = renderHeatmap()
            overlayHeatmap(frame, current_heatmap)
            renderResult(frame, result)
            renderTitle(frame, title)

            putMediasToHdmi(frames[:1])

    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop the loop.
        pass

In [None]:
#mainLoop()
#cProfile.runctx( "mainLoop()", globals(), locals() )

## Multi-threaded mainloop for smoother video on HDMI

In [None]:
class InferenceThread(threading.Thread):
    """Background thread that runs detection and keeps the latest heatmap.

    Frames are handed over with enqueue(); run() consumes them in batches of
    ``batch_size``, updates the tracked positions and stores the newest
    rendered heatmap in ``heatmap`` (None until the first batch is done).
    """

    def __init__(self):
        super().__init__()
        self.is_canceled = False            # set by cancel(); polled by run()
        self.queue = []                     # pending frames, guarded by self.lock
        self.heatmap = None                 # latest result of renderHeatmap()
        self.lock = threading.Lock()
        self.batch_size = 1

    def enqueue( self, image ):
        """Queue a copy of ``image`` for inference, or drop it if the queue is full.

        The queue holds at most ``batch_size + 1`` frames.  A copy is stored
        because the caller keeps drawing on the frame after handing it over;
        without the copy the detector could see the overlay and title.
        """
        with self.lock:
            if len(self.queue) < self.batch_size + 1:
                self.queue.append(image.copy())

    def run(self):
        """Process queued frames until cancel() is called."""
        while not self.is_canceled:

            # Hold the lock only while touching the queue.
            with self.lock:
                if len(self.queue) >= self.batch_size:
                    batch = self.queue[:self.batch_size]
                    del self.queue[:self.batch_size]
                else:
                    batch = None

            if batch is None:
                # Wait for more frames WITHOUT holding the lock, so enqueue()
                # in the display loop is never blocked by this sleep.
                time.sleep(0.1)
                continue

            detection_result = preprocessAndDetect(batch)
            trackPeoplePositions(detection_result)
            self.heatmap = renderHeatmap()

    def cancel(self):
        """Ask run() to exit after its current iteration."""
        self.is_canceled = True

def mainLoop():
    """Display loop that delegates inference to an InferenceThread.

    Each iteration captures frames, offers the first frame to the inference
    thread, overlays the most recent heatmap (once one exists), draws the
    title and outputs the frame.  Runs until KeyboardInterrupt; the inference
    thread is always cancelled and joined on exit.
    """
    title = "Retail - traffic analysis by heatmap"

    worker = InferenceThread()
    worker.start()

    try:
        while True:
            frames = getMediasFromCamera()
            frame = frames[0].image

            worker.enqueue(frame)

            # No heatmap is available until the worker has finished a batch.
            if worker.heatmap is not None:
                overlayHeatmap(frame, worker.heatmap)

            # Detection boxes are not drawn here: the detection result stays
            # inside the inference thread.
            #renderResult( media_list[0].image, detection_result )

            renderTitle(frame, title)

            putMediasToHdmi(frames[:1])

    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop the loop.
        pass

    finally:
        worker.cancel()
        worker.join()


In [None]:
# Start the loop.  mainLoop here is the most recent definition (the
# multi-threaded one); it runs until the kernel is interrupted.
mainLoop()