In [1]:
from __future__ import print_function

import cv2
import tensorflow.contrib.tensorrt as trt
import tensorflow as tf
import numpy as np
import ast
import pyrealsense2 as rs
import serial
import time

# Frame size in pixels. Used as the output size of the GST_STR pipeline below
# and to scale normalized detection boxes back to pixel coordinates.
FRAME_WIDTH = 640
FRAME_HEIGHT = 480
# GStreamer pipeline description that captures from nvarguscamerasrc and
# scales to FRAME_WIDTH x FRAME_HEIGHT. In the visible cells it is only
# referenced by a commented-out cv2.VideoCapture call.
GST_STR = 'nvarguscamerasrc \
    ! video/x-raw(memory:NVMM), width=3280, height=2464, format=(string)NV12, framerate=(fraction)30/1 \
    ! nvvidconv ! video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx \
    ! videoconvert \
    ! appsink' % (FRAME_WIDTH, FRAME_HEIGHT)
# Title of the OpenCV preview window.
WINDOW_NAME = 'TF-TRT Object Detection'
# Serialized graph and the label list (one class name per line), both
# resolved relative to the working directory.
MODEL_FILE = './ssd_inception_v2_coco_trt.pb'
LABEL_FILE = './coco-labels-paper.txt'

def load_graph_def(file):
    """Read a serialized GraphDef from `file` and return the parsed message.

    Args:
        file: Path to a binary protobuf graph file.

    Returns:
        A tf.GraphDef populated from the file contents.
    """
    with tf.gfile.GFile(file, 'rb') as graph_file:
        serialized = graph_file.read()
    parsed = tf.GraphDef()
    parsed.ParseFromString(serialized)
    return parsed

def load_labels(file):
    """Return the class names stored in `file`, one per line.

    The returned list starts with the placeholder 'unlabeled' at index 0,
    followed by the lines of the file in order.

    Args:
        file: Path to a text file containing one label per line.

    Returns:
        A list of label strings.
    """
    with open(file, 'r') as label_file:
        names = label_file.read().splitlines()
    return ['unlabeled'] + names

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
'''
A simple python application to detect objects from camera captured image
using TF-TRT for NVIDIA Jetson Nano Developer Kit.
This application assumes the TensorRT optimized ssd_mobilenet_v1_coco model.
Refer to the NVIDIA-AI-IOT/tf_trt_models GitHub repository for details on
the model.
'''

# Class-name table; index 0 is the 'unlabeled' placeholder from load_labels.
labels = load_labels(LABEL_FILE)
num_labels = len(labels)

# Parse the serialized graph from disk.
print('Loading graph definition...', end='', flush=True)
trt_graph_def = load_graph_def(MODEL_FILE)
print('Done.')

# Create the session with gpu_options.allow_growth enabled, then import the
# graph with an empty name prefix so tensors keep their original names.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf_sess = tf.Session(config=tf_config)
print('Importing graph definition to TensorFlow...', end='', flush=True)
tf.import_graph_def(trt_graph_def, name='')
print('Done.')

# Handles to the input placeholder and the four detection outputs.
input_names = ['image_tensor']
tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0')
tf_scores = tf_sess.graph.get_tensor_by_name('detection_scores:0')
tf_boxes = tf_sess.graph.get_tensor_by_name('detection_boxes:0')
tf_classes = tf_sess.graph.get_tensor_by_name('detection_classes:0')
tf_num_detections = tf_sess.graph.get_tensor_by_name('num_detections:0')

# GStreamer camera capture is disabled; the cells below read frames from a
# RealSense pipeline instead.
#print('Configuring camera...', end = '', flush = True)
#cap = cv2.VideoCapture(GST_STR, cv2.CAP_GSTREAMER)
#print('Done.')


Loading graph definition...Done.
Importing graph definition to TensorFlow...Done.


In [39]:
# Person-following loop.
#
# Detects people in the RealSense colour stream, estimates the distance to
# each one from the depth stream, and sends the nearest person's distance and
# horizontal position to the serial device on /dev/ttyACM0. Press ESC in the
# preview window to stop.
#
# NOTE(review): the depth frame is not aligned to the colour frame here, so
# colour-space boxes are applied to depth pixels as-is — confirm whether
# alignment is needed for this camera setup.
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, FRAME_WIDTH, FRAME_HEIGHT, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, FRAME_WIDTH, FRAME_HEIGHT, rs.format.z16, 30)
profile = pipeline.start(config)

# Everything after start() runs under try/finally so the camera and the
# preview window are released even when the cell is interrupted.
try:
    depth_sensor = profile.get_device().first_depth_sensor()
    # Factor that converts raw depth values into distance units
    # (presumably metres — confirm against the sensor documentation).
    depth_scale = depth_sensor.get_depth_scale()

    # Only a centred patch covering the fraction `alp` of the box area is
    # averaged: each side keeps sqrt(alp) of its length, so `res` is the
    # fraction of each side that is trimmed away.
    alp = 0.2
    res = 1 - alp**0.5
    val_size = 2          # number of values sent per frame
    values = [0, 0]       # [scaled distance, scaled horizontal position]
    threshold = 3.        # people farther away than this are ignored
    devide = 50           # horizontal position is quantised to 0..devide

    with serial.Serial('/dev/ttyACM0', 115200, timeout=1) as ser:
        while True:
            # Capture frame; skip the iteration before touching the data if
            # either stream is missing.
            frames = pipeline.wait_for_frames()
            depth_frame = frames.get_depth_frame()
            RGB_frame = frames.get_color_frame()
            if not depth_frame or not RGB_frame:
                continue

            img = np.asanyarray(RGB_frame.get_data())
            imgConv = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
            imgRs = cv2.resize(imgConv, (300, 300))
            # Built once per frame rather than once per detection.
            depth_image = np.asanyarray(depth_frame.get_data())

            # Do inference
            scores, boxes, classes, num_detections = tf_sess.run(
                [tf_scores, tf_boxes, tf_classes, tf_num_detections],
                feed_dict={tf_input: imgRs[None, ...]})

            boxes = boxes[0]  # index by 0 to remove batch dimension
            scores = scores[0]
            classes = classes[0]
            num_detections = num_detections[0]

            ls_d = []    # distance of each detected person
            ls_inf = []  # label text of each detected person
            ls_box = []  # pixel box [y1, x1, y2, x2] of each detected person
            ls_ct = []   # horizontal box centre in pixels
            for i in range(int(num_detections)):
                # Look up label string
                class_id = int(classes[i])
                label = labels[class_id] if class_id < num_labels else 'unlabeled'
                if label != "person":
                    continue

                score = scores[i]
                # Scale the normalized [y1, x1, y2, x2] box to pixels.
                box = boxes[i] * np.array(
                    [FRAME_HEIGHT, FRAME_WIDTH, FRAME_HEIGHT, FRAME_WIDTH])
                box = box.astype(int)
                y1, x1, y2, x2 = box[0], box[1], box[2], box[3]

                # Shrink the box symmetrically to the central patch.
                ud = int((x2 - x1) * res / 2)
                vd = int((y2 - y1) * res / 2)
                d_box = depth_image[y1 + vd:y2 - vd, x1 + ud:x2 - ud]
                if d_box.size == 0:
                    # A degenerate box would give a NaN mean, which breaks
                    # the nearest-person selection below.
                    continue

                # NOTE(review): RealSense appears to report pixels without a
                # depth reading as 0, which would pull this mean toward zero
                # — confirm whether such pixels should be excluded.
                distance = np.mean(d_box) * depth_scale

                ls_d.append(distance)
                ls_inf.append('%s: %f' % (label, score))
                ls_box.append(box[0:4])
                ls_ct.append((x1 + x2) / 2)

            if ls_d:
                # Nearest person wins.
                min_i = ls_d.index(min(ls_d))
                mbox = ls_box[min_i]
                min_d = ls_d[min_i]
                if min_d <= threshold:
                    # Distance 0..4 maps to 0..255; position maps to 0..devide.
                    min_d = int(255 * min_d / 4.)
                    center = int(devide * (ls_ct[min_i] / float(FRAME_WIDTH)))
                    values = [min_d, center]
                    for i in range(val_size):
                        # 3-byte packet: header with the top bit set
                        # (128 + value index), then bits 7-13 and bits 0-6 of
                        # the value as two 7-bit bytes.
                        head = 128 + i
                        high = (values[i] >> 7) & 127
                        low = values[i] & 127
                        ser.write(bytes([head, high, low]))

                    text_x = int(mbox[1])
                    text_y = int(mbox[2])
                    cv2.putText(img, ls_inf[min_i], (text_x, text_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
                    cv2.putText(img, 'posi: %f' % (center - devide / 2), (text_x, text_y - 25),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)

            # Show image
            cv2.imshow(WINDOW_NAME, img)

            # Check if user hits ESC key to exit
            key = cv2.waitKey(1)
            if key == 27:  # ESC
                break
finally:
    # The serial port is closed by the `with` block; release the camera and
    # the window here.
    pipeline.stop()
    cv2.destroyAllWindows()


KeyboardInterrupt: 

In [None]:
# Manual cleanup for the serial person-following cell. Safe to run after the
# loop cell has already released its resources.
ser.close()
try:
    pipeline.stop()
except RuntimeError as err:
    # stop() raises when the pipeline is not running; report it and carry on
    # so the windows are still closed.
    print('pipeline.stop() skipped: %s' % err)
cv2.destroyAllWindows()

In [22]:
# Preview loop without serial output.
#
# Detects people in the RealSense colour stream, draws each detection box and
# the central patch used for depth averaging, and prints
# [distance, horizontal position] for the nearest person. Press ESC in the
# preview window to stop.
#
# NOTE(review): the depth frame is not aligned to the colour frame here, so
# colour-space boxes are applied to depth pixels as-is — confirm whether
# alignment is needed for this camera setup.
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, FRAME_WIDTH, FRAME_HEIGHT, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, FRAME_WIDTH, FRAME_HEIGHT, rs.format.z16, 30)
profile = pipeline.start(config)

# Everything after start() runs under try/finally so the camera and the
# preview window are released even when the cell is interrupted.
try:
    depth_sensor = profile.get_device().first_depth_sensor()
    # Factor that converts raw depth values into distance units
    # (presumably metres — confirm against the sensor documentation).
    depth_scale = depth_sensor.get_depth_scale()

    # Only a centred patch covering the fraction `alp` of the box area is
    # averaged: each side keeps sqrt(alp) of its length, so `res` is the
    # fraction of each side that is trimmed away.
    alp = 0.05
    res = 1 - alp**0.5
    val_size = 2
    values = [0, 0]   # [distance, scaled horizontal position]
    threshold = 3.    # people farther away than this are ignored

    while True:
        # Capture frame; skip the iteration before touching the data if
        # either stream is missing.
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()
        RGB_frame = frames.get_color_frame()
        if not depth_frame or not RGB_frame:
            continue

        img = np.asanyarray(RGB_frame.get_data())
        imgConv = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
        imgRs = cv2.resize(imgConv, (300, 300))
        # Built once per frame rather than once per detection.
        depth_image = np.asanyarray(depth_frame.get_data())

        # Do inference
        scores, boxes, classes, num_detections = tf_sess.run(
            [tf_scores, tf_boxes, tf_classes, tf_num_detections],
            feed_dict={tf_input: imgRs[None, ...]})

        boxes = boxes[0]  # index by 0 to remove batch dimension
        scores = scores[0]
        classes = classes[0]
        num_detections = num_detections[0]

        ls_d = []    # distance of each detected person
        ls_inf = []  # label text of each detected person
        ls_box = []  # pixel box [y1, x1, y2, x2] of each detected person
        ls_ct = []   # horizontal box centre in pixels
        for i in range(int(num_detections)):
            # Look up label string
            class_id = int(classes[i])
            label = labels[class_id] if class_id < num_labels else 'unlabeled'
            if label != "person":
                continue

            score = scores[i]
            # Scale the normalized [y1, x1, y2, x2] box to pixels.
            box = boxes[i] * np.array(
                [FRAME_HEIGHT, FRAME_WIDTH, FRAME_HEIGHT, FRAME_WIDTH])
            box = box.astype(int)
            y1, x1, y2, x2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])

            # Shrink the box symmetrically to the central patch.
            ud = int((x2 - x1) * res / 2)
            vd = int((y2 - y1) * res / 2)
            u1, u2 = x1 + ud, x2 - ud
            v1, v2 = y1 + vd, y2 - vd

            # Green: full detection box. Blue: patch averaged for depth.
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.rectangle(img, (u1, v1), (u2, v2), (255, 0, 0), 3)

            d_box = depth_image[v1:v2, u1:u2]
            if d_box.size == 0:
                # A degenerate box would give a NaN mean, which breaks the
                # nearest-person selection below.
                continue

            # NOTE(review): RealSense appears to report pixels without a
            # depth reading as 0, which would pull this mean toward zero —
            # confirm whether such pixels should be excluded.
            distance = np.mean(d_box) * depth_scale

            ls_d.append(distance)
            ls_inf.append('%s: %f' % (label, score))
            ls_box.append(box[0:4])
            ls_ct.append((x1 + x2) / 2)

        if ls_d:
            # Nearest person wins.
            min_i = ls_d.index(min(ls_d))
            mbox = ls_box[min_i]
            min_d = ls_d[min_i]
            if min_d <= threshold:
                # Horizontal position is mapped to 0..255 across the frame.
                center = int(255 * (ls_ct[min_i] / float(FRAME_WIDTH)))
                values = [min_d, center]
                print(values)

                text_x = int(mbox[1])
                text_y = int(mbox[2])
                cv2.putText(img, ls_inf[min_i], (text_x, text_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
                cv2.putText(img, 'distance: %f' % (min_d), (text_x, text_y - 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 200, 100), 1, cv2.LINE_AA)

        # Show image
        cv2.imshow(WINDOW_NAME, img)

        # Check if user hits ESC key to exit
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break
finally:
    pipeline.stop()
    cv2.destroyAllWindows()


[0.38795068116599024, 48]
[0.3729007980908779, 45]
[0.40504452473308716, 46]
[1.5096617258404192, 48]
[1.2423159925356773, 48]
[1.2217451328918734, 46]
[1.3955323305072116, 47]
[1.2118590344129652, 46]
[1.2578082358433116, 46]
[0.41070226309705343, 45]
[0.4351687690084976, 46]
[0.4390055544484819, 48]
[0.43917521048534447, 53]
[0.4435888346286601, 44]
[0.4310379515076859, 43]
[0.4049875664056876, 43]
[0.4041542578148206, 43]
[0.4001651133464448, 42]
[0.4001795061869929, 42]
[0.3996749403671879, 42]


KeyboardInterrupt: 

In [8]:
# Manual cleanup for the preview cell. Safe to run after the loop cell has
# already released its resources.
try:
    pipeline.stop()
except RuntimeError as err:
    # stop() raises when the pipeline is not running; report it and carry on
    # so the windows are still closed.
    print('pipeline.stop() skipped: %s' % err)
cv2.destroyAllWindows()