### This notebook is used to communicate with Unity VirtualHome over a socket
### Each frame from Unity is sent directly to this Python Notebook where it's processed before being passed into a trained YOLACT Model for Inference in Real-Time. 
### The YOLACT Model is trained on a synthetic home indoor dataset

Author: Adam Goldstein



In [1]:
# Imports

import numpy as np
import urllib
import time
import cv2
from yolact_edge.inference import YOLACTEdgeInference
import zmq
import struct
import gzip


In [2]:
# Load Model

# weights = "yolact_edge/weights/yolact_base_54_800000.pth"
# weights = "weights/yolact_edge_54_800000.pth"
# weights = "weights/yolact_base_26666_1280000.pth"
# weights = "yolact_edge/weights/synthetic_home_20456_900084_interrupt.pth"
weights = "weights/yolact_base_20454_900000.pth"
# All available model configs, depends on which weights

In [3]:
# Load YOLACT Configuration

model_configs = [
    'yolact_base_config',
    'yolact_edge_config',
    'yolact_edge_mobilenetv2_config',
    'yolact_edge_vid_config',
    'yolact_edge_vid_minimal_config',
    'yolact_edge_vid_trainflow_config',
    'yolact_edge_youtubevis_config',
    'yolact_resnet50_config',
    'yolact_resnet152_config',
    'yolact_edge_resnet50_config',
    'yolact_edge_vid_resnet50_config',
    'yolact_edge_vid_trainflow_resnet50_config',
    'yolact_edge_youtubevis_resnet50_config',
]
config = model_configs[1]

# Load dataset
datasets = [
    'SyntheticHome',
    'coco2014_dataset',
    'coco2017_dataset',
    'ConcatDataset',
    'coco2017_testdev_dataset',
    'flying_chairs_dataset',
    'youtube_vis_dataset',
]
dataset = datasets[0]
# Used tensorrt calibration
calib_images = "./data/calib_images"
# Override some default configuration
config_ovr = {
    '--fast_nms': True,  # Does not work with regular nms
    'mask_proto_debug': False,
   #  '--use_tensorrt_safe_mode': True,
}
!export CUDA_MODULE_LOADING=LAZY
# Enable lazy loading


# Load Model
model_inference = YOLACTEdgeInference(
    weights, config, dataset, calib_images, config_ovr)

# Total time to load = 3m 30s

Configuring YOLACT edge...
Loading YOLACT edge model...
Loading weights from 'weights/yolact_base_20454_900000.pth'...
[03/07/2023-14:35:40] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/07/2023-14:35:40] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/07/2023-14:35:40] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/07/2023-14:35:40] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming

In [4]:
context = zmq.Context()
socket = context.socket(zmq.SUB)

# Subscribe to all messages
socket.connect("tcp://localhost:5555")
socket.setsockopt(zmq.SUBSCRIBE, b"")

# print("Waiting for messages...")

print('Starting up on {} port {}'.format('localhost', 5555))

print("Waiting for messages...")

payload_size = struct.calcsize("Q")

data = b""

Starting up on localhost port 5555
Waiting for messages...


In [5]:
print("Benchmarking performance...")
start = time.time()
prev_time = 0

while True:
    t1 = time.time()
    try:
        while len(data) < payload_size:
            packet = socket.recv()

            if not packet:
                break
            data += packet
        
        image_data = gzip.decompress(data)

        img = np.frombuffer(image_data, dtype=np.uint8)

        img = img.reshape((480, 640, 4))
        img = cv2.resize(img, (1280, 960))

        # Flip image
        img = cv2.flip(img, 0)
        # Convert to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)

        # img = cv2.convertScaleAbs(img, alpha=3.5, beta=5)

        # Time inference
        p = model_inference.predict(img, False)

        print(f"Average {1 / (time.time() - prev_time)} FPS")

        if p:
            cv2.imshow("Unity Cam", p['img'])
            # cv2.resizeWindow("Garnet Cam", 1280, 960)
            # Change window size
            cv2.waitKey(1)
        else:
            print("No prediction")

        prev_time = time.time()
        data = b""
        t2 = time.time()

    except KeyboardInterrupt:
        print("Shutting down...")
        break


Benchmarking performance...
Average 5.958663731309051e-10 FPS


QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to target thread (0x55eb131e7540)

QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to target thread (0x55eb131e7540)

QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to target thread (0x55eb131e7540)

QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to target thread (0x55eb131e7540)

QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to target thread (0x55eb131e7540)

QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to target thread (0x55eb131e7540)

QObject::moveToThread: Current thread (0x55eb131e7540) is not the object's thread (0x55eb17105a50).
Cannot move to tar

Average 1.803205207850124 FPS
Average 3.747393588079256 FPS
Average 15.448804222514429 FPS
Average 3.9054709876382274 FPS
Average 15.56957730584912 FPS
Average 14.089407844349193 FPS
Average 3.594383433098667 FPS
Average 16.540031941952403 FPS
Average 16.560930250922947 FPS
Average 14.777573820856924 FPS
Average 13.454925737016007 FPS
Average 16.512162763323136 FPS
Average 3.933700289519615 FPS
Average 14.222415126073216 FPS
Average 12.725977438362067 FPS
Average 13.45423517863902 FPS
Average 13.060671358286106 FPS
Average 12.857917131610893 FPS
Average 15.053364868696367 FPS
Average 13.817324100489534 FPS
Average 14.75760785045019 FPS
Average 12.561632594385111 FPS
Average 15.521065154884859 FPS
Average 18.30191165625968 FPS
Average 16.503586535245628 FPS
Average 12.395541002210585 FPS
Average 16.49878254575779 FPS
Average 15.758936555528003 FPS
Average 17.485446526092865 FPS
Average 17.730477386191183 FPS
Average 20.684726270262807 FPS
Average 15.937258716600297 FPS
Average 14.367794

In [6]:
# close the socket
socket.close()