### This notebook is used to communicate with Unity VirtualHome over a socket
### Each frame from Unity is sent directly to this Python Notebook where it's processed before being passed into a trained YOLACT Model for Inference in Real-Time. 
### The YOLACT Model is trained on a synthetic home indoor dataset

Author: Adam Goldstein



In [1]:
# Imports

import numpy as np
import urllib
import time
import cv2
from yolact_edge.inference import YOLACTEdgeInference
import zmq
import struct
import gzip


In [2]:
# Load Model

# weights = "yolact_edge_resnet50_54_800000.pth"
weights = "weights/yolact_base_26666_1280000.pth"
# All available model configs, depends on which weights

In [3]:
# Load YOLACT Configuration

model_configs = [
    'yolact_base_config',
    'yolact_edge_config',
    'yolact_edge_mobilenetv2_config',
    'yolact_edge_vid_config',
    'yolact_edge_vid_minimal_config',
    'yolact_edge_vid_trainflow_config',
    'yolact_edge_youtubevis_config',
    'yolact_resnet50_config',
    'yolact_resnet152_config',
    'yolact_edge_resnet50_config',
    'yolact_edge_vid_resnet50_config',
    'yolact_edge_vid_trainflow_resnet50_config',
    'yolact_edge_youtubevis_resnet50_config',
]
config = model_configs[1]

# Load dataset
datasets = [
    'SyntheticHome',
    'coco2014_dataset',
    'coco2017_dataset',
    'coco2017_testdev_dataset',
    'flying_chairs_dataset',
    'youtube_vis_dataset',
]
dataset = datasets[0]
# Used tensorrt calibration
calib_images = "./data/calib_images"
# Override some default configuration
# config_ovr = {
#     'use_fast_nms': True,  # Does not work with regular nms
#     'mask_proto_debug': False
# }
config_ovr = {
    '--fast_nms': True,  # Does not work with regular nms
    'mask_proto_debug': False
}

# Load Model
model_inference = YOLACTEdgeInference(
    weights, config, dataset, calib_images, config_ovr)

Configuring YOLACT edge...
Loading YOLACT edge model...
[03/03/2023-09:52:21] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/03/2023-09:52:21] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/03/2023-09:52:21] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/03/2023-09:52:21] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
[03/03/2023-09:52:21] [TRT] [W] CUDA

In [7]:
context = zmq.Context()
socket = context.socket(zmq.SUB)

# Subscribe to all messages
socket.connect("tcp://localhost:5555")
socket.setsockopt(zmq.SUBSCRIBE, b"")

# print("Waiting for messages...")

print('Starting up on {} port {}'.format('localhost', 5555))

print("Waiting for messages...")

payload_size = struct.calcsize("Q")

data = b""

Starting up on localhost port 5555
Waiting for messages...


In [8]:
print("Benchmarking performance...")
start = time.time()
prev_time = 0

while True:
    t1 = time.time()
    try:
        while len(data) < payload_size:
            packet = socket.recv()

            if not packet: break
            data+=packet
        
        image_data = gzip.decompress(data)

        img = np.frombuffer(image_data, dtype=np.uint8)

        img = img.reshape((480, 640, 4))
        img = cv2.resize(img, (1280, 960))

        # img = img.reshape((320, 512, 4))
        # img = cv2.resize(img, (1024, 640))

        # # Flip image
        img = cv2.flip(img, 0)
        # # Convert to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)

        # img = cv2.convertScaleAbs(img, alpha=3.5, beta=5)

        # Time inference

        # t3 = time.time()
        p = model_inference.predict(img, False)
        # t4 = time.time()
        # print(f"Time taken: {t4 - t3} seconds")

        

        print(f"Average {1 / (time.time() - prev_time)} FPS")

        if p:
           
            cv2.imshow("Unity Cam", p['img'])
            # cv2.resizeWindow("Garnet Cam", 1280, 960)
            # Change window size
            
            cv2.waitKey(1)
        else:
            print("No prediction")

        prev_time = time.time()
        data = b""
        t2 = time.time()
        # print(f"Time taken: {t2 - t1} seconds")


    except KeyboardInterrupt:
        print("Shutting down...")
        break


Benchmarking performance...
Average 5.959950999433863e-10 FPS
No predictions!
Average 24.03006691722431 FPS
No prediction
No predictions!
Average 19.034476499071943 FPS
No prediction
Average 12.876074463382288 FPS
Average 14.280718001797728 FPS
Average 15.826487257469305 FPS
Average 13.301780102055378 FPS
Average 20.576149291366395 FPS
Average 18.425729134176503 FPS
Average 15.925458763493323 FPS
Average 13.754071159206427 FPS
Average 20.158042206746735 FPS
Average 14.228687351159177 FPS
Average 10.02505843941661 FPS
Average 10.208894773736272 FPS
Average 10.323525792175955 FPS
Average 8.167799702833596 FPS
Average 11.504481869548522 FPS
Average 11.655150374999653 FPS
Average 9.877154436508441 FPS
Average 9.977719468940236 FPS
Average 9.133380368792844 FPS
Average 16.97006380508094 FPS
Average 8.649583225066714 FPS
Average 15.083843404083922 FPS
Average 13.218941363715155 FPS
Average 9.193861558459208 FPS
Average 9.544005279086171 FPS
Average 9.26432785480312 FPS
Average 12.60095656980

: 

: 

In [6]:
# close the socket
socket.close()