In [1]:
import torch
import time
import numpy as np
import json
import trt_pose.coco
from torch2trt import TRTModule
import torchvision.transforms as transforms
import PIL.Image
import cv2
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects

# Object detector: pretrained YOLOv5-small pulled through torch.hub
# (served from the local hub cache once it has been downloaded).
model = torch.hub.load('ultralytics/yolov5', model='yolov5s')

Using cache found in /home/keith/.cache/torch/hub/ultralytics_yolov5_master

                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Focus                     [3, 32, 3]                    
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  1    156928  models.common.C3                        [128, 128, 3]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  1    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Conv             

Adding autoShape... 


In [2]:
# Update the path to human_pose.json if trt_pose is checked out elsewhere.
with open('trt_pose/tasks/human_pose/human_pose.json') as pose_file:
    human_pose = json.load(pose_file)

# Topology derived from the pose description; ParseObjects and DrawObjects
# are both constructed from it further down.
topology = trt_pose.coco.coco_category_to_topology(human_pose)

In [3]:
# Path to the torch2trt checkpoint of the pose network; update it if the file
# lives somewhere else.
OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'

# TRTModule starts empty; its contents come entirely from the saved state dict.
model_trt = TRTModule()
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The return value is left as the cell's last expression so the
# notebook displays the key-matching report.
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

<All keys matched successfully>

In [4]:
# Device the pose network input is placed on. Defined first so the
# normalisation constants below can be created directly on it.
device = torch.device('cuda')

# Per-channel (R, G, B) normalisation constants (the commonly used ImageNet
# statistics), created on `device` so no host-to-device copy happens per frame.
mean = torch.tensor([0.485, 0.456, 0.406], device=device)
std = torch.tensor([0.229, 0.224, 0.225], device=device)

def preprocess(image):
    """Convert a BGR OpenCV image into a normalised, batched tensor on `device`.

    Args:
        image: BGR image array as produced by OpenCV (H x W x 3).

    Returns:
        Tensor with a leading batch dimension of 1, channel-normalised with
        `mean` and `std`, located on `device`.
    """
    # The module-level `device` is only read here; it is no longer re-bound on
    # every call through a `global` statement.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = PIL.Image.fromarray(rgb)
    tensor = transforms.functional.to_tensor(pil_image).to(device)
    # In-place normalisation; mean/std are broadcast over height and width.
    tensor.sub_(mean[:, None, None]).div_(std[:, None, None])
    return tensor[None, ...]

In [5]:
# Both helpers are built from the same skeleton topology.
draw_objects = DrawObjects(topology)    # draws the parsed poses onto an image
parse_objects = ParseObjects(topology)  # turns (cmap, paf) into counts, objects, peaks

In [6]:
# Resolution every crop is resized to before it is fed to the pose network.
# NOTE(review): presumably has to match the input size the optimized model was
# built for (the checkpoint filename mentions 224x224) - confirm when swapping models.
WIDTH = 224
HEIGHT = 224

def poseProc(crop_img, w, h):
    """Run pose estimation on an image crop and return it with the poses drawn.

    Args:
        crop_img: BGR image crop (numpy array) to analyse.
        w: Width in pixels of the returned image.
        h: Height in pixels of the returned image.

    Returns:
        A new image of size (w, h) with the detected poses drawn on it. If the
        crop is None/empty or the requested size is not positive, the input is
        returned unchanged, because cv2.resize cannot handle such input.
    """
    # Guard: zero-area crops (degenerate bounding boxes) would make cv2.resize raise.
    if crop_img is None or crop_img.size == 0 or w <= 0 or h <= 0:
        return crop_img

    # Work at the network resolution; cv2.resize returns a new array, so the
    # caller's crop is not drawn on.
    net_img = cv2.resize(crop_img, (WIDTH, HEIGHT), interpolation=cv2.INTER_LINEAR)
    data = preprocess(net_img)
    cmap, paf = model_trt(data)
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    counts, objects, peaks = parse_objects(cmap, paf)
    draw_objects(net_img, counts, objects, peaks)

    # Scale the annotated image to the size the caller asked for.
    return cv2.resize(net_img, (w, h), interpolation=cv2.INTER_LINEAR)

In [7]:
def detPoseProc(video):
    """Detect people with YOLOv5 and overlay pose estimates on every frame.

    Frames are read from `video` and shown in a 'Human Detection' window until
    the stream ends or 'q' is pressed; the average FPS is printed afterwards.

    Args:
        video: Anything accepted by cv2.VideoCapture (the notebook passes the
            camera index 0 or a video file path).
    """
    cap = cv2.VideoCapture(video)
    numFrame = 0
    try:
        if not cap.isOpened():
            # Without this early exit the FPS report below would run with
            # t0/t1 unbound and raise a NameError.
            print("Could not open video source: " + str(video))
            return

        cv2.namedWindow('Human Detection', cv2.WINDOW_AUTOSIZE)

        # Drain pending GPU work first so it is not charged to the timed loop.
        torch.cuda.current_stream().synchronize()
        t0 = time.time()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            numFrame += 1
            frame_h, frame_w = frame.shape[:2]
            #frame = cv2.resize(frame, (1280,720), interpolation =cv2.INTER_LINEAR)
            results = model(frame)

            # One conversion to plain Python floats instead of many
            # per-element tensor reads inside the loop.
            for det in results.xyxy[0].tolist():
                # Columns are read as x1, y1, x2, y2, <unused>, class.
                # Only do pose estimation if the object is a person (class 0).
                if det[5] >= 1:
                    continue

                # Clamp the box to the frame: a negative or overflowing
                # coordinate would slice the wrong region or make the
                # paste-back below fail with a shape mismatch.
                x1 = max(int(det[0]), 0)
                y1 = max(int(det[1]), 0)
                x2 = min(int(det[2]), frame_w)
                y2 = min(int(det[3]), frame_h)
                w = x2 - x1
                h = y2 - y1
                if w <= 0 or h <= 0:
                    # Degenerate box: nothing to crop, and resizing would fail.
                    continue

                # poseProc resizes into a new array, so the crop can be a
                # view of the frame (no full-frame copy per detection).
                crop_img = poseProc(frame[y1:y2, x1:x2], w, h)

                # Paste the annotated crop back over the original region.
                frame[y1:y2, x1:x2] = crop_img
                frame = cv2.rectangle(frame, (x1, y1), (x2, y2), [0, 0, 255], 2)

            cv2.imshow('Human Detection', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        torch.cuda.current_stream().synchronize()
        t1 = time.time()
        elapsed = t1 - t0
        if elapsed > 0:
            print("FPS: " + str(numFrame / elapsed))
    finally:
        # Always free the capture device and close the window, including when
        # the cell is interrupted or a frame raises.
        cap.release()
        cv2.destroyAllWindows()

In [9]:
# Webcam: 0 is handed to cv2.VideoCapture inside detPoseProc
# (presumably the default camera index - confirm on your machine).
detPoseProc(0)

# Video file alternative: update the video path if need be, then uncomment.
#detPoseProc('pedestrian.mp4')

FPS: 21.51590127510741
