In [5]:
## detect.py

import argparse
import os
import sys
import numpy as np
import tensorrt as trt
import cv2
import torchvision
import torch
import time
import traceback
import importlib
import common


class DETECTOR:

    def __init__(self):      

        
        self.num_classes = 80
        self.confthre = 0.5
        self.nmsthre = 0.3

        
        self.input_h = 640
        self.input_w = 640
        self.device = torch.device('cuda:0')
        
        
        self.categories = {} 
        # Sports
        self.categories['1'] = [35,36,30,37,31,32,33,38,39] 
        # Vehicle
        self.categories['2'] = [5,2,9,6,3,4,7,8]
        # Food
        self.categories['3'] = [48,47,51,56,52,55,53,50,54,49]
        # Product
        self.categories['4'] = [60,74,40,46,68,57,75,42,61,43,79,67,34,44,64,69,70,59,73,66,77,72,58,45,78,71,80,63,76,41]
        # Animal
        self.categories['5'] = [22,15,16,20,17,21,24,18,65,1,19,23]
        # Structure
        self.categories['6'] = [14,11,13,12,62,10]
        # Accessory
        self.categories['7'] = [25,27,29,28,26]
        # person
        self.categories['8'] = [1]
        
        
    
    def load_set_meta(self, channel_id=None, model_parameter=None, channel_info_dict=None, model_name=None):

        category_id = model_parameter['category_id']
        channel_info_dict[channel_id]['map_data'][model_name]['category_id'] = category_id

    
    def load(self, weights):
        self.weights = weights
        self.Engine = common.Engine()      
        self.Engine.make_context(self.weights)
        self.batchsize = int(self.Engine.input_shape[0])

        
#         self.init_sample_data()
        
        
    def init_sample_data(self):
        try:
            base_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
            sample_path = os.path.join(base_path, 'sample.png')
            sample_img = cv2.imread(sample_path)

            sample_img_batch = [sample_img] * self.batch_size
            sample_img_batch = torch.tensor(sample_img_batch).cuda()
            
            self.inference_batch(sample_img_batch)
        except Exception as e:
            print(f'initialize sample image : {e}')
            print(traceback.format_exc())
        

    def parse_input(self,input_data_batch):
        res = []
        for input_data in input_data_batch:
            frame = input_data['framedata']['frame']
            bbox = input_data['bbox']
            cropped_img = common.getCropByFrame(frame,bbox)
            res.append(cropped_img)
        return res
       

    
    
    def preprocess(self,frame_batch) :  
        result = torch.zeros([len(frame_batch), 3, self.input_h, self.input_w], dtype=torch.float32, device=torch.device("cuda:0")).fill_(144)
        scale_list = []
        for idx, frame in enumerate(frame_batch) :

#             frame = frame.to(torch.device("cuda:0"))
            _, h, w = frame.shape
            
            r = min(self.input_h/h, self.input_w/w)
            if r < 1 :  
                rw, rh = int(r*w), int(r*h)

                resized_img = torchvision.transforms.functional.resize(frame, (rh,rw)).float()
                result[idx, :,:rh,:rw] = resized_img 
                scale_list.append(r)
            else : 

                result[idx, :,:h,:w] = frame
                scale_list.append(None)

            
        return result, scale_list

    
    def preprocess_for_calibrator(self,frame_batch) :  
        result = torch.zeros([len(frame_batch), 3, self.input_h, self.input_w], dtype=torch.float32, device=torch.device("cpu")).fill_(144)
        scale_list = []
        for idx, frame in enumerate(frame_batch) :

#             frame = frame.to(torch.device("cuda:0"))
            _, h, w = frame.shape
            
            r = min(self.input_h/h, self.input_w/w)
            if r < 1 :  
                rw, rh = int(r*w), int(r*h)

                resized_img = torchvision.transforms.functional.resize(frame, (rh,rw)).float()
                result[idx, :,:rh,:rw] = resized_img 
                scale_list.append(r)
            else : 

                result[idx, :,:h,:w] = frame
                scale_list.append(None)

            
        return result, scale_list



    def inference(self,input_data) : 
        output_data = self.Engine.do_inference_v2(input_data)
        return output_data[0]


    def postprocess(self,prediction,scale_list, class_agnostic=False):
        print(f'prediction.shape {prediction.shape}')
        box_corner = prediction.new(prediction.shape)

        box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 # torch.Size([1, 8400]
        box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
        box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
        box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
        prediction[:, :, :4] = box_corner[:, :, :4] #torch.Size([1, 8400, 4])
#         print(f'box_corner[:, :, 0]  : {box_corner[:, :, 0],box_corner[:, :, 1] ,box_corner[:, :, 2] ,box_corner[:, :, 3] }')
#         print(f'box_corner[:, :, 0]  : {box_corner[:, :, 0].shape,box_corner[:, :, 1] ,box_corner[:, :, 2] ,box_corner[:, :, 3] }')
            
#         print(f'box_corner[:, :, :4] : {box_corner[:, :, :4]} {box_corner[:, :, :4].shape}')
        print(f'prediction[:, :, 2] : {prediction[:, :, 2].shape} prediction[:, :, 0] {prediction[:, :, 0].shape}, box_corner[:, :, 0] : {box_corner[:, :, 0].shape}')
        #prediction[:, :, 2] : torch.Size([1, 8400]) prediction[:, :, 0] torch.Size([1, 8400]), box_corner[:, :, 0] : torch.Size([1, 8400])
        outputs = list()
        #prediction 이미지 개수만큼 for문 돌기
        
        
        
        for i, image_pred in enumerate(prediction):
#             print(f'image_pred : {image_pred}')
#             print(f'image_pred.shape : {image_pred.shape}') #image_pred.shape : torch.Size([8400, 85])
            if not image_pred.size(0):
                outputs.append(None)
                continue
            #print(f'image_pred[:, 5: 5 + self.num_classes] : {image_pred[:, 5: 5 + self.num_classes].shape}')
            #image_pred[:, 5: 5 + self.num_classes] : torch.Size([8400, 80])
            # score, 라벨
            class_conf, class_pred = torch.max(image_pred[:, 5: 5 + self.num_classes], 1, keepdim=True)
#             print(f'class_conf : {class_conf}, class_pred : {class_pred} ')
#             print(f'class_conf : {class_conf.shape} ,class_pred : {class_pred.shape} ')
#             class_conf : torch.Size([8400, 1]) ,class_pred : torch.Size([8400, 1]) 
            
            conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= self.confthre).squeeze()
            print(f'conf_mask : {conf_mask}, {conf_mask.shape}') #torch.Size([8400])
            print(f'image_pred[:, :5] : {image_pred[:, :5].shape}')
            detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
            print(f'detections: {detections}')
            
#             image_pred[:, :5] : torch.Size([8400, 5])
#             detections: torch.Size([8400, 7])
            
            detections = detections[conf_mask]
            if not detections.size(0):
                outputs.append(None)
                continue
            print(f'detections[:, :4] : {detections[:, :4]}, {detections[:, :4].shape}')
            print(f'detections[:, 4] * detections[:, 5] : {detections[:, 4] * detections[:, 5]}')
            print(f'detections[:, 6] : {detections[:, 6]}, {detections[:, 6].shape}')
            if class_agnostic:
                nms_out_index = torchvision.ops.nms(
                    detections[:, :4],
                    detections[:, 4] * detections[:, 5],
                    self.nmsthre,
                )

            else:
                nms_out_index = torchvision.ops.batched_nms(
                    detections[:, :4],
                    detections[:, 4] * detections[:, 5],
                    detections[:, 6],
                    self.nmsthre,
                )

            detections = detections[nms_out_index]
            

            s = time.time()
            ### bbox 복원
            output = list()
            for j, det in enumerate(detections):
                if isinstance(det, type(None)):
                    output.append(None)
                    continue

                tmp = det.detach().cpu().numpy()
                label = str(int(tmp[6]))
                    
                x1, y1, x2, y2 = map(int, tmp[:4])
                t2 = time.time()

                restore_bbox = common.restoreBboxScale( [x1,y1,x2,y2], (scale_list[i],scale_list[i]) )
                t3 = time.time()

                tmp[0] = restore_bbox[0]
                tmp[1] = restore_bbox[1]
                tmp[2] = restore_bbox[2]
                tmp[3] = restore_bbox[3]
                
                                
                score1 = tmp[4]
                score2 = tmp[5]
                score = (tmp[4] + tmp[5]) / 2
                
                x1, y1, x2, y2 = map(int, restore_bbox)            
                bbox = [x1, y1, x2, y2]      
                output.append({"bbox":bbox, "score":score, "label":label})
                t4 = time.time()
            outputs.append(output)
            e = time.time()
           
        return outputs             
  

    
weights = '/DATA_17/trt_test/engines/yoloxm_test_ij/yoloxm_int8_024.trt'

img_path = '/DATA_17/ij/test/test_image.jpeg'
# image = cv2.imread(img_path)[..., ::-1]#BGR 순서를 RGB로 뒤집습니다.
image = cv2.imread(img_path)
image = np.copy(image)
image = torch.from_numpy(image).to(torch.device("cuda"))
image = image.permute(2,0,1)
frame_batch = [image]



pe = DETECTOR()
pe.load(weights)


input_data, scale_list = pe.preprocess(frame_batch)

result = pe.inference(input_data)


output_batch = pe.postprocess(result, scale_list)
    




prediction.shape torch.Size([1, 8400, 85])
prediction[:, :, 2] : torch.Size([1, 8400]) prediction[:, :, 0] torch.Size([1, 8400]), box_corner[:, :, 0] : torch.Size([1, 8400])
conf_mask : tensor([False, False, False,  ..., False, False, False], device='cuda:0'), torch.Size([8400])
image_pred[:, :5] : torch.Size([8400, 5])
detections: tensor([[-3.8707e+00,  1.8199e-01,  4.2044e+01,  ...,  8.0466e-06,
          1.0606e-01,  0.0000e+00],
        [-1.3385e+01,  6.0095e-02,  5.7748e+01,  ...,  2.4736e-06,
          8.5399e-02,  0.0000e+00],
        [-7.1481e+00,  6.5004e-01,  7.5329e+01,  ...,  1.6987e-06,
          6.1109e-02,  0.0000e+00],
        ...,
        [ 3.0487e+02,  4.0363e+02,  8.1234e+02,  ...,  3.9488e-05,
          5.4466e-02,  0.0000e+00],
        [ 3.6552e+02,  3.9675e+02,  7.9137e+02,  ...,  1.5587e-05,
          1.1562e-01,  0.0000e+00],
        [ 4.6104e+02,  4.2508e+02,  7.2706e+02,  ...,  1.4603e-05,
          1.0546e-01,  0.0000e+00]], device='cuda:0')
detections[:, :4]



In [2]:
result[0]

tensor([[-3.8707e+00,  1.8199e-01,  4.2044e+01,  ...,  2.1921e-03,
          1.5117e-03,  2.3325e-02],
        [-1.3385e+01,  6.0095e-02,  5.7748e+01,  ...,  1.6623e-03,
          1.2262e-03,  2.1149e-02],
        [-7.1481e+00,  6.5004e-01,  7.5329e+01,  ...,  1.1270e-03,
          1.2085e-03,  2.0137e-02],
        ...,
        [ 3.0487e+02,  4.0363e+02,  8.1234e+02,  ...,  5.5450e-03,
          3.5926e-03,  7.2937e-03],
        [ 3.6552e+02,  3.9675e+02,  7.9137e+02,  ...,  5.0956e-03,
          4.7047e-03,  7.9430e-03],
        [ 4.6104e+02,  4.2508e+02,  7.2706e+02,  ...,  5.9679e-03,
          4.9112e-03,  9.6800e-03]], device='cuda:0')