In [1]:
import argparse
import os
import sys
import numpy as np
import tensorrt as trt
import cv2
import torchvision
import torch
import time
import traceback
import importlib
import common_people as common
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

torch.cuda.init()


class PEOPLE_DETECTOR:
    def __init__(self):  
#     def __init__(self, logger):  
#         self.logger = logger

        
        self.num_classes = list(range(1))
        self.threshold = 0.3


        self.input_h = 544
        self.input_w = 960
        self.wh_format = False

        self.stride = 16
        self.box_norm = 35.0
        self.grid_calculator()

    def load(self, weights):
        self.weights = weights
        self.Engine = common.Engine()      
        self.Engine.make_context(self.weights)
        self.batchsize = 1

    def parse_input(self,input_data_batch):
        res = []
        for input_data in input_data_batch:
            frame = input_data['framedata']['frame']
            bbox = input_data['bbox']
            cropped_img = common.getCropByFrame(frame,bbox)
            res.append(cropped_img)
        return res
    
    
    def grid_calculator(self):
        self.grid_h = int(self.input_h / self.stride)
        self.grid_w = int(self.input_w / self.stride)
        self.grid_size = self.grid_h * self.grid_w

        self.grid_centers_w = []
        self.grid_centers_h = []

        for i in range(self.grid_h):
            value = (i * self.stride + 0.5) / self.box_norm
            self.grid_centers_h.append(value)

        for i in range(self.grid_w):
            value = (i * self.stride + 0.5) / self.box_norm
            self.grid_centers_w.append(value)
        
#         print(f'self.grid_h : {self.grid_h}, self.grid_w : {self.grid_w}, self.grid_size : {self.grid_size}')
#         print(f'self.grid_centers_w : {self.grid_centers_w}, self.grid_centers_h : {self.grid_centers_h}')

    
    # 박스 nomalize
    def applyBoxNorm(self,o1, o2, o3, o4, x, y):
        """
        Applies the GridNet box normalization
        Args:
            o1 (float): first argument of the result
            o2 (float): second argument of the result
            o3 (float): third argument of the result
            o4 (float): fourth argument of the result
            x: row index on the grid
            y: column index on the grid

        Returns:
            float: rescaled first argument
            float: rescaled second argument
            float: rescaled third argument
            float: rescaled fourth argument
        """
#         print(f'o1, o2, o3, o4 before : {o1}')
        o1 = (o1 - self.grid_centers_w[x]) * -self.box_norm
        o2 = (o2 - self.grid_centers_h[y]) * -self.box_norm
        o3 = (o3 + self.grid_centers_w[x]) * self.box_norm
        o4 = (o4 + self.grid_centers_h[y]) * self.box_norm
#         print(f'o1, o2, o3, o4 after: {o1}')
        
#         o1, o2, o3, o4 before : (1.0848912, 0.9234038, 0.86667395, 1.4356118, 44, 7)
#         o1, o2, o3, o4 after: (666.5288079977036, 80.18086701631546, 734.8335881233215, 162.74641454219818)
    
    
        return o1, o2, o3, o4
    
    def NMSBoxes(self, bboxes,scores,class_ids, outputs_list,scale_list):
        indexes = cv2.dnn.NMSBoxes(bboxes, scores, self.threshold, 0.5)
        print('indexes',indexes)
        ori_h = scale_list[0][0]
        ori_w = scale_list[0][1]
        
        for idx in indexes:
            idx = int(idx)
            xmin, ymin, w, h = bboxes[idx]
            xmax = w + xmin
            ymax = h + ymin
            print('ccccccc', xmin, ymin, w, h)
            ori_x_min = int((xmin * ori_w)/self.input_w)
            ori_y_min = int((ymin * ori_h)/self.input_h)
            
            ori_x_max = int((xmax * ori_w)/self.input_w)
            ori_y_max = int((ymax * ori_h)/self.input_h)
            
            
            class_id = class_ids[idx]
            score = scores[idx]
            output = {"bbox":[ori_x_min, ori_y_min, ori_x_max, ori_y_max], "score":score, "label":class_id}
            outputs_list.append(output)
        return outputs_list

    def postprocess(self,outputs, scale_list):
        """
        Postprocesses the inference output
        Args:
            outputs (list of float): inference output
            min_confidence (float): min confidence to accept detection
            analysis_classes (list of int): indices of the classes to consider

        Returns: list of list tuple: each element is a two list tuple (x, y) representing the corners of a bb
        """
        t1 = time.time()
        outputs[0] = outputs[0].detach().cpu().numpy()
        outputs[1] = outputs[1].detach().cpu().numpy()
        print(f"outputs[0].shape : {outputs[0].shape}")
        print(f"outputs[1].shape : {outputs[1].shape}")
        
        t2 = time.time()
        outputs_list = []
        bbs = []
        class_ids = []
        scores = []
        if isinstance(outputs[0], type(None)):
            return
        
        
        t3 = time.time()  
        boxes = outputs[0]
        
#         print(f"bbox : {boxes}")
#         print(f"bbox.shape : {boxes.shape}")   
        
#         self.grid_h : 34, self.grid_w : 60, self.grid_size : 2040
#       torch.Size([12, 34, 60]) -> 24480
        #[0, 1, 2] 
        for c in self.num_classes:
            print(f'c : {c}')
            x1_idx = c * 4 * self.grid_size #self.grid_size : 2040
            y1_idx = x1_idx + self.grid_size
            x2_idx = y1_idx + self.grid_size
            y2_idx = x2_idx + self.grid_size
#             print(f'c : {c} x1_idx : {x1_idx}, y1_idx : {y1_idx}, x2_idx : {x2_idx} y2_idx : {y2_idx}')
#             c : 0 x1_idx : 0, y1_idx : 2040, x2_idx : 4080 y2_idx : 6120
#             c : 1 x1_idx : 8160, y1_idx : 10200, x2_idx : 12240 y2_idx : 14280
#             c : 2 x1_idx : 16320, y1_idx : 18360, x2_idx : 20400 y2_idx : 22440
#                 y2_idx : 22440 + 2040 = 24480 -> outputs[0]의 tensor갯수

                
            t3_1 = time.time()
            for h in range(self.grid_h):
                for w in range(self.grid_w):
                    i = w + h * self.grid_w
#                     print(f'w : {w}, h : {h}, self.grid_w : {self.grid_w}, i : {i}')
#                     print(f'c * self.grid_size + i : {c * self.grid_size + i}')
#                     i : 0 - 2039
#                     c * self.grid_size + i : 0 - 6119
#                     print(f'i : {i}, w : {w}, h: {h}')
                    score = outputs[1][c * self.grid_size + i]
#                     print(f'score : {score}')
                    if score >= self.threshold:
                        o1 = boxes[x1_idx + w + h * self.grid_w]
                        o2 = boxes[y1_idx + w + h * self.grid_w]
                        o3 = boxes[x2_idx + w + h * self.grid_w]
                        o4 = boxes[y2_idx + w + h * self.grid_w]
                    #  i == 0 -> 0,2040,4080,6120
                    #  i == 1 -> 1,2041,4081,6121

                        o1, o2, o3, o4 = self.applyBoxNorm(o1, o2, o3, o4, w, h)

                        xmin = int(o1)
                        ymin = int(o2)
                        xmax = int(o3)
                        ymax = int(o4)
                        if self.wh_format:
                            bbs.append([xmin, ymin, xmax - xmin, ymax - ymin])
                        else:
                            bbs.append([xmin, ymin, xmax, ymax])
                        class_ids.append(c)
                        scores.append(float(score))
            
            t3_2 = time.time()
            print(f"t3_2 ~ t3_1 : {t3_2 - t3_1}")

        t4 = time.time()      
        print(f'bbs {bbs}')
        outputs_list = self.NMSBoxes(bbs,scores,class_ids,outputs_list,scale_list)
        t5 = time.time()
        
        print(f"t1~t2 : {t2-t1}")
        print(f"t2~t3 : {t3-t2}")
        print(f"t3~t4 : {t4-t3}")
        print(f"t4~t5 : {t5-t4}")
        


        
        
        return outputs_list

    def preprocess(self,frame_batch) : 

        input_data = torch.zeros([3, self.input_h, self.input_w], dtype=torch.float32, device=torch.device("cuda")).fill_(144)
        scale_list = []
        
        for idx, frame in enumerate(frame_batch) :
            _, h, w = frame.shape
            permute = [2, 1, 0]
            frame = frame[permute,:,:]
            resized_img = torchvision.transforms.functional.resize(frame, (self.input_h, self.input_w)).float()
            resized_img = resized_img.div(255.0)
            input_data[:,:self.input_h,:self.input_w] = resized_img 
            input_data = torch.ravel(input_data)
            scale_list.append([h,w])
        print('input_data.shape 1 ',input_data.shape,scale_list)
        return input_data, scale_list

    
    def inference(self,input_data) : 
        output_data = self.Engine.do_inference_v2(input_data)
        return output_data

    
#     def parse_output(self,input_data_batch,output_batch,reference_CM):
    def parse_output(self,output_batch):
        res = []
        idx_i = 0
        for idx_i, data in enumerate(input_data_batch): 
            if output_batch == None:
                input_data = dict()
                input_data["framedata"] = framedata
                input_data["bbox"] = None
                input_data["scenario"] = scenario   
                input_data["data"] = None
                input_data["available"] = False
                res.append(input_data)
                continue
            for idx_j, output in enumerate(output_batch): 
                print('output',output)
                if isinstance(output, type(None)):
                    input_data = dict()
                    input_data["framedata"] = framedata
                    input_data["bbox"] = None
                    input_data["scenario"] = scenario   
                    input_data["data"] = None   
                    input_data["available"] = False
                    res.append(input_data)
                    continue

                input_data = dict()
                input_data["framedata"] = framedata
                input_data["bbox"] = output['bbox']
                input_data["scenario"] = scenario   
                input_data["data"] = {"score":output['score'], "label":str(label)}
                input_data["available"] = True
                res.append(input_data)
        return res  
        
   
    
weights = '/DATA_17/ij/test/best_model_people.trt'

img_path = '/DATA_17/ij/test/test_image.jpeg'
# image = cv2.imread(img_path)[..., ::-1]#BGR 순서를 RGB로 뒤집습니다.
image = cv2.imread(img_path)
image = np.copy(image)
image = torch.from_numpy(image).to(torch.device("cuda"))
image = image.permute(2,0,1)
frame_batch = [image]



pe = PEOPLE_DETECTOR()
pe.load(weights)


input_data, scale_list = pe.preprocess(frame_batch)

result = pe.inference(input_data)


# output_batch = pe.postprocess(result, scale_list)

# print(output_batch)

# image = cv2.imread(img_path)


# for result in output_batch:
#     xmin, ymin, xmax, ymax = result['bbox']
#     color = [255, 0, 0] 
#     cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
# plt.imshow(image)
# plt.show()






input_data.shape 1  torch.Size([1566720]) [[426, 640]]




In [2]:
input_h = 544
input_w = 960
stride = 16
box_norm = 35

grid_h = 34
grid_w = 60
norm_data_x = torch.zeros([grid_h, grid_w], dtype=torch.float32, device=torch.device("cuda"))
norm_data_y = torch.zeros([grid_h, grid_w], dtype=torch.float32, device=torch.device("cuda"))


grid_h = int(input_h / stride)
grid_w = int(input_w / stride)
grid_size = grid_h * grid_w

for i in range(grid_h):
    value = (i * stride + 0.5) / box_norm
    norm_data_y[i,:] = value


for i in range(grid_w):
    value = (i * stride + 0.5) / box_norm
    norm_data_x[:,i] = value

norm_data_x = norm_data_x.view(-1,)
norm_data_y = norm_data_y.view(-1,)
norm_data_x = torch.cat([norm_data_x,norm_data_x,norm_data_x],-1)
norm_data_y = torch.cat([norm_data_y,norm_data_y,norm_data_y],-1)

label_c0 = torch.zeros([2040], dtype=torch.float32, device=torch.device("cuda")).fill_(0)
label_c1 = torch.zeros([2040], dtype=torch.float32, device=torch.device("cuda")).fill_(1)
label_c2 = torch.zeros([2040], dtype=torch.float32, device=torch.device("cuda")).fill_(2)
label_tensor = torch.cat([label_c0,label_c1,label_c2],-1)


In [6]:
import time
aa = result[0]
bb = result[1]
# c : 0 x1_idx : 0, y1_idx : 2040, x2_idx : 4080 y2_idx : 6120
# c : 1 x1_idx : 8160, y1_idx : 10200, x2_idx : 12240 y2_idx : 14280
# c : 2 x1_idx : 16320, y1_idx : 18360, x2_idx : 20400 y2_idx : 22440

ori_w = 640
ori_h = 426
def postprocess(aa,bb):

    a = torch.tensor(aa)
    b = torch.tensor(bb)
    output_list = []
    
    x1 = torch.cat([a[:2040],a[8160:10200],a[16320:18360]],dim=0)
    y1 = torch.cat([a[2040:4080],a[10200:12240],a[18360:20400]],dim=0)
    x2 = torch.cat([a[4080:6120],a[12240:14280],a[20400:22440]],dim=0)
    y2 = torch.cat([a[6120:8160],a[14280:16320],a[22440:]],dim=0)

    tensor_x1 = (x1 - norm_data_x) * -35
    tensor_y1 = (y1 - norm_data_y) * -35
    tensor_x2 = (x2 + norm_data_x) * 35
    tensor_y2 = (y2 + norm_data_y) * 35

    all_tensor = torch.stack([tensor_x1,tensor_y1,tensor_x2,tensor_y2],dim=1)
    score_tensor = result[1] >=0.7

    box = all_tensor[score_tensor]
    score = b[score_tensor]
    label = label_tensor[score_tensor]

    nms_out_index = torchvision.ops.batched_nms(
        box,
        score,
        label,
        0.5,
    )
    result_box = box[nms_out_index].detach().cpu().numpy()
    result_score = score[nms_out_index].detach().cpu().numpy()

    for idx, bbox in enumerate(result_box):
        if isinstance(bbox, type(None)):
            output_list.append(None)
            continue
        x1, y1, x2, y2 = map(int, bbox)
        x1 = int((x1 * ori_w)/input_w)
        y1 = int((y1 * ori_h)/input_h)
        x2 = int((x2 * ori_w)/input_w)
        y2 = int((y2 * ori_h)/input_h)
        score = str(result_score[idx])
        out = {"bbox":[x1, y1, x2, y2], "score":score, "label":label}
        output_list.append(out)
   
    
    
    

for i in range(1000) :
    s1 = time.time()
    postprocess(aa,bb)
    print((time.time()-s1))



#     x1 = torch.cat([a[:2040],a[8160:10200],a[16320:18360]],dim=0)
#     y1 = torch.cat([a[2040:4080],a[10200:12240],a[18360:20400]],dim=0)
#     x2 = torch.cat([a[4080:6120],a[12240:14280],a[20400:22440]],dim=0)
#     y2 = torch.cat([a[6120:8160],a[14280:16320],a[22440:]],dim=0)

#     tensor_x1 = (x1 - norm_data_x) * -35
#     tensor_y1 = (y1 - norm_data_y) * -35
#     tensor_x2 = (x2 + norm_data_x) * 35
#     tensor_y2 = (y2 + norm_data_y) * 35

#     all_tensor = torch.stack([tensor_x1,tensor_y1,tensor_x2,tensor_y2,label_tensor],dim=1)




  if sys.path[0] == "":
  del sys.path[0]


0.0030083656311035156
0.0013453960418701172
0.0008940696716308594
0.0008788108825683594
0.0008776187896728516
0.0008680820465087891
0.0008647441864013672
0.0008616447448730469
0.0008752346038818359
0.0011472702026367188
0.0009279251098632812
0.0008893013000488281
0.0008933544158935547
0.0008711814880371094
0.0008633136749267578
0.0008616447448730469
0.00086212158203125
0.000873565673828125
0.0008616447448730469
0.0008616447448730469
0.0008623600006103516
0.0008673667907714844
0.0008599758148193359
0.0008587837219238281
0.0008592605590820312
0.0008676052093505859
0.0008914470672607422
0.0008614063262939453
0.0008587837219238281
0.0008552074432373047
0.0008647441864013672
0.0008597373962402344
0.0008606910705566406
0.0008606910705566406
0.0008573532104492188
0.0008883476257324219
0.0008640289306640625
0.0008587837219238281
0.000858306884765625
0.000858306884765625
0.0008931159973144531
0.0008618831634521484
0.0008552074432373047
0.0008592605590820312
0.0008738040924072266
0.0008587837219

0.001873016357421875
0.0011818408966064453
0.000865936279296875
0.0008699893951416016
0.0008614063262939453
0.0008616447448730469
0.0008599758148193359
0.0008592605590820312
0.0008695125579833984
0.0008580684661865234
0.0008592605590820312
0.0008585453033447266
0.0008645057678222656
0.0008616447448730469
0.0008578300476074219
0.0008592605590820312
0.0008578300476074219
0.0008652210235595703
0.0008587837219238281
0.0008575916290283203
0.0008580684661865234
0.0008637905120849609
0.0008625984191894531
0.0008568763732910156
0.0008566379547119141
0.0008571147918701172
0.003215312957763672
0.0008685588836669922
0.0008695125579833984
0.0008599758148193359
0.0008604526519775391
0.0008602142333984375
0.0008654594421386719
0.0008633136749267578
0.0008625984191894531
0.0008599758148193359
0.0008599758148193359
0.0008652210235595703
0.0008614063262939453
0.0008580684661865234
0.000858306884765625
0.0008609294891357422
0.0008690357208251953
0.0008590221405029297
0.000858306884765625
0.0008616447448

0.0018763542175292969
0.0013370513916015625
0.0009095668792724609
0.0008795261383056641
0.0008714199066162109
0.0008625984191894531
0.0008652210235595703
0.0008909702301025391
0.0008649826049804688
0.0008587837219238281
0.0008594989776611328
0.0008716583251953125
0.0008919239044189453
0.0008614063262939453
0.0008614063262939453
0.0008549690246582031
0.0008668899536132812
0.0008568763732910156
0.0008592605590820312
0.0008568763732910156
0.0008654594421386719
0.0008602142333984375
0.0008568763732910156
0.0008597373962402344
0.0008573532104492188
0.00086212158203125
0.000858306884765625
0.0008592605590820312
0.0008568763732910156
0.0008597373962402344
0.0008604526519775391
0.000858306884765625
0.0008592605590820312
0.0008563995361328125
0.0008630752563476562
0.0008578300476074219
0.0008573532104492188
0.0008611679077148438
0.0008580684661865234
0.0008647441864013672
0.0008609294891357422
0.0008571147918701172
0.0008571147918701172
0.0008640289306640625
0.0008592605590820312
0.000860214233