In [29]:
import tensorflow as tf
import numpy as np
import cv2 as cv
from PIL import Image

In [41]:
def run_inference(model, data):
    interpreter = tf.lite.Interpreter(model_path = model)
    # get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.resize_tensor_input(input_details[0]['index'], data.shape)
    interpreter.allocate_tensors()
    
    interpreter.set_tensor(input_details[0]['index'], data)
    interpreter.invoke()
    
    iter=3
    result = []
    for i in range(iter):
        output_data = interpreter.get_tensor(output_details[i]['index'])
        result.append(output_data)

    # output_data
    return result


import torchvision.transforms as transforms
# 定义预处理函数，将像素值约束到[-1, 1]范围
def preprocess_image(img):
    transform = transforms.Compose([
        transforms.ToTensor(),           # 将图片转换为PyTorch的Tensor数据类型
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),   # 标准化到[-1, 1]
    ])
    return transform(img).unsqueeze(0)  # 添加batch维度，因为模型输入一般为(batch_size, channels, height, width)

In [61]:


def sigmoid(x):
    return 1/(1+np.exp(-x))

def xywh2xyxy(x):
    # convert [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y   

def process(input, mask, anchors):
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = sigmoid(input[..., 4])
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = sigmoid(input[..., 5:])

    box_xy = sigmoid(input[..., :2]) * 2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = pow(sigmoid(input[..., 2:4]) * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs


    

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with box threshold. It's a bit different with origin yolov5 post process!
    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.
    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, classes for boxes.
        scores: ndarray, scores for boxes.
    """
    global BOX_THRESH
    box_classes = np.argmax(box_class_probs, axis=-1)
    box_class_scores = np.max(box_class_probs, axis=-1)
    pos = np.where(box_confidences[..., 0] >= BOX_THRESH)
    
    boxes = boxes[pos]
    classes = box_classes[pos]
    scores = box_class_scores[pos]
    
    return boxes, classes, scores

def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Arguments
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.

    # Returns
        keep: ndarray, index of effective boxes.
    """   
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    
    areas = w*h  # 预测框面积；
    order = scores.argsort()[::-1]  # 对scores排序，找到scores最大的; argsort(),返回的是元素值从小到大排序后的索引值的数组，[::-1]则表示从大到小排序；
    
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        
        xx1 = np.maximum(x[i], x[order[1:]])  # 找出最大的x1, np.maximum(a, b) --> a与b逐位比较取其大者
        yy1 = np.maximum(y[i], y[order[1:]])  # 找出最大的y1
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
        
        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1  # 计算交集面积
        
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]  # 返回交并比小于阈值的索引序列，并取第0个索引。
        order = order[inds + 1]
    
    keep = np.array(keep)
    return keep
        

def yolov5_post_process(input_data):
    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)  # filter boxes with box threshold
        boxes.append(b)
        classes.append(c)
        scores.append(s)
    
    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)
    
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        
        keep = nms_boxes(b, s)
        
        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])
        
    if not nclasses and not nscores:
        return None, None, None
    
    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)
    
    return boxes, classes, scores   

def draw(image, boxes, scores, classes):
    """draw the boxes on the image

    Args:
        image (_type_): original image
        boxes (ndarray): boxes of objects
        scores (ndarray): scores of objects
        classes (ndarray): classes of objects
    """
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = box
        print('class:{}, score:{}'.format(CLASSES[cl], score))
        print('box coordinate left, top, right, down: [{}, {}, {}, {}]'.format(top, left, right, bottom))
        top = int(top)
        left = int(left)
        right = int(right)
        bottom = int(bottom)
        
        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0}{1:.2f}'.format(CLASSES[cl], score), (top, left-6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) 
        

In [67]:
import cv2
import numpy as np

if __name__ == "__main__":
    BOX_THRESH=0.2
    NMS_THRESH=0.6
    IMG_SIZE=320
    model = "./phone_int8_0630.tflite"
    img_path = "./20230703/0_frame_000000000.bmp"
    CLASSES=("person", "phone")
    IMG_PATH = "./20230703/0_frame_000000000.bmp"
    SAVE_PATH = "./result1.png"
        
    # 手机检测    
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[17, 21], [23, 36], [32, 57], 
               [40, 98], [54, 136], [68, 189], 
               [80, 122], [93, 249], [114, 167]]
    
    
    img = cv.imread(img_path) 
    # img = np.array([img])-128.0
    img = np.array([img])/128-1.0
    img = np.array(img, dtype='int8')
    # img = preprocess_image(img)
    # img = np.transpose(img, (0, 2, 3, 1))
    # img = img/255-0.5
    output = run_inference(model, img)
    print("output shape:",len(output))
    print(np.shape(output[0]), np.shape(output[1]), np.shape(output[2]))   
    
    # post process
    input0_data = output[0]
    input1_data = output[1]
    input2_data = output[2] 
    
    """
        output tensor dim: anchors * (5+num_cls) * maps_h * mapx_w
        while anchors = 3, 5 means [offset_x, offset_y, offset_w, offset_h, object_confidence]
    """  
    input0_data = input0_data.reshape([3,-1] + list(input0_data.shape[-2:])) 
    input1_data = input1_data.reshape([3,-1] + list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3,-1] + list(input2_data.shape[-2:]))

    print(np.shape(input0_data))    
    input_data = list()
    input_data.append(np.transpose(input0_data, (1, 3, 0, 2)))
    input_data.append(np.transpose(input1_data, (1, 3, 0, 2)))
    input_data.append(np.transpose(input2_data, (1, 3, 0, 2)))
    
    
    print(np.shape(input_data[0]), np.shape(input_data[1]), np.shape(input_data[2]))    
    boxes, classes, scores = yolov5_post_process(input_data)
    print(np.shape(boxes))
    
    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
        
    cv2.imwrite(SAVE_PATH, img_1) 

output shape: 3
(1, 40, 3, 7, 40) (1, 10, 3, 7, 10) (1, 20, 3, 7, 20)
(3, 40, 7, 40)
(40, 40, 3, 7) (10, 10, 3, 7) (20, 20, 3, 7)
(359, 4)


  return 1/(1+np.exp(-x))


error: OpenCV(4.7.0) /io/opencv/modules/imgproc/src/color.simd_helpers.hpp:94: error: (-2:Unspecified error) in function 'cv::impl::{anonymous}::CvtHelper<VScn, VDcn, VDepth, sizePolicy>::CvtHelper(cv::InputArray, cv::OutputArray, int) [with VScn = cv::impl::{anonymous}::Set<1>; VDcn = cv::impl::{anonymous}::Set<3, 4>; VDepth = cv::impl::{anonymous}::Set<0, 2, 5>; cv::impl::{anonymous}::SizePolicy sizePolicy = cv::impl::<unnamed>::NONE; cv::InputArray = const cv::_InputArray&; cv::OutputArray = const cv::_OutputArray&]'
> Unsupported depth of input image:
>     'VDepth::contains(depth)'
> where
>     'depth' is 1 (CV_8S)
