In [8]:
import onnxruntime as ort
import onnx
import cv2
import numpy as np
from typing import Tuple, List, Dict, Optional
import warnings
warnings.filterwarnings('ignore')

# -------------------------- 配置参数 --------------------------
MODEL_PATH = "test.onnx"        # 模型路径
IMAGE_PATH = "test.jpg"         # 测试图像路径
INITIAL_CONF_THRESHOLD = 0.9    # 初始置信度阈值（从高开始）
FINAL_CONF_THRESHOLD = 0.05     # 最低置信度阈值
STEP = 0.05                     # 每次降低的步长（0.05即5%）
IOU_THRESHOLD = 0.4             # NMS IOU阈值（保持不变）
CLASS_NAMES = {0: "目标"}       # 类别映射（根据模型修改）
EXECUTION_PROVIDERS = ["CPUExecutionProvider"]  # 执行设备
# --------------------------------------------------------------

def preprocess_image(image_path: str, input_size: Tuple[int, int]) -> Tuple[np.ndarray, Tuple[float, int, int]]:
    """图像预处理（保持长宽比缩放+填充）"""
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"无法读取图像：{image_path}")
    
    # 转换为RGB（根据模型需求调整，若模型用BGR则注释此行）
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    h, w = image.shape[:2]
    target_w, target_h = input_size
    scale = min(target_w / w, target_h / h)
    new_w, new_h = int(w * scale), int(h * scale)
    image_resized = cv2.resize(image, (new_w, new_h))
    
    canvas = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    offset_x, offset_y = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[offset_y:offset_y+new_h, offset_x:offset_x+new_w, :] = image_resized
    
    canvas = canvas.astype(np.float32)  # 归一化到[0,1]（根据模型调整）
    canvas = np.transpose(canvas, (2, 0, 1))
    canvas = np.expand_dims(canvas, axis=0)
    return canvas, (scale, offset_x, offset_y)

def postprocess_output(
    output: List[np.ndarray], 
    input_size: Tuple[int, int],
    image_shape: Tuple[int, int],
    scale: float,
    offset_x: int,
    offset_y: int,
    conf_threshold: float
) -> Tuple[List[np.ndarray], List[float], List[int]]:
    """后处理（适配 [x, y, w, h, conf, class_id] 格式输出）"""
    outputs = output[0][0]  # 提取 [14175, 6] 数据
    boxes, scores, class_ids = [], [], []
    
    for detection in outputs:
        x, y, w, h, conf, class_id = detection
        
        # 假设 x, y, w, h 是模型输入尺寸（480x480）下的绝对像素坐标
        # 直接修正偏移和缩放，无需乘以 input_size
        x = (x - offset_x) / scale  # 移除 x * input_size[0]
        y = (y - offset_y) / scale  # 移除 y * input_size[1]
        w = w / scale  # 移除 w * input_size[0]
        h = h / scale  # 移除 h * input_size[1]
        if conf < conf_threshold:
            continue  # 过滤低置信度
        
        # 坐标修正（映射回原始图像）
        x = (x * input_size[0] - offset_x) / scale
        y = (y * input_size[1] - offset_y) / scale
        w = w * input_size[0] / scale
        h = h * input_size[1] / scale
        
        x1, y1 = max(0, x - w/2), max(0, y - h/2)
        x2, y2 = min(image_shape[1], x + w/2), min(image_shape[0], y + h/2)
        
        boxes.append([x1, y1, x2, y2])
        scores.append(float(conf))
        class_ids.append(int(class_id))
    
    # NMS非极大值抑制
    if boxes:
        indices = cv2.dnn.NMSBoxes(boxes, scores, conf_threshold, IOU_THRESHOLD)
        if len(indices) > 0:
            indices = indices.flatten()
            boxes = [boxes[i] for i in indices]
            scores = [scores[i] for i in indices]
            class_ids = [class_ids[i] for i in indices]
    
    return boxes, scores, class_ids

def load_model(model_path: str, providers: List[str]) -> Tuple[ort.InferenceSession, str, str, Tuple[int, int]]:
    """加载模型并获取输入信息"""
    onnx_model = onnx.load(model_path)
    onnx.checker.check_model(onnx_model)
    session = ort.InferenceSession(model_path, providers=providers)
    
    input_info = session.get_inputs()[0]
    output_info = session.get_outputs()[0]
    input_shape = input_info.shape  # [1, 3, 480, 480]
    input_size = (input_shape[3], input_shape[2])  # (480, 480)
    
    return session, input_info.name, output_info.name, input_size

def test_confidence_thresholds():
    try:
        # 加载模型和图像
        session, input_name, output_name, input_size = load_model(MODEL_PATH, EXECUTION_PROVIDERS)
        raw_image = cv2.imread(IMAGE_PATH)
        if raw_image is None:
            raise FileNotFoundError(f"图像路径错误：{IMAGE_PATH}")
        image_shape = raw_image.shape[:2]  # (h, w)
        
        # 预处理图像（只做一次，避免重复计算）
        preprocessed_img, (scale, offset_x, offset_y) = preprocess_image(IMAGE_PATH, input_size)
        
        # 循环测试不同置信度阈值（从高到低）
        thresholds = np.arange(INITIAL_CONF_THRESHOLD, FINAL_CONF_THRESHOLD - STEP/2, -STEP)
        first_detection_threshold = None
        first_detection_count = 0
        
        print(f"开始测试置信度阈值（从 {INITIAL_CONF_THRESHOLD} 到 {FINAL_CONF_THRESHOLD}，步长 {STEP}）...\n")
        
        for conf_thresh in thresholds:
            conf_thresh = round(conf_thresh, 2)  # 保留两位小数
            
            # 推理（复用预处理结果）
            output = session.run([output_name], {input_name: preprocessed_img})
            
            # 后处理
            boxes, scores, class_ids = postprocess_output(
                output, input_size, image_shape, scale, offset_x, offset_y, conf_thresh
            )
            
            # 记录结果
            print(f"置信度阈值: {conf_thresh} → 检测到 {len(boxes)} 个目标")
            
            # 首次检测到目标时记录阈值
            if len(boxes) > 0 and first_detection_threshold is None:
                first_detection_threshold = conf_thresh
                first_detection_count = len(boxes)
        
        # 输出总结
        print("\n" + "="*50)
        if first_detection_threshold is not None:
            print(f"首次检测到目标的置信度阈值：{first_detection_threshold}（检测到 {first_detection_count} 个目标）")
            print(f"建议使用阈值：{first_detection_threshold}（兼顾精度和召回率）")
        else:
            print(f"在所有测试阈值（最低 {FINAL_CONF_THRESHOLD}）下均未检测到目标")
        print("="*50)
    
    except Exception as e:
        print(f"执行错误：{str(e)}")

if __name__ == "__main__":
    test_confidence_thresholds()

开始测试置信度阈值（从 0.9 到 0.05，步长 0.05）...

置信度阈值: 0.9 → 检测到 37 个目标
置信度阈值: 0.85 → 检测到 43 个目标
置信度阈值: 0.8 → 检测到 50 个目标
置信度阈值: 0.75 → 检测到 54 个目标
置信度阈值: 0.7 → 检测到 62 个目标
置信度阈值: 0.65 → 检测到 67 个目标
置信度阈值: 0.6 → 检测到 71 个目标
置信度阈值: 0.55 → 检测到 81 个目标
置信度阈值: 0.5 → 检测到 89 个目标
置信度阈值: 0.45 → 检测到 97 个目标
置信度阈值: 0.4 → 检测到 107 个目标
置信度阈值: 0.35 → 检测到 116 个目标
置信度阈值: 0.3 → 检测到 126 个目标
置信度阈值: 0.25 → 检测到 135 个目标
置信度阈值: 0.2 → 检测到 143 个目标
置信度阈值: 0.15 → 检测到 165 个目标
置信度阈值: 0.1 → 检测到 196 个目标
置信度阈值: 0.05 → 检测到 276 个目标

首次检测到目标的置信度阈值：0.9（检测到 37 个目标）
建议使用阈值：0.9（兼顾精度和召回率）


In [9]:
import cv2
import numpy as np
import onnxruntime as ort

def preprocess(image, input_shape):
    """预处理图像：适配320x320输入尺寸"""
    h, w = input_shape  # 此处为(320, 320)
    img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR转RGB
    original_h, original_w = img.shape[:2]
    
    # 计算缩放比例（保持原图比例）
    scale = min(w / original_w, h / original_h)
    new_w, new_h = int(original_w * scale), int(original_h * scale)
    img_resized = cv2.resize(img, (new_w, new_h))  # 按比例缩放
    
    # 创建320x320画布并居中放置图像
    pad_h = (h - new_h) // 2
    pad_w = (w - new_w) // 2
    canvas = np.zeros((h, w, 3), dtype=np.uint8)
    canvas[pad_h:pad_h+new_h, pad_w:pad_w+new_w] = img_resized
    
    # 归一化并转换为CHW格式
    canvas = canvas.astype(np.float32) / 255.0  # 归一化到0-1
    canvas = np.transpose(canvas, (2, 0, 1))  # HWC -> CHW
    return canvas[np.newaxis, ...], scale, pad_h, pad_w  # 添加批次维度

def postprocess(outputs, scale, pad_h, pad_w, original_shape, conf_threshold=0.5):
    """后处理：解析6300个检测框（输出格式[1,6300,7]）"""
    original_h, original_w = original_shape
    boxes = []
    
    # 提取检测框数据（去除批次维度，得到[6300,7]）
    output = outputs[0][0]
    
    for box in output:
        # 解析输出：x1,y1,x2,y2(320x320尺度), conf(置信度), class_prob1, class_prob2
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
        conf = box[4]  # 置信度
        class_probs = box[5:]  # 类别概率（长度为2，因输出总长度为7）
        class_id = np.argmax(class_probs)  # 取概率最大的类别ID
        
        # 过滤低置信度框
        if conf < conf_threshold:
            continue
        
        # 转换坐标到原图尺度
        x1 = (x1 - pad_w) / scale
        y1 = (y1 - pad_h) / scale
        x2 = (x2 - pad_w) / scale
        y2 = (y2 - pad_h) / scale
        
        # 边界检查
        x1 = max(0, min(int(x1), original_w))
        y1 = max(0, min(int(y1), original_h))
        x2 = max(0, min(int(x2), original_w))
        y2 = max(0, min(int(y2), original_h))
        
        boxes.append((x1, y1, x2, y2, class_id, conf))
    return boxes

def draw_boxes(image, boxes):
    """只绘制检测框，不显示标签"""
    for (x1, y1, x2, y2, _, _) in boxes:
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)  # 绿色框，线宽2
    return image

def main():
    # 路径配置
    onnx_model_path = "bfs3_320.onnx"
    image_path = "test.jpg"
    output_path = "output.jpg"
    conf_threshold = 0.5  # 置信度阈值
    
    # 加载模型
    session = ort.InferenceSession(onnx_model_path)
    input_name = session.get_inputs()[0].name  # 输入名称为"images"
    input_shape = session.get_inputs()[0].shape[2:]  # 提取[H,W]，即(320,320)
    
    # 读取图像
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"无法读取图像: {image_path}")
    original_shape = image.shape[:2]
    
    # 预处理与推理
    input_tensor, scale, pad_h, pad_w = preprocess(image, input_shape)
    outputs = session.run(None, {input_name: input_tensor})
    
    # 后处理
    boxes = postprocess(outputs, scale, pad_h, pad_w, original_shape, conf_threshold)
    
    # 输出检测信息
    print(f"检测到的目标数量: {len(boxes)}")
    print(f"使用的置信度阈值: {conf_threshold}")
    if boxes:
        print("各目标的置信度:")
        for i, (_, _, _, _, _, conf) in enumerate(boxes, 1):
            print(f"  目标{i}: {conf:.4f}")
    
    # 保存结果
    result_image = draw_boxes(image.copy(), boxes)
    cv2.imwrite(output_path, result_image)
    print(f"\n检测结果已保存至: {output_path}")

if __name__ == "__main__":
    main()

检测到的目标数量: 0
使用的置信度阈值: 0.5

检测结果已保存至: output.jpg


In [6]:
import onnx

# 加载 ONNX 模型
model = onnx.load("bfs3_320.onnx")

# 解析输入信息
print("===== 模型输入信息 =====")
for input_tensor in model.graph.input:
    # 输入名称（代码中需匹配此名称）
    input_name = input_tensor.name
    # 输入形状（维度）
    input_shape = [dim.dim_value for dim in input_tensor.type.tensor_type.shape.dim]
    # 输入数据类型
    input_dtype = input_tensor.type.tensor_type.elem_type
    # 转换数据类型为可读格式（1=float32，7=int64等，参考 ONNX 官方定义）
    dtype_map = {1: "float32", 7: "int64"}
    input_dtype_str = dtype_map.get(input_dtype, f"未知类型（{input_dtype}）")
    
    print(f"输入名称: {input_name}")
    print(f"输入形状: {input_shape}（格式通常为 [N, C, H, W]，N=批量大小，C=通道数）")
    print(f"数据类型: {input_dtype_str}\n")

# 解析输出信息
print("===== 模型输出信息 =====")
for output_tensor in model.graph.output:
    output_name = output_tensor.name
    output_shape = [dim.dim_value for dim in output_tensor.type.tensor_type.shape.dim]
    output_dtype = output_tensor.type.tensor_type.elem_type
    output_dtype_str = dtype_map.get(output_dtype, f"未知类型（{output_dtype}）")
    
    print(f"输出名称: {output_name}")
    print(f"输出形状: {output_shape}（YOLO 通常为 [N, 检测框数, 5+类别数]）")
    print(f"数据类型: {output_dtype_str}\n")

===== 模型输入信息 =====
输入名称: images
输入形状: [1, 3, 320, 320]（格式通常为 [N, C, H, W]，N=批量大小，C=通道数）
数据类型: float32

===== 模型输出信息 =====
输出名称: output
输出形状: [1, 6300, 7]（YOLO 通常为 [N, 检测框数, 5+类别数]）
数据类型: float32

