In [1]:
import os
import glob
from tqdm import tqdm

# --- Configuration ---
label_dir = r"C:\Users\User\Desktop\YOLO\Gold-YOLO\dataset\coco\labels\train2017"  # change to your label folder path
num_classes = 80  # number of classes in the COCO dataset
# --- End of configuration ---


def check_label_line(line, num_classes):
    """Validate one line of a YOLO-style label file.

    A valid line holds exactly five whitespace-separated values:
    ``class_id x_center y_center width height``.

    Args:
        line: Raw text of the line (a trailing newline is allowed).
        num_classes: Number of classes; valid ids are ``0 .. num_classes - 1``.

    Returns:
        ``None`` when the line is valid, otherwise a message describing the
        first problem found.
    """
    parts = line.split()

    # 1. Format check: exactly 5 fields.
    if len(parts) != 5:
        return f"格式錯誤 (應有 5 個值，實際為 {len(parts)})"

    # 2. Type check: integer class id followed by four floats.
    try:
        class_id = int(parts[0])
        x_center, y_center, width, height = (float(p) for p in parts[1:])
    except ValueError:
        return "數值轉換錯誤 (非數字?)"

    # 3. Range checks. NaN fails every comparison below, so it is rejected too.
    if not (0 <= class_id < num_classes):
        return f"Class ID {class_id} 超出範圍 [0, {num_classes-1}]"
    if not (0.0 <= x_center <= 1.0):
        return f"x_center {x_center} 超出範圍 [0, 1]"
    if not (0.0 <= y_center <= 1.0):
        return f"y_center {y_center} 超出範圍 [0, 1]"
    if not (0.0 < width <= 1.0):  # width must be strictly positive
        return f"width {width} 無效 (必須 > 0 且 <= 1)"
    if not (0.0 < height <= 1.0):  # height must be strictly positive
        return f"height {height} 無效 (必須 > 0 且 <= 1)"

    # Optional (not enforced): reject boxes that extend past the image, i.e.
    # x_center - width / 2 < 0, x_center + width / 2 > 1,
    # y_center - height / 2 < 0 or y_center + height / 2 > 1.
    return None


def check_label_file(file_path, num_classes):
    """Validate every line of a single label file.

    Args:
        file_path: Path of the ``.txt`` label file.
        num_classes: Number of classes; valid ids are ``0 .. num_classes - 1``.

    Returns:
        A ``(is_empty, line_errors)`` tuple. ``is_empty`` is ``True`` when the
        file contains no lines at all. ``line_errors`` is a list of formatted
        report strings, one per invalid line, in file order.

    Raises:
        OSError: The file could not be opened or read.
        UnicodeDecodeError: The file is not valid UTF-8.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    line_errors = []
    for line_num, line in enumerate(lines, start=1):
        error_msg = check_label_line(line, num_classes)
        if error_msg is not None:
            line_errors.append(f"  行 {line_num}: {line.strip()} -> {error_msg}")
    return (not lines), line_errors


invalid_files = []  # paths of files that are empty, unreadable, or have bad lines
invalid_lines_info = {}  # file path -> list of formatted per-line error reports

print(f"開始檢查目錄: {label_dir}")

# Collect every .txt file directly inside the label directory.
label_files = glob.glob(os.path.join(label_dir, '*.txt'))

if not label_files:
    print(f"錯誤：在 {label_dir} 中找不到任何 .txt 標籤文件。請檢查路徑。")
else:
    print(f"找到 {len(label_files)} 個標籤文件，開始檢查...")
    # glob yields each path once, so no duplicate check is needed on append.
    for file_path in tqdm(label_files, desc="檢查標籤文件"):
        try:
            is_empty, line_errors = check_label_file(file_path, num_classes)
        except (OSError, UnicodeError) as e:
            # Unreadable or undecodable file: report it and keep going.
            print(f"\n處理文件 {os.path.basename(file_path)} 時發生錯誤: {e}")
            invalid_files.append(file_path)
            continue

        if is_empty:
            print(f"警告: 文件 {os.path.basename(file_path)} 為空。")
            invalid_files.append(file_path)
        elif line_errors:
            invalid_lines_info[file_path] = line_errors
            invalid_files.append(file_path)

    print("\n檢查完成。")

    if invalid_files:
        print(f"\n發現 {len(invalid_files)} 個文件可能存在問題:")
        # Print each problematic file followed by its per-line details.
        for file_path in invalid_files:
            print(f"- {os.path.basename(file_path)}")
            if file_path in invalid_lines_info:
                for line_info in invalid_lines_info[file_path]:
                    print(line_info)
            else:
                # No per-line details: the file was empty or could not be read.
                print("  (文件級別錯誤或為空)")
    else:
        print("\n所有標籤文件格式和數值範圍看起來都正常。")


開始檢查目錄: C:\Users\User\Desktop\YOLO\Gold-YOLO\dataset\coco\labels\train2017
找到 117266 個標籤文件，開始檢查...


檢查標籤文件: 100%|██████████| 117266/117266 [14:07<00:00, 138.43it/s]


檢查完成。

所有標籤文件格式和數值範圍看起來都正常。



