In [None]:
# ==============================================================================
# YOLOv5s + EfficientNetV2s 雙階段辨識流程（F/S/V三分類＋推論速度＋自動打包）
# ==============================================================================

import os
import shutil
import time
from datetime import datetime
from google.colab import drive
import torch
import timm
import cv2
import pandas as pd
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import torchvision.transforms as transforms
from IPython.display import display as ipy_display
import warnings
warnings.filterwarnings('ignore')

# 1. Mount Google Drive so the model weights, the test images and the output
#    folder under /content/drive are reachable from this runtime.
drive.mount('/content/drive')

# ====== Clone the YOLOv5 repository ======
# Only runs when /content/yolov5 is absent: clone the repo, install its
# requirements from inside the checkout, then return to /content.
# NOTE: the `!` and `%cd` lines are IPython/Colab magics, not plain Python.
if not os.path.exists('/content/yolov5'):
    !git clone https://github.com/ultralytics/yolov5.git /content/yolov5
    %cd /content/yolov5
    !pip install -r requirements.txt
    %cd /content

# 2. User settings
# Weights file handed to torch.hub.load(..., 'custom', path=...) in load_models().
YOLO_MODEL_PATH = '/content/drive/MyDrive/DualStage-DefectAI/程式/yolo+eff模型與測試集/yolo_best.pt'
# Checkpoint read with torch.load() in load_models(); its 'model_state_dict'
# entry is required and its 'class_names' entry is optional.
EFFICIENTNET_MODEL_PATH = '/content/drive/MyDrive/DualStage-DefectAI/程式/yolo+eff模型與測試集/eff_best.pth'
# Folder scanned by main() for .png / .jpg / .jpeg files.
TEST_IMAGES_DIR = '/content/drive/MyDrive/DualStage-DefectAI/程式/yolo+eff模型與測試集/測試集/images'
# Destination on Drive that main() copies the result ZIP into.
OUTPUT_DIR_ON_DRIVE = '/content/drive/MyDrive/DualStage-DefectAI/輸出/yolo+efficientnet'
# Assigned to yolo_model.conf in load_models().
YOLO_CONF_THRESHOLD = 0.2
# Fraction of a detection's width/height added on each side before the crop
# is passed to the classifier (see process_image()).
CROP_PADDING_RATIO = 0.1

# 3. Validate paths: report every missing input, then abort once so the user
#    sees the complete list instead of failing on the first problem.
paths_to_check = dict([
    ("YOLO 模型", YOLO_MODEL_PATH),
    ("EfficientNetV2 模型", EFFICIENTNET_MODEL_PATH),
    ("測試圖片資料夾", TEST_IMAGES_DIR),
])
missing_paths = [
    (label, location)
    for label, location in paths_to_check.items()
    if not os.path.exists(location)
]
for label, location in missing_paths:
    print(f"✗ 錯誤: 找不到 {label} -> {location}")
all_paths_ok = not missing_paths
if missing_paths:
    raise FileNotFoundError("有路徑設定錯誤，請檢查上方訊息！")

# 4. Load models
@torch.no_grad()
def load_models():
    """Load the YOLOv5 detector and the EfficientNetV2 classifier.

    Both models are moved to CUDA when available (CPU otherwise) and put in
    eval mode.

    Returns:
        A tuple ``(yolo_model, effnet_model, effnet_class_names, device)``.

    Raises:
        KeyError: if the classifier checkpoint has no ``'model_state_dict'``.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # Stage one: custom YOLOv5 weights loaded through the local repo clone.
    yolo_model = torch.hub.load(
        '/content/yolov5',
        'custom',
        path=YOLO_MODEL_PATH,
        source='local',
    )
    yolo_model.conf = YOLO_CONF_THRESHOLD
    yolo_model.to(device)
    yolo_model.eval()

    # Stage two: the class list comes from the checkpoint when present and
    # otherwise falls back to the three classes F / S / V.
    checkpoint = torch.load(EFFICIENTNET_MODEL_PATH, map_location=device)
    effnet_class_names = checkpoint.get('class_names', ['F', 'S', 'V'])
    effnet_model = timm.create_model(
        'tf_efficientnetv2_s.in1k',
        pretrained=False,
        num_classes=len(effnet_class_names),
    )
    effnet_model.load_state_dict(checkpoint['model_state_dict'])
    effnet_model.to(device)
    effnet_model.eval()

    return yolo_model, effnet_model, effnet_class_names, device

def get_efficientnet_transforms():
    """Build the preprocessing pipeline applied to each crop before classification.

    The pipeline resizes to 224x224, converts to a tensor and normalises each
    channel with a fixed mean / standard deviation.

    NOTE(review): these values must match the preprocessing used when the
    classifier was trained — confirm against the training script.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)

def draw_on_image(image, detections_df):
    """Return a copy of *image* with a labelled box drawn for every detection.

    Args:
        image: PIL image to annotate; it is copied, not modified.
        detections_df: DataFrame with ``xmin``/``ymin``/``xmax``/``ymax``
            columns. Rows that carry an ``effnet_class`` column are labelled
            with the second-stage class and ``effnet_confidence``; other rows
            are labelled with the YOLO ``name`` and ``confidence``.

    Returns:
        The annotated copy of the image.
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    try:
        label_font = ImageFont.truetype("LiberationSans-Regular.ttf", 20)
    except IOError:
        # Font file not available: fall back to Pillow's built-in font.
        label_font = ImageFont.load_default()

    # Second-stage colour coding: red F, blue S, orange V, gray for anything else.
    stage_two_palette = {'F': 'red', 'S': 'blue', 'V': 'orange'}

    for _, det in detections_df.iterrows():
        left = int(det['xmin'])
        top = int(det['ymin'])
        right = int(det['xmax'])
        bottom = int(det['ymax'])

        # Decide which stage produced this row.
        if 'effnet_class' in det:
            class_name = det.get('effnet_class', '')
            box_color = stage_two_palette.get(class_name, 'gray')
            score = det.get('effnet_confidence', 0)
        else:
            class_name = det['name']
            box_color = 'red' if class_name == 'F' else 'blue'
            score = det.get('confidence', 0)
        caption = f"{class_name}: {score:.2%}"

        pen.rectangle([left, top, right, bottom], outline=box_color, width=3)
        try:
            extent = pen.textbbox((0, 0), caption, font=label_font)
            caption_w = extent[2] - extent[0]
            caption_h = extent[3] - extent[1]
            # Prefer placing the caption above the box; if that would leave the
            # image, place it just inside the top edge instead.
            above_box = top - caption_h - 10
            caption_y = above_box if above_box >= 0 else top + 5
            pen.rectangle(
                [left, caption_y, left + caption_w + 10, caption_y + caption_h + 5],
                fill=box_color,
            )
            pen.text((left + 5, caption_y), caption, fill='white', font=label_font)
        except AttributeError:
            # Draw object without textbbox(): plain text, no background.
            pen.text((left + 5, top - 20), caption, fill=box_color, font=label_font)
    return canvas

# 5. Single-image processing (also returns the inference timings)
def _sync_if_cuda(device):
    """Wait for pending CUDA work on *device*; do nothing for non-CUDA devices."""
    if getattr(device, 'type', None) == 'cuda':
        torch.cuda.synchronize(device)


@torch.no_grad()
def process_image(image_path, yolo_model, effnet_model, effnet_transforms, effnet_class_names, device):
    """Detect objects with YOLO, then classify every 'F' detection with EfficientNet.

    Args:
        image_path: Path of the image to process.
        yolo_model: Callable detector; its result must offer ``.pandas().xyxy[0]``.
        effnet_model: Classifier called on a single-image batch tensor.
        effnet_transforms: Callable turning a PIL crop into a tensor.
        effnet_class_names: Sequence mapping class index to class label.
        device: Device the crop tensor is moved to before classification.

    Returns:
        ``(original_image, yolo_df, effnet_df, yolo_infer_time, effnet_total_time)``.
        ``yolo_df`` holds every YOLO detection; ``effnet_df`` holds the 'F'
        rows extended with ``effnet_class`` and ``effnet_confidence``. Times
        are in seconds. If the image cannot be read the function prints a
        message and returns ``(None, empty DataFrame, empty DataFrame, 0, 0)``.
    """
    try:
        original_image = Image.open(image_path).convert('RGB')
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole batch.
        print(f"✗ 無法讀取圖片 {os.path.basename(image_path)}: {e}")
        return None, pd.DataFrame(), pd.DataFrame(), 0, 0
    img_w, img_h = original_image.size

    # Stage one: YOLOv5 object detection.
    # CUDA kernels run asynchronously, so synchronise around the timed region;
    # otherwise the clock only measures how long it took to queue the work.
    _sync_if_cuda(device)
    t0 = time.perf_counter()
    yolo_results = yolo_model(original_image)
    _sync_if_cuda(device)
    yolo_infer_time = time.perf_counter() - t0
    yolo_df = yolo_results.pandas().xyxy[0]

    # Stage two: EfficientNetV2 classification, only for 'F' detections.
    f_detections = yolo_df[yolo_df['name'] == 'F'].copy()
    detailed_results = []
    effnet_total_time = 0
    for _, row in f_detections.iterrows():
        x1, y1, x2, y2 = row['xmin'], row['ymin'], row['xmax'], row['ymax']
        # Grow the box by CROP_PADDING_RATIO of its size on each side,
        # clamped to the image bounds.
        pad_w = (x2 - x1) * CROP_PADDING_RATIO
        pad_h = (y2 - y1) * CROP_PADDING_RATIO
        x1_pad = max(0, x1 - pad_w)
        y1_pad = max(0, y1 - pad_h)
        x2_pad = min(img_w, x2 + pad_w)
        y2_pad = min(img_h, y2 + pad_h)
        cropped_image = original_image.crop((x1_pad, y1_pad, x2_pad, y2_pad))
        input_tensor = effnet_transforms(cropped_image).unsqueeze(0).to(device)

        # Time only the forward pass + softmax, with the device drained on
        # both sides so the host-to-device copy above is not counted.
        _sync_if_cuda(device)
        t2 = time.perf_counter()
        output = effnet_model(input_tensor)
        probabilities = torch.softmax(output, dim=1)
        _sync_if_cuda(device)
        effnet_total_time += time.perf_counter() - t2

        confidence, predicted_idx = torch.max(probabilities, 1)
        predicted_class = effnet_class_names[predicted_idx.item()]
        result_row = row.to_dict()
        result_row['effnet_class'] = predicted_class
        result_row['effnet_confidence'] = confidence.item()
        detailed_results.append(result_row)
    effnet_df = pd.DataFrame(detailed_results)
    return original_image, yolo_df, effnet_df, yolo_infer_time, effnet_total_time

# 6. Main pipeline
def main():
    """Run the two-stage pipeline over every test image and archive the results.

    For each image in ``TEST_IMAGES_DIR`` the function runs ``process_image``,
    prints and displays the stage-one and combined results, and saves the
    annotated images. Afterwards it prints average timings, writes the
    collected detections to CSV, zips the local result folder, copies the ZIP
    to ``OUTPUT_DIR_ON_DRIVE`` and removes the local copies.
    """
    yolo_model, effnet_model, effnet_class_names, device = load_models()
    effnet_transforms = get_efficientnet_transforms()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Plain-Python replacement for the `%cd /content` notebook magic, so the
    # relative result folder below is created under /content.
    os.chdir('/content')
    local_output_dir = f'results_{timestamp}'
    annotated_img_dir = os.path.join(local_output_dir, 'annotated_images')
    yolo_only_img_dir = os.path.join(local_output_dir, 'yolo_only_images')

    # Sorted so the processing order (and therefore the CSV row order) is
    # reproducible; os.listdir() returns entries in arbitrary order.
    image_files = sorted(
        f for f in os.listdir(TEST_IMAGES_DIR)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if not image_files:
        print(f"✗ 在 '{TEST_IMAGES_DIR}' 中找不到任何圖片檔案。")
        return

    # Created only once there is work to do, so an empty run leaves no folders.
    os.makedirs(annotated_img_dir, exist_ok=True)
    os.makedirs(yolo_only_img_dir, exist_ok=True)

    master_effnet_results = []
    master_yolo_results = []
    total_yolo_time = 0
    total_effnet_time = 0
    image_count = 0

    for i, filename in enumerate(image_files):
        print(f"\n{'='*20} ({i+1}/{len(image_files)}) 正在處理: {filename} {'='*20}")
        image_path = os.path.join(TEST_IMAGES_DIR, filename)
        original_image, yolo_df, effnet_df, yolo_time, effnet_time = process_image(
            image_path, yolo_model, effnet_model, effnet_transforms, effnet_class_names, device
        )
        if original_image is None:
            # Unreadable image: already reported by process_image.
            continue

        # ========== Stage one: YOLOv5 results (only 'F' boxes are drawn) ==========
        print("\n🔵 [階段一] YOLOv5 原始偵測結果：")
        if not yolo_df.empty:
            print(" ├─ 文字報表 (偵測到的所有物件):")
            print(yolo_df[['name', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax']].to_string())
            yolo_f_only_df = yolo_df[yolo_df['name'] == 'F']
            if not yolo_f_only_df.empty:
                print(" └─ 視覺化結果 (僅顯示 'F' 類物件):")
                yolo_annotated_image = draw_on_image(original_image, yolo_f_only_df)
                ipy_display(yolo_annotated_image)
                yolo_annotated_image.save(os.path.join(yolo_only_img_dir, f"yolo_only_{filename}"))
            else:
                print(" └─ 視覺化結果: YOLOv5 未偵測到 'F' 類物件，因此無預覽圖。")
            yolo_df['source_image'] = filename
            master_yolo_results.append(yolo_df)
        else:
            print(" └─ 未偵測到任何物件。")

        # ========== Final result: both stages combined ==========
        print("\n✅ [最終結果] 雙階段結合辨識：")
        if not effnet_df.empty:
            effnet_df['source_image'] = filename
            master_effnet_results.append(effnet_df)
            final_annotated_image = draw_on_image(original_image, effnet_df)
            final_annotated_image.save(os.path.join(annotated_img_dir, f"annotated_{filename}"))
            print(f" ├─ EfficientNetV2 已分類 {len(effnet_df)} 個 'F' 物件：")
            print(effnet_df[['effnet_class', 'effnet_confidence', 'xmin', 'ymin', 'xmax', 'ymax']].to_string())
            print(" └─ 視覺化結果:")
            ipy_display(final_annotated_image)
        else:
            print(" └─ 無第二階段分類結果 (未偵測到 'F' 類物件)。")
        total_yolo_time += yolo_time
        total_effnet_time += effnet_time
        image_count += 1

    # Inference speed statistics
    if image_count > 0:
        total_time = total_yolo_time + total_effnet_time
        print("\n====== 推論速度統計 ======")
        print(f"YOLOv5s 平均單張推論時間：{total_yolo_time / image_count:.4f} 秒")
        print(f"EfficientNetV2s 平均單張推論時間：{total_effnet_time / image_count:.4f} 秒")
        print(f"雙階段合計平均單張推論時間：{total_time / image_count:.4f} 秒")
        # Guard against a zero elapsed time so the FPS line cannot divide by zero.
        if total_time > 0:
            print(f"YOLOv5s + EfficientNetV2s 平均 FPS：{image_count / total_time:.2f}")

    # Save result CSVs
    if master_yolo_results:
        yolo_all_df = pd.concat(master_yolo_results, ignore_index=True)
        yolo_csv_path = os.path.join(local_output_dir, 'yolo_all_results.csv')
        yolo_all_df.to_csv(yolo_csv_path, index=False, encoding='utf-8-sig')
    if master_effnet_results:
        effnet_all_df = pd.concat(master_effnet_results, ignore_index=True)
        effnet_csv_path = os.path.join(local_output_dir, 'effnet_results.csv')
        effnet_all_df.to_csv(effnet_csv_path, index=False, encoding='utf-8-sig')

    # Package annotated images and results.
    # Look for actual files: the two sub-folders always exist, so checking
    # os.listdir(local_output_dir) could never detect "no output".
    has_output = any(files for _, _, files in os.walk(local_output_dir))
    if has_output:
        print("\n› 正在打包所有標註圖片與結果...")
        # make_archive returns the archive's path, so no path has to be guessed.
        zip_path = shutil.make_archive(local_output_dir, 'zip', local_output_dir)
        # Without this, copying into a missing folder would create a *file*
        # named like the folder (or fail if its parent is missing too).
        os.makedirs(OUTPUT_DIR_ON_DRIVE, exist_ok=True)
        shutil.copy(zip_path, OUTPUT_DIR_ON_DRIVE)
        print(f"✓ 打包完成，ZIP 檔已儲存到您的雲端：{os.path.join(OUTPUT_DIR_ON_DRIVE, os.path.basename(zip_path))}")
        shutil.rmtree(local_output_dir)
        os.remove(zip_path)
    else:
        print("\n› 沒有產生任何輸出結果，無需打包。")
        # Drop the empty result folders instead of leaving them behind.
        shutil.rmtree(local_output_dir, ignore_errors=True)

# Run the pipeline only when this code executes as the main module,
# not when it is imported from elsewhere.
if __name__ == '__main__':
    main()


Output hidden; open in https://colab.research.google.com to view.