In [6]:
# YOLO object detection combined with SAHI sliced inference; the annotated
# video is written to an output file.

import os

# Expose only the second GPU (index 1) to this process.
# NOTE(review): presumably this has to run before any CUDA-using library
# initialises the driver, so keep this cell first — confirm.
os.environ.update(CUDA_VISIBLE_DEVICES="1")


In [None]:

import cv2
from ultralytics import YOLO
from sahi import AutoDetectionModel # 支援多種物件檢測模板的factory class
from sahi.predict import get_sliced_prediction # predict:使用指定的物件辨識模型而非coco；執行sliced inference 的函式

# Training run to load, and the minimum confidence a detection needs to be kept.
model_name = "train5"
CONFIDENCE_SCORE = 0.1

# Weights file `last.pt` of the selected run.
model = f"runs/detect/{model_name}/weights/last.pt"

# Wrap the YOLO weights in a SAHI detection model.
# Per the original note, SAHI's Ultralytics model names are:
#   ["yolov8", "yolov11", "yolo11", "ultralytics"]
# NOTE(review): CUDA_VISIBLE_DEVICES is set to "1" earlier in the notebook, so
# "cuda:0" should resolve to that single visible card — still to be confirmed.
detection_model = AutoDetectionModel.from_pretrained(
    model_path=model,
    model_type="yolov11",
    confidence_threshold=CONFIDENCE_SCORE,
    device="cuda:0",
)



# Open the input video and fail loudly if it cannot be read; otherwise the
# frame loop would exit immediately and leave an empty output behind.
video_name = "cigarette_video4"
video_path = f"input/{video_name}.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise OSError(f"Cannot open input video: {video_path}")

# Source FPS and resolution, reused for the output so both videos line up.
# FPS is kept as a float: truncating e.g. 29.97 to 29 would make the output
# play slower (and run longer) than the source.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output file name records the model run and confidence threshold used.
output_path = f"output/{video_name}_SAHI({model_name},conf{CONFIDENCE_SCORE}).mp4"
# VideoWriter does not create missing directories; it just fails to open.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # four-character codec code (MP4V)
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise OSError(f"Cannot open output video for writing: {output_path}")

def _draw_detections(frame, predictions):
    """Draw a bounding box and class-name label on `frame` for each prediction.

    Args:
        frame: image array as returned by `cap.read()` (OpenCV BGR order);
            it is annotated in place.
        predictions: iterable of SAHI object predictions; each one is read
            through `bbox.minx/miny/maxx/maxy` and `category.name`.
    """
    box_color = (56, 56, 255)  # BGR
    text_color = (255, 255, 255)
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    pad = 3  # pixels of padding around the label text

    for prediction in predictions:
        bbox = prediction.bbox
        label = str(prediction.category.name)
        x1, y1 = int(bbox.minx), int(bbox.miny)
        x2, y2 = int(bbox.maxx), int(bbox.maxy)

        # Bounding box.
        cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)

        (text_w, text_h), _ = cv2.getTextSize(
            label, font_face, fontScale=font_scale, thickness=1
        )
        # The label normally sits just above the box. When the box touches
        # the top of the frame, push the label down so it stays visible.
        anchor_y = max(y1, text_h + pad)

        # Filled background behind the text, then the text itself.
        cv2.rectangle(
            frame,
            (x1, anchor_y - text_h - pad),
            (x1 + text_w, anchor_y + pad),
            box_color,
            -1,
        )
        cv2.putText(
            frame,
            label,
            (x1, anchor_y - 2),
            font_face,
            font_scale,
            text_color,
            thickness=1,
            lineType=cv2.LINE_AA,
        )


# Run sliced detection on every frame and write the annotated frames out.
# try/finally guarantees the reader and writer are released even if inference
# fails mid-video, so the output file is closed properly.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of the video (or a read failure): stop processing.
            break

        # OpenCV decodes frames as BGR, while SAHI interprets numpy input as
        # RGB (its Ultralytics wrapper reverses the channels again before
        # inference). Convert a copy for detection; `frame` itself stays BGR
        # for drawing and writing.
        results = get_sliced_prediction(
            image=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
            detection_model=detection_model,
            slice_height=512,
            slice_width=512,
            overlap_height_ratio=0.1,
            overlap_width_ratio=0.1,
        )

        _draw_detections(frame, results.object_prediction_list)
        out.write(frame)
finally:
    # Release resources.
    cap.release()
    out.release()  # closes the VideoWriter
    cv2.destroyAllWindows()

print("done")
