In [None]:
# 제어 입력 없는 모델(가속도 고려 안함)

import cv2
import numpy as np
import os
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import torch
from PIL import Image

# --- Configuration ---
# Input video to track, and the annotated video written by this cell.
VIDEO_INPUT_PATH = "KF_vid3.mp4"
VIDEO_OUTPUT_PATH = "KF_vid_o.mp4"
# Label of the object class to track; compared case-insensitively against the
# detector's label names. When None, no detection is selected as the target.
USER_TARGET_CLASS_NAME = 'airplane' # e.g. "clock"
HF_MODEL_NAME = "facebook/detr-resnet-50"
# Passed as `threshold` to the detector's post-processing step.
CONFIDENCE_THRESHOLD = 0.5

# --- Kalman filter parameters ---
# Squared and used as the scale factor of the process-noise covariance Q.
accel_noise_std = 0.5
# Squared to build the measurement-noise covariance R; also used as the
# standard deviation of the synthetic noise added to each detection.
measurement_noise_std = 8.0

# --- Load the Hugging Face model and image processor ---
# A load failure is reported but not fatal: both globals are set to None, and
# detect_object_huggingface() then returns "no detection" for every frame.
try:
    image_processor = AutoImageProcessor.from_pretrained(HF_MODEL_NAME)
    model = AutoModelForObjectDetection.from_pretrained(HF_MODEL_NAME)
    print(f"Hugging Face 모델 '{HF_MODEL_NAME}' 로드 성공.")
except Exception as e:
    print(f"오류: Hugging Face 모델 '{HF_MODEL_NAME}' 로드 실패. ({e})")
    image_processor = None
    model = None

def detect_object_huggingface(frame_color_cv, frame_color_for_draw, frame_idx_for_debug=0):
    """Run the Hugging Face detector on one frame and pick the best target box.

    Args:
        frame_color_cv: BGR frame (as read by OpenCV) that is fed to the detector.
        frame_color_for_draw: Frame that is annotated in place with the chosen
            bounding box and its score label.
        frame_idx_for_debug: Frame index used in log messages; the per-frame
            class listing is printed only for indices below 5 (or always when
            no target class is configured).

    Returns:
        A ``(center, bbox, detected_classes)`` tuple. ``center`` is
        ``np.array([cx, cy])`` holding the integer box centre and ``bbox`` is
        ``(x, y, width, height)`` for the highest-scoring detection whose label
        equals ``USER_TARGET_CLASS_NAME`` (case-insensitive); both are ``None``
        when no such detection exists. ``detected_classes`` is the set of
        lower-cased label names of every detection returned by the
        post-processing step; it is empty when the model failed to load.
    """
    if model is None or image_processor is None:
        # Model loading failed earlier: report "nothing detected".
        return None, None, set()

    image_pil = Image.fromarray(cv2.cvtColor(frame_color_cv, cv2.COLOR_BGR2RGB))
    original_width, original_height = image_pil.size
    inputs = image_processor(images=image_pil, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image_pil.size[::-1]])
    results = image_processor.post_process_object_detection(
        outputs,
        threshold=CONFIDENCE_THRESHOLD,
        target_sizes=target_sizes,
    )[0]

    # Normalise the configured target name once instead of per detection.
    target_name = None
    if USER_TARGET_CLASS_NAME is not None:
        target_name = USER_TARGET_CLASS_NAME.lower()

    best_target_detection = None
    detected_classes_in_frame = set()

    for score, label_id, box in zip(results["scores"], results["labels"], results["boxes"]):
        label_name = model.config.id2label[label_id.item()].lower()
        detected_classes_in_frame.add(label_name)

        if target_name is None or label_name != target_name:
            continue

        # Box corners are truncated to whole pixels and clamped to the frame.
        startX, startY, endX, endY = (int(coord) for coord in box.tolist()[:4])
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(original_width, endX), min(original_height, endY)
        bbox = (startX, startY, endX - startX, endY - startY)
        cx = int((startX + endX) / 2.0)
        cy = int((startY + endY) / 2.0)
        current_score = score.item()
        # Keep only the highest-scoring detection of the target class.
        if best_target_detection is None or current_score > best_target_detection[0]:
            best_target_detection = (current_score, cx, cy, bbox)

    if frame_idx_for_debug < 5 or USER_TARGET_CLASS_NAME is None:
        if detected_classes_in_frame:
            # Sorted so the listing is stable from run to run.
            print(f"[Frame {frame_idx_for_debug}] Detected classes: {sorted(detected_classes_in_frame)}. "
                  f"Set USER_TARGET_CLASS_NAME if your target object is among them.")
        elif frame_idx_for_debug < 5:
            print(f"[Frame {frame_idx_for_debug}] No objects detected with confidence > {CONFIDENCE_THRESHOLD}.")

    if best_target_detection is None:
        return None, None, detected_classes_in_frame

    score, cx, cy, bbox = best_target_detection
    print(f"[Frame {frame_idx_for_debug} DETECT] Target '{USER_TARGET_CLASS_NAME}' found. Score: {score:.2f}, Raw Coords: ({cx}, {cy})")
    (x_b, y_b, w_b, h_b) = bbox
    cv2.rectangle(frame_color_for_draw, (x_b, y_b), (x_b + w_b, y_b + h_b), (0, 255, 0), 2)
    if USER_TARGET_CLASS_NAME:
        cv2.putText(frame_color_for_draw, f"{USER_TARGET_CLASS_NAME}: {score:.2f}", (x_b, y_b - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return np.array([cx, cy]), bbox, detected_classes_in_frame

# Open the input video; stop this run if it cannot be read.
cap = cv2.VideoCapture(VIDEO_INPUT_PATH)
if not cap.isOpened():
    print(f"오류: '{VIDEO_INPUT_PATH}' 영상을 열 수 없습니다.")
    cap.release()
    # `exit()` is the interactive-shell helper installed by `site` and is not
    # meant for programs; raise SystemExit directly, with a failure status.
    raise SystemExit(1)

fps = cap.get(cv2.CAP_PROP_FPS)
# Fall back to 30 FPS when the reported rate is unusable (0, negative or NaN),
# so that dt below is always a finite positive time step.
if not np.isfinite(fps) or fps <= 0:
    fps = 30.0
dt = 1.0 / fps
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# The annotated output uses the same frame rate and frame size as the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_video = cv2.VideoWriter(VIDEO_OUTPUT_PATH, fourcc, fps, (frame_width, frame_height))
if not out_video.isOpened():
    # Processing still runs, but make the otherwise silent failure visible.
    print(f"경고: '{VIDEO_OUTPUT_PATH}' 출력 영상을 열 수 없습니다. 결과 비디오가 저장되지 않습니다.")

# --- Kalman filter matrices for a 4-dimensional state [x, y, vx, vy] ---
# State transition: each position advances by its velocity times dt; the
# velocities are carried over unchanged.
A = np.array([[1, 0, dt, 0], [0, 1, 0, dt], [0, 0, 1, 0], [0, 0, 0, 1]])
# Control-input matrix mapping a 2-D acceleration into the state. The control
# vector u_k below is all zeros, so this term contributes nothing to the
# prediction.
B = np.array([[0.5 * dt**2, 0], [0, 0.5 * dt**2], [dt, 0], [0, dt]])
u_k = np.array([0.0, 0.0])
# Measurement matrix: only the position components (x, y) are observed.
H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])
# Process-noise covariance. The matrix entries are the products of the
# [dt**2/2, dt] terms per axis, scaled by the variance accel_noise_std**2.
Q_factor = accel_noise_std**2
Q = np.array([[dt**4/4, 0, dt**3/2, 0], [0, dt**4/4, 0, dt**3/2],
              [dt**3/2, 0, dt**2, 0], [0, dt**3/2, 0, dt**2]]) * Q_factor
# Measurement-noise covariance: independent x/y noise with the same variance.
R = np.eye(2) * measurement_noise_std**2

# Initial estimate: centre of the frame with zero velocity, and a diagonal
# initial covariance of 100 on every state component.
x_est = np.array([frame_width / 2.0, frame_height / 2.0, 0.0, 0.0])
P = np.eye(4) * 100.0

# Per-frame histories; the main loop appends one entry per frame to each.
estimated_positions_history = []
measured_positions_history = []
hf_raw_positions_history = []

print("영상 처리 및 칼만 필터링 시작...")
print(f"주의: USER_TARGET_CLASS_NAME이 현재 '{USER_TARGET_CLASS_NAME}'으로 설정되어 있습니다.")
print("초기 몇 프레임의 탐지 결과를 보고 해당하는 클래스 이름으로 코드를 수정하세요.")

frame_idx = 0
# try/finally guarantees that the capture and the writer are released even if
# detection or drawing raises part-way through the video; otherwise the output
# file could be left unfinalised.
try:
    while True:
        ret, frame_color = cap.read()
        if not ret:
            break

        output_frame = frame_color.copy()

        detected_center_hf, _, detected_classes_this_frame = detect_object_huggingface(
            frame_color, output_frame, frame_idx)

        current_raw_measurement_hf = None
        current_noisy_measurement_kf = None

        if detected_center_hf is not None:
            # Keep the raw detector centre, and feed the filter a copy with
            # synthetic Gaussian noise added to each coordinate.
            current_raw_measurement_hf = detected_center_hf.copy()
            noisy_x = detected_center_hf[0] + np.random.normal(0, measurement_noise_std)
            noisy_y = detected_center_hf[1] + np.random.normal(0, measurement_noise_std)
            current_noisy_measurement_kf = np.array([noisy_x, noisy_y])

        hf_raw_positions_history.append(current_raw_measurement_hf)
        measured_positions_history.append(current_noisy_measurement_kf)

        # --- Kalman prediction step ---
        x_pred = A @ x_est + B @ u_k
        P_pred = A @ P @ A.T + Q

        # --- Kalman update step ---
        if current_noisy_measurement_kf is not None:
            y = current_noisy_measurement_kf - H @ x_pred
            S = H @ P_pred @ H.T + R
            K = P_pred @ H.T @ np.linalg.inv(S)
            x_est = x_pred + K @ y
            P = (np.eye(4) - K @ H) @ P_pred
        else:
            # No measurement this frame: carry the prediction forward.
            x_est = x_pred.copy()
            P = P_pred.copy()

        estimated_positions_history.append(x_est[:2].copy())

        # --- Visualisation ---
        # Raw detector centre.
        if current_raw_measurement_hf is not None:
            raw_x = int(current_raw_measurement_hf[0])
            raw_y = int(current_raw_measurement_hf[1])
            cv2.circle(output_frame, (raw_x, raw_y), 5, (255, 100, 0), -1)
            if USER_TARGET_CLASS_NAME:
                cv2.putText(output_frame, f"HF Raw ({USER_TARGET_CLASS_NAME})", (raw_x + 7, raw_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 100, 0), 1)

        # Noisy measurement that was fed to the filter.
        if current_noisy_measurement_kf is not None:
            noisy_px = int(current_noisy_measurement_kf[0])
            noisy_py = int(current_noisy_measurement_kf[1])
            cv2.circle(output_frame, (noisy_px, noisy_py), 5, (128, 128, 128), -1)
            cv2.putText(output_frame, "KF Input", (noisy_px + 7, noisy_py + 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (128, 128, 128), 1)

        # Filter estimate.
        kf_pos_x, kf_pos_y = int(x_est[0]), int(x_est[1])
        cv2.circle(output_frame, (kf_pos_x, kf_pos_y), 7, (0, 0, 255), -1)
        cv2.putText(output_frame, "KF Est.", (kf_pos_x + 7, kf_pos_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)

        # Raw-detection trajectory; segments touching a missed frame are skipped.
        for prev_pt, curr_pt in zip(hf_raw_positions_history, hf_raw_positions_history[1:]):
            if prev_pt is not None and curr_pt is not None:
                p1 = (int(prev_pt[0]), int(prev_pt[1]))
                p2 = (int(curr_pt[0]), int(curr_pt[1]))
                cv2.line(output_frame, p1, p2, (255, 150, 100), 1)

        # Filter-estimate trajectory.
        for prev_pt, curr_pt in zip(estimated_positions_history, estimated_positions_history[1:]):
            p1 = (int(prev_pt[0]), int(prev_pt[1]))
            p2 = (int(curr_pt[0]), int(curr_pt[1]))
            cv2.line(output_frame, p1, p2, (50, 50, 255), 2)

        cv2.putText(output_frame, f"Frame: {frame_idx}", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
        out_video.write(output_frame)

        frame_idx += 1
        if frame_idx % 100 == 0:
            print(f"Processed {frame_idx} frames...")

        # One-time hint after the fifth frame when no target class is configured
        # and that frame produced no detections at all.
        if frame_idx == 5 and USER_TARGET_CLASS_NAME is None and not detected_classes_this_frame:
            print("경고: 초기 프레임에서 객체가 탐지되지 않거나 USER_TARGET_CLASS_NAME이 부적절할 수 있습니다. CONFIDENCE_THRESHOLD를 낮추거나 다른 모델을 고려해보세요.")

    print("처리 완료. 결과 비디오가 다음 경로에 저장되었습니다:", VIDEO_OUTPUT_PATH)
finally:
    cap.release()
    out_video.release()
    cv2.destroyAllWindows()

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hugging Face 모델 'facebook/detr-resnet-50' 로드 성공.
영상 처리 및 칼만 필터링 시작...
주의: USER_TARGET_CLASS_NAME이 현재 'airplane'으로 설정되어 있습니다.
초기 몇 프레임의 탐지 결과를 보고 해당하는 클래스 이름으로 코드를 수정하세요.
[Frame 0] Detected classes: ['airplane']. Set USER_TARGET_CLASS_NAME if metronome is among them.
[Frame 0 DETECT] Target 'airplane' found. Score: 0.99, Raw Coords: (192, 900)
[Frame 1] Detected classes: ['airplane']. Set USER_TARGET_CLASS_NAME if metronome is among them.
[Frame 1 DETECT] Target 'airplane' found. Score: 0.99, Raw Coords: (193, 900)
[Frame 2] Detected classes: ['airplane']. Set USER_TARGET_CLASS_NAME if metronome is among them.
[Frame 2 DETECT] Target 'airplane' found. Score: 0.99, Raw Coords: (194, 900)
[Frame 3] Detected classes: ['airplane']. Set USER_TARGET_CLASS_NAME if metronome is among them.
[Frame 3 DETECT] Target 'airplane' found. Score: 0.99, Raw Coords: (195, 900)
[Frame 4] Detected classes: ['airplane']. Set USER_TARGET_CLASS_NAME if metronome is among them.
[Frame 4 DETECT] Target 'airplane

In [None]:
# 상태 벡터에 가속도 추가한 모델

import cv2
import numpy as np
import os
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import torch
from PIL import Image

# =================================================================
# Video object tracking: constant-acceleration (CA) Kalman filter version
# - Replaces the earlier constant-velocity (CV) model with a CA model
# - State vector: [x, y, vx, vy, ax, ay] (6-dimensional)
# =================================================================


# --- Configuration ---
# Input video to track, and the annotated video written by this cell.
VIDEO_INPUT_PATH = "KF_vid3.mp4"
VIDEO_OUTPUT_PATH = "KF_vid_o_CA.mp4" # output file name changed for the CA run
# Label of the object class to track; compared case-insensitively against the
# detector's label names. When None, no detection is selected as the target.
USER_TARGET_CLASS_NAME = 'airplane' # e.g. "clock"
HF_MODEL_NAME = "facebook/detr-resnet-50"
# Passed as `threshold` to the detector's post-processing step.
CONFIDENCE_THRESHOLD = 0.5

# --- Kalman filter parameters ---
# [changed] In the CA model the unpredictable change of acceleration (jerk) is
# modelled as process noise. Squared and used as the scale factor of Q.
jerk_noise_std = 0.5
measurement_noise_std = 8.0 # measurement noise used for the simulation

# --- Load the Hugging Face model and image processor ---
# A load failure is reported but not fatal: both globals are set to None, and
# detect_object_huggingface() then returns "no detection" for every frame.
try:
    image_processor = AutoImageProcessor.from_pretrained(HF_MODEL_NAME)
    model = AutoModelForObjectDetection.from_pretrained(HF_MODEL_NAME)
    print(f"Hugging Face 모델 '{HF_MODEL_NAME}' 로드 성공.")
except Exception as e:
    print(f"오류: Hugging Face 모델 '{HF_MODEL_NAME}' 로드 실패. ({e})")
    image_processor = None
    model = None

def detect_object_huggingface(frame_color_cv, frame_color_for_draw, frame_idx_for_debug=0):
    """Run the Hugging Face detector on one frame and pick the best target box.

    Args:
        frame_color_cv: BGR frame (as read by OpenCV) that is fed to the detector.
        frame_color_for_draw: Frame that is annotated in place with the chosen
            bounding box and its score label.
        frame_idx_for_debug: Frame index used in log messages; the per-frame
            class listing is printed only for indices below 5 (or always when
            no target class is configured).

    Returns:
        A ``(center, bbox, detected_classes)`` tuple. ``center`` is
        ``np.array([cx, cy])`` holding the integer box centre and ``bbox`` is
        ``(x, y, width, height)`` for the highest-scoring detection whose label
        equals ``USER_TARGET_CLASS_NAME`` (case-insensitive); both are ``None``
        when no such detection exists. ``detected_classes`` is the set of
        lower-cased label names of every detection returned by the
        post-processing step; it is empty when the model failed to load.
    """
    if model is None or image_processor is None:
        # Model loading failed earlier: report "nothing detected".
        return None, None, set()

    image_pil = Image.fromarray(cv2.cvtColor(frame_color_cv, cv2.COLOR_BGR2RGB))
    original_width, original_height = image_pil.size
    inputs = image_processor(images=image_pil, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([image_pil.size[::-1]])
    results = image_processor.post_process_object_detection(
        outputs,
        threshold=CONFIDENCE_THRESHOLD,
        target_sizes=target_sizes,
    )[0]

    # Normalise the configured target name once instead of per detection.
    target_name = None
    if USER_TARGET_CLASS_NAME is not None:
        target_name = USER_TARGET_CLASS_NAME.lower()

    best_target_detection = None
    detected_classes_in_frame = set()

    for score, label_id, box in zip(results["scores"], results["labels"], results["boxes"]):
        label_name = model.config.id2label[label_id.item()].lower()
        detected_classes_in_frame.add(label_name)

        if target_name is None or label_name != target_name:
            continue

        # Box corners are truncated to whole pixels and clamped to the frame.
        startX, startY, endX, endY = (int(coord) for coord in box.tolist()[:4])
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(original_width, endX), min(original_height, endY)
        bbox = (startX, startY, endX - startX, endY - startY)
        cx = int((startX + endX) / 2.0)
        cy = int((startY + endY) / 2.0)
        current_score = score.item()
        # Keep only the highest-scoring detection of the target class.
        if best_target_detection is None or current_score > best_target_detection[0]:
            best_target_detection = (current_score, cx, cy, bbox)

    if frame_idx_for_debug < 5 or USER_TARGET_CLASS_NAME is None:
        if detected_classes_in_frame:
            # Sorted so the listing is stable from run to run.
            print(f"[Frame {frame_idx_for_debug}] Detected classes: {sorted(detected_classes_in_frame)}. "
                  f"Set USER_TARGET_CLASS_NAME if your target object is among them.")
        elif frame_idx_for_debug < 5:
            print(f"[Frame {frame_idx_for_debug}] No objects detected with confidence > {CONFIDENCE_THRESHOLD}.")

    if best_target_detection is None:
        return None, None, detected_classes_in_frame

    score, cx, cy, bbox = best_target_detection
    print(f"[Frame {frame_idx_for_debug} DETECT] Target '{USER_TARGET_CLASS_NAME}' found. Score: {score:.2f}, Raw Coords: ({cx}, {cy})")
    (x_b, y_b, w_b, h_b) = bbox
    cv2.rectangle(frame_color_for_draw, (x_b, y_b), (x_b + w_b, y_b + h_b), (0, 255, 0), 2)
    if USER_TARGET_CLASS_NAME:
        cv2.putText(frame_color_for_draw, f"{USER_TARGET_CLASS_NAME}: {score:.2f}", (x_b, y_b - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return np.array([cx, cy]), bbox, detected_classes_in_frame

# Open the input video; stop this run if it cannot be read.
cap = cv2.VideoCapture(VIDEO_INPUT_PATH)
if not cap.isOpened():
    print(f"오류: '{VIDEO_INPUT_PATH}' 영상을 열 수 없습니다.")
    cap.release()
    # `exit()` is the interactive-shell helper installed by `site` and is not
    # meant for programs; raise SystemExit directly, with a failure status.
    raise SystemExit(1)

fps = cap.get(cv2.CAP_PROP_FPS)
# Fall back to 30 FPS when the reported rate is unusable (0, negative or NaN),
# so that dt below is always a finite positive time step.
if not np.isfinite(fps) or fps <= 0:
    fps = 30.0
dt = 1.0 / fps
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# The annotated output uses the same frame rate and frame size as the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_video = cv2.VideoWriter(VIDEO_OUTPUT_PATH, fourcc, fps, (frame_width, frame_height))
if not out_video.isOpened():
    # Processing still runs, but make the otherwise silent failure visible.
    print(f"경고: '{VIDEO_OUTPUT_PATH}' 출력 영상을 열 수 없습니다. 결과 비디오가 저장되지 않습니다.")

# [changed] --- Kalman filter matrices for the constant-acceleration (CA) model ---
# State vector is [x, y, vx, vy, ax, ay]; dt_2 is the 0.5*dt^2 position term
# contributed by acceleration.
dt_2 = 0.5 * dt**2
# State transition matrix A (6x6): position gains v*dt + 0.5*a*dt^2, velocity
# gains a*dt, acceleration is carried over unchanged.
A = np.array([[1, 0, dt, 0, dt_2, 0],
              [0, 1, 0, dt, 0, dt_2],
              [0, 0, 1, 0, dt,   0],
              [0, 0, 0, 1, 0,   dt],
              [0, 0, 0, 0, 1,   0],
              [0, 0, 0, 0, 0,   1]])

# Measurement matrix H (2x6): only the position components (x, y) are observed.
H = np.array([[1, 0, 0, 0, 0, 0],
              [0, 1, 0, 0, 0, 0]])

# Process-noise covariance Q (6x6), scaled by the variance jerk_noise_std**2.
# NOTE(review): per axis, Q_base equals G @ G.T with G = [dt**2/2, dt, 1], i.e.
# the noise enters as a per-step acceleration increment. A piecewise-constant
# jerk model would instead use G = [dt**3/6, dt**2/2, dt] - confirm which
# interpretation of `jerk_noise_std` is intended.
q_factor = jerk_noise_std**2
Q_base = np.array([
    [dt**4/4, 0, dt**3/2, 0, dt**2/2, 0],
    [0, dt**4/4, 0, dt**3/2, 0, dt**2/2],
    [dt**3/2, 0, dt**2, 0, dt, 0],
    [0, dt**3/2, 0, dt**2, 0, dt],
    [dt**2/2, 0, dt, 0, 1, 0],
    [0, dt**2/2, 0, dt, 0, 1]
])
Q = Q_base * q_factor

# Measurement-noise covariance R (2x2) - unchanged from the CV version.
R = np.eye(2) * measurement_noise_std**2

# [changed] --- Initial state and covariance (6-dimensional) ---
# Start at the centre of the frame with zero velocity and acceleration, and a
# diagonal initial covariance of 100 on every state component.
x_est = np.array([frame_width / 2.0, frame_height / 2.0, 0.0, 0.0, 0.0, 0.0])
P = np.eye(6) * 100.0


# Per-frame histories; the main loop appends one entry per frame to each.
estimated_positions_history = []
measured_positions_history = []
hf_raw_positions_history = []

print("영상 처리 및 상수 가속도(CA) 칼만 필터링 시작...")
print(f"주의: USER_TARGET_CLASS_NAME이 현재 '{USER_TARGET_CLASS_NAME}'으로 설정되어 있습니다.")
print("초기 몇 프레임의 탐지 결과를 보고 해당하는 클래스 이름으로 코드를 수정하세요.")

frame_idx = 0
# try/finally guarantees that the capture and the writer are released even if
# detection or drawing raises part-way through the video; otherwise the output
# file could be left unfinalised.
try:
    while True:
        ret, frame_color = cap.read()
        if not ret:
            break

        output_frame = frame_color.copy()

        detected_center_hf, _, detected_classes_this_frame = detect_object_huggingface(
            frame_color, output_frame, frame_idx)

        current_raw_measurement_hf = None
        current_noisy_measurement_kf = None

        if detected_center_hf is not None:
            # Raw detector centre, treated as the reference ("true") position.
            current_raw_measurement_hf = detected_center_hf.copy()

            # Simulation only: add Gaussian noise to the reference position to
            # imitate an imperfect sensor measurement.
            noisy_x = detected_center_hf[0] + np.random.normal(0, measurement_noise_std)
            noisy_y = detected_center_hf[1] + np.random.normal(0, measurement_noise_std)
            current_noisy_measurement_kf = np.array([noisy_x, noisy_y])

        hf_raw_positions_history.append(current_raw_measurement_hf)
        measured_positions_history.append(current_noisy_measurement_kf)

        # --- Kalman prediction step ---
        # The CA model has no control input, so the state is propagated by A alone.
        x_pred = A @ x_est
        P_pred = A @ P @ A.T + Q

        # --- Kalman update step ---
        if current_noisy_measurement_kf is not None:
            y = current_noisy_measurement_kf - H @ x_pred
            S = H @ P_pred @ H.T + R
            K = P_pred @ H.T @ np.linalg.inv(S)
            x_est = x_pred + K @ y
            # The identity matrix is 6x6 to match the state dimension.
            P = (np.eye(6) - K @ H) @ P_pred
        else:
            # No measurement this frame: carry the prediction forward.
            x_est = x_pred.copy()
            P = P_pred.copy()

        estimated_positions_history.append(x_est[:2].copy())

        # --- Visualisation ---
        # Raw detector centre (blue dot).
        if current_raw_measurement_hf is not None:
            raw_x = int(current_raw_measurement_hf[0])
            raw_y = int(current_raw_measurement_hf[1])
            cv2.circle(output_frame, (raw_x, raw_y), 5, (255, 100, 0), -1)
            if USER_TARGET_CLASS_NAME:
                cv2.putText(output_frame, f"HF Raw ({USER_TARGET_CLASS_NAME})", (raw_x + 7, raw_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 100, 0), 1)

        # Noisy measurement that was fed to the filter (grey dot).
        if current_noisy_measurement_kf is not None:
            noisy_px = int(current_noisy_measurement_kf[0])
            noisy_py = int(current_noisy_measurement_kf[1])
            cv2.circle(output_frame, (noisy_px, noisy_py), 5, (128, 128, 128), -1)
            cv2.putText(output_frame, "KF Input (Simulated Noise)", (noisy_px + 7, noisy_py + 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (128, 128, 128), 1)

        # Final filter estimate (red dot).
        kf_pos_x, kf_pos_y = int(x_est[0]), int(x_est[1])
        cv2.circle(output_frame, (kf_pos_x, kf_pos_y), 7, (0, 0, 255), -1)
        cv2.putText(output_frame, "KF Est.", (kf_pos_x + 7, kf_pos_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)

        # Raw-detection trajectory; segments touching a missed frame are skipped.
        for prev_pt, curr_pt in zip(hf_raw_positions_history, hf_raw_positions_history[1:]):
            if prev_pt is not None and curr_pt is not None:
                p1 = (int(prev_pt[0]), int(prev_pt[1]))
                p2 = (int(curr_pt[0]), int(curr_pt[1]))
                cv2.line(output_frame, p1, p2, (255, 150, 100), 1)

        # Filter-estimate trajectory.
        for prev_pt, curr_pt in zip(estimated_positions_history, estimated_positions_history[1:]):
            p1 = (int(prev_pt[0]), int(prev_pt[1]))
            p2 = (int(curr_pt[0]), int(curr_pt[1]))
            cv2.line(output_frame, p1, p2, (50, 50, 255), 2)

        cv2.putText(output_frame, f"Frame: {frame_idx}", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
        out_video.write(output_frame)

        frame_idx += 1
        if frame_idx % 100 == 0:
            print(f"Processed {frame_idx} frames...")

        # One-time hint after the fifth frame when no target class is configured
        # and that frame produced no detections at all.
        if frame_idx == 5 and USER_TARGET_CLASS_NAME is None and not detected_classes_this_frame:
            print("경고: 초기 프레임에서 객체가 탐지되지 않거나 USER_TARGET_CLASS_NAME이 부적절할 수 있습니다. CONFIDENCE_THRESHOLD를 낮추거나 다른 모델을 고려해보세요.")

    print("처리 완료. 결과 비디오가 다음 경로에 저장되었습니다:", VIDEO_OUTPUT_PATH)
finally:
    cap.release()
    out_video.release()
    cv2.destroyAllWindows()