# YOLO Pose: 설치, 키포인트 추출 및 CSV 저장
* https://docs.ultralytics.com/ko/tasks/pose/
* https://docs.ultralytics.com/ko/datasets/pose/coco/
* 실행환경 : Google Colab

## YOLO11 설치

In [1]:
!pip install ultralytics opencv-python pandas


Collecting ultralytics
  Downloading ultralytics-8.3.168-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

## MP4 비디오에서 관절 좌표(x, y) 추출, CSV에 저장, visibility는 제외함
* mp4 비디오 다운로드 사이트 : https://www.istockphoto.com/

In [2]:
import cv2
import pandas as pd
from ultralytics import YOLO

# 1. Load the pose-estimation model (other pose checkpoints such as
#    yolov8s-pose.pt or yolov8m-pose.pt can be used instead).
model = YOLO('yolo11n-pose.pt')

# 2. Open the input video. The CSV path is named once so that the final
#    message reports the file that was actually written.
video_path = '/content/drive/MyDrive/Python_AI/YOLO/test_mp4/walk_sample_video.mp4'  # input video file
csv_path = '/content/drive/MyDrive/Python_AI/YOLO/train_data/walk_sample_video.csv'  # output CSV file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the loop below never runs and an empty CSV is
    # written together with a misleading success message.
    cap.release()
    raise OSError(f"Cannot open video: {video_path}")

frame_idx = 0  # index of the frame currently being processed
data = []      # one dict per detected person per frame

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # 3. Run pose inference on this frame.
        results = model(frame)

        # 4. Collect the (x, y) keypoints of every detected person.
        for result in results:
            keypoints = result.keypoints
            if keypoints is None:
                continue
            # presumably keypoints.xy is [n_persons, 17, 2] holding (x, y) — TODO confirm
            for person in keypoints.xy:
                if len(person) == 0:
                    # NOTE(review): some Ultralytics versions appear to yield a
                    # zero-length keypoint tensor for frames without a
                    # detection — confirm. Skipping keeps rows that carry only
                    # a frame number out of the CSV.
                    continue
                row = {'frame': frame_idx}
                for i, (x, y) in enumerate(person):
                    row[f'x{i}'] = float(x)
                    row[f'y{i}'] = float(y)
                data.append(row)

        frame_idx += 1
finally:
    # Release the capture handle even if inference raises midway.
    cap.release()

# 5. Save the collected rows as CSV.
df = pd.DataFrame(data)
df.to_csv(csv_path, index=False)

print(f"✅ CSV 저장 완료: {csv_path}")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt to 'yolo11n-pose.pt'...


100%|██████████| 5.97M/5.97M [00:00<00:00, 342MB/s]


✅ CSV 저장 완료: pose_keypoints.csv


## 영상 위에 keypoints를 표시하여 다른 mp4 파일에 저장하는 예

In [3]:
import cv2
from ultralytics import YOLO

# Pose model checkpoint (point this at your own weights if you trained a
# custom model).
model = YOLO('yolo11n-pose.pt')

# Input video.
input_video_path = '/content/drive/MyDrive/Python_AI/YOLO/run_70frames_video.mp4'
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Fail loudly instead of producing an empty output file and a success
    # message.
    cap.release()
    raise OSError(f"Cannot open video: {input_video_path}")

# Output video: same frame rate and frame size as the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output_pose.mp4', fourcc, fps, (width, height))

try:
    if not out.isOpened():
        raise OSError("Cannot open video writer: output_pose.mp4")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLO pose inference on this frame.
        results = model(frame)

        # Draw each keypoint onto the frame.
        for result in results:
            if result.keypoints is None:
                continue
            for person in result.keypoints.xy:
                for (x, y) in person:
                    # Non-positive coordinates are treated as "keypoint not
                    # present" (the same convention the skeleton-drawing code
                    # in this notebook uses), so no dot is drawn in the
                    # top-left corner for them.
                    if x > 0 and y > 0:
                        cv2.circle(frame, (int(x), int(y)), radius=1, color=(0, 255, 0), thickness=-1)

        # Append the annotated frame to the output video.
        out.write(frame)
finally:
    # Release both handles even if inference or writing raises midway.
    cap.release()
    out.release()

print("✅ 완료: output_pose.mp4 저장됨.")



0: 480x640 1 person, 407.4ms
Speed: 12.4ms preprocess, 407.4ms inference, 37.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 101.0ms
Speed: 43.8ms preprocess, 101.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 86.2ms
Speed: 1.9ms preprocess, 86.2ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 88.9ms
Speed: 1.9ms preprocess, 88.9ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 79.9ms
Speed: 1.9ms preprocess, 79.9ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 91.9ms
Speed: 1.9ms preprocess, 91.9ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 84.4ms
Speed: 1.7ms preprocess, 84.4ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 89.0ms
Speed: 1.6ms preprocess, 89.0ms inference, 1.3ms postprocess per image at shape (1

In [None]:
# 관절 사이에 선 그리기
import cv2
from ultralytics import YOLO

# Load the pose model.
model = YOLO('yolo11n-pose.pt')

# Open the input video.
input_video_path = '/content/drive/MyDrive/Python_AI/YOLO/run_70frames_video.mp4'
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Fail loudly instead of producing an empty output file and a success
    # message.
    cap.release()
    raise OSError(f"Cannot open video: {input_video_path}")

# Output video: same frame rate and frame size as the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output_pose.mp4', fourcc, fps, (width, height))

# Pairs of keypoint indices to connect with a line (skeleton).
# Indices presumably follow the COCO 17-keypoint order
# (0 nose, 1/2 eyes, 3/4 ears, 5/6 shoulders, ...) — TODO confirm.
skeleton = [
    (5, 7), (7, 9),     # left arm
    (6, 8), (8, 10),    # right arm
    (5, 6),             # shoulders
    (11, 13), (13, 15), # left leg
    (12, 14), (14, 16), # right leg
    (11, 12),           # hips
    (5, 11), (6, 12),   # torso sides
    (0, 1), (1, 3), (0, 2), (2, 4), # nose to eyes, eyes to ears
    (0, 5), (0, 6)      # head to shoulders
]

try:
    if not out.isOpened():
        raise OSError("Cannot open video writer: output_pose.mp4")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLO pose inference on this frame.
        results = model(frame)

        for result in results:
            if result.keypoints is None:
                continue
            for person in result.keypoints.xy:
                keypoints = person.cpu().numpy()

                # Draw a dot on each keypoint that exists; the same "> 0"
                # existence test is used for the connecting lines below.
                for x, y in keypoints:
                    if x > 0 and y > 0:
                        cv2.circle(frame, (int(x), int(y)), radius=2, color=(0, 255, 0), thickness=-1)

                # Draw the connecting lines between joints.
                for idx1, idx2 in skeleton:
                    # Guard against a person tensor with fewer keypoints than
                    # the skeleton references.
                    if idx1 < len(keypoints) and idx2 < len(keypoints):
                        x1, y1 = keypoints[idx1]
                        x2, y2 = keypoints[idx2]
                        if x1 > 0 and y1 > 0 and x2 > 0 and y2 > 0:  # connect only keypoints that exist
                            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), color=(255, 0, 0), thickness=1)

        # Append the annotated frame to the output video.
        out.write(frame)
finally:
    # Release both handles even if inference or writing raises midway.
    cap.release()
    out.release()

print("✅ 완료: output_pose.mp4 저장됨.")


## MP4에서 [x, y, visible] 값을 추출하여  CSV 파일에 저장하기
* YOLO11 또는 YOLOv8의 *-pose.pt 모델(예: yolo11n-pose.pt, yolov8n-pose.pt)은 Ultralytics의 COCO Pose 포맷(17 keypoints)을 따름
* x : 이미지 픽셀 x 좌표(이미지 왼쪽 0)
* y : 이미지 픽셀 y 좌표(이미지 상단 0)
* visible : 키포인트의 confidence(visibility), 0~1 사이의 연속값 — 0에 가까울수록 보이지 않음, 0.5 부근은 불확실함, 1에 가까울수록 보임
* visible값은 ML모델 학습시에 손실 계산에서 가중치로 사용할 수 있다(아래 참조)
* loss = torch.mean(visible * (y_pred - y_true) ** 2)

In [None]:
!pip install ultralytics opencv-python pandas

In [None]:
import cv2
import pandas as pd
from ultralytics import YOLO

# 1. Load the pose model (yolov8n-pose.pt can be used instead).
model = YOLO('yolo11n-pose.pt')

# 2. Open the input video.
video_path = '/content/drive/MyDrive/Python_AI/YOLO/walk_man_right_with_phone.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the loop below never runs and an empty CSV is
    # written together with a success message.
    cap.release()
    raise OSError(f"Cannot open video: {video_path}")

# Frame width and height of the video (not used further in this cell).
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

frame_idx = 0  # index of the frame currently being processed
data = []      # one dict per recorded person per frame

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # 3. Run pose inference on this frame.
        results = model(frame)

        # 4. Extract [x, y, visible] for each keypoint.
        for result in results:
            # keypoints.xy supplies the (x, y) pairs and keypoints.conf the
            # per-keypoint visible/confidence values.
            keypoints = result.keypoints
            if keypoints is None:
                continue
            for coords_xy, visibility in zip(keypoints.xy, keypoints.conf):
                if len(coords_xy) == 0:
                    # NOTE(review): some Ultralytics versions appear to yield a
                    # zero-length keypoint tensor for frames without a
                    # detection — confirm. Skipping keeps rows that carry only
                    # a frame number out of the CSV.
                    continue
                row = {'frame': frame_idx}
                for i, ((x, y), v) in enumerate(zip(coords_xy, visibility)):
                    row[f'x{i}'] = float(x)
                    row[f'y{i}'] = float(y)
                    row[f'v{i}'] = float(v)  # visible/confidence value
                data.append(row)
                # Record only the first person of each result. Rows carry no
                # person column, so additional people would show up as
                # indistinguishable rows for the same frame. To record
                # everyone, enumerate the persons, add a 'person' key to the
                # row and remove this break.
                break

        frame_idx += 1
finally:
    # Release the capture handle even if inference raises midway.
    cap.release()

# 5. Save the collected rows as CSV.
df = pd.DataFrame(data)
df.to_csv('/content/drive/MyDrive/Python_AI/YOLO/train_data/walk_man_right_with_phone.csv', index=False)

print("✅ 완료: csv 저장됨")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt to 'yolo11n-pose.pt'...


100%|██████████| 5.97M/5.97M [00:00<00:00, 114MB/s]



0: 384x640 1 person, 340.6ms
Speed: 18.4ms preprocess, 340.6ms inference, 32.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 135.6ms
Speed: 4.7ms preprocess, 135.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 146.0ms
Speed: 4.5ms preprocess, 146.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 153.7ms
Speed: 5.0ms preprocess, 153.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 138.8ms
Speed: 4.8ms preprocess, 138.8ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 139.4ms
Speed: 3.7ms preprocess, 139.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 139.0ms
Speed: 4.4ms preprocess, 139.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 138.3ms
Speed: 5.1ms preprocess, 138.3ms inference, 1.2ms postprocess per image 