In [1]:
import cv2
import os

In [2]:
def split_video(input_video, output_folder, frames_per_split=2000):
    # Open the video file
    video = cv2.VideoCapture(input_video)
    if not video.isOpened():
        print(f"Error: Could not open video '{input_video}'")
        return
    
    # Get some properties from the video file
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = video.get(cv2.CAP_PROP_FPS)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    # Calculate number of splits needed
    num_splits = total_frames // frames_per_split
    if total_frames % frames_per_split != 0:
        num_splits += 1
    
    print(f"Total frames: {total_frames}, FPS: {fps}, Resolution: {width}x{height}")
    print(f"Splitting video into {num_splits} parts...")
    
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    # Read and save frames
    frame_count = 0
    split_count = 0
    while True:
        success, frame = video.read()
        
        if not success:
            break
        
        # Save the frame
        if frame_count % frames_per_split == 0:
            split_count += 1
            output_filename = os.path.join(output_folder, f"part_{split_count}.mp4")
            print(f"Saving {output_filename}...")
            # Define the codec and create VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_filename, fourcc, fps, (width, height))
        
        out.write(frame)
        frame_count += 1
        
        # Break the loop if we have reached the desired number of frames
        if frame_count >= total_frames or split_count >= num_splits:
            break
    
    # Release everything when finished
    video.release()
    out.release()
    cv2.destroyAllWindows()
    
    print("Video splitting completed.")

In [3]:
# Example usage:
input_video = 'data/input_video.mp4'  # Replace with your video file path
output_folder = 'output_frames'  # Replace with your desired output folder

split_video(input_video, output_folder)

Total frames: 5092, FPS: 24.0, Resolution: 1280x720
Splitting video into 3 parts...
Saving output_frames/part_1.mp4...


OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Saving output_frames/part_2.mp4...


OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Saving output_frames/part_3.mp4...
Video splitting completed.


OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


In [4]:
from ultralytics import YOLO

dir = os.getcwd()
video_path = os.path.join(dir, 'output_frames/part_1.mp4')
bytetrack_yaml_path = os.path.join(dir, 'bytetrack.yaml')

model = YOLO('yolov8n.pt')

results = model.track(source=video_path, persist=True, tracker=bytetrack_yaml_path)

results



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/2000) /home/mg/projects/deepdaiv/skeleton/audrn/output_frames/part_1.mp4: 384x640 4 persons, 141.7ms
video 1/1 (frame 2/2000) /home/mg/projects/deepdaiv/skeleton/audrn/output_frames/part_1.mp4: 384x640 4 persons, 90.9ms
video 1/1 (frame 3/2000) /home/mg/projects/deepdaiv/skeleton/audrn/output_frames/part_1.mp4: 384x640 4 persons, 115.2ms
video 1/1 (frame 4/2000) /home/mg/projects/deepdaiv/skeleton/audrn/output_frames/part_1.mp4: 384x640 3 persons, 144.1ms
video 1/1 (frame 5/2000) /home/mg/projects/deepdaiv/skeleton/audrn/output_frames/part_1.mp4: 

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [6]:
import yaml

# YAML 파일 경로 설정
yaml_file_path = 'labels.yaml'

# YAML 파일 읽기
with open(yaml_file_path, 'r') as file:
    labels = yaml.safe_load(file)

# 클래스 이름 추출
labels['names'][0]

'person'

In [7]:
import pytesseract

In [8]:
def extract_text_from_player(image, box):
    x1, y1, x2, y2 = map(int, box)
    crop_img = image[y1:y2, x1:x2]
    crop_img = cv2.resize(crop_img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    
    # Tesseract로 OCR 적용
    config = r'--oem 3 --psm 6'
    text = pytesseract.image_to_string(thresh, config=config)
    return text.strip()

In [11]:
# 결과 비디오를 저장할 경로 설정
output_video_path = os.path.join(dir, 'data/preprocessed_video.mp4')

# 입력 비디오를 열어 속성 가져오기
cap = cv2.VideoCapture(video_path)
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# 출력 비디오 설정
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# 프레임별로 처리
frame_idx = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    # 현재 프레임에서 검출된 객체 정보 가져오기
    data = results[frame_idx].boxes  # 현재 프레임의 객체 정보
    
    # 각 객체에 대해 바운딩 박스와 클래스 이름 표시
    #for *xyxy, conf, cls in data:
    for i in range(len(data.xyxy)):
        cls = data.cls[i]
        conf = data.conf[i]
        xyxy = data.xyxy[i]
        label = f'{labels["names"][int(cls)]} {conf:.2f}'
        if int(cls) == 0:   # label is person
            label = f'{extract_text_from_player(frame, xyxy)} {conf:.2f}'
        cv2.rectangle(frame, (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3])), (0, 255, 0), 2)
        cv2.putText(frame, label, (int(xyxy[0]), int(xyxy[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    
    # 수정된 프레임을 결과 비디오에 쓰기
    out.write(frame)
    
    frame_idx += 1

# 모든 자원 해제
cap.release()
out.release()
cv2.destroyAllWindows()

print("Completed annotating video.")