In [13]:
from ultralytics import YOLO
import cv2
import numpy as np
import json
from moviepy.editor import VideoFileClip
import os  # Импортируем модуль os для работы с файловой системой

class VideoObjectDetection:
    def __init__(self, video_path, conf_threshold=0.5, tracked_classes=None):
        self.video_path = video_path
        self.model = YOLO("yolov10x.pt")
        self.conf_threshold = conf_threshold  

        # Список классов модели YOLO
        self.class_names = [
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
            "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
            "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
            "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", 
            "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
            "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
            "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
            "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
            "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
            "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
            "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
            "scissors", "teddy bear", "hair drier", "toothbrush"
        ]

        # Список классов для отслеживания
        self.tracked_classes = tracked_classes if tracked_classes else self.class_names
        self.detections_data = []  # Для сохранения информации об объектах

    def get_contrast_color(self, background_color):
        """Возвращает черный или белый цвет в зависимости от яркости фона."""
        brightness = np.mean(background_color)
        return (255, 255, 255) if brightness < 128 else (0, 0, 0)

    def process_video(self, output_video_path="output_with_detections.mp4", json_output_path="detections.json"):
        cap = cv2.VideoCapture(self.video_path)

        if not cap.isOpened():
            print("Ошибка: не удалось открыть видео.")
            return

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frame_count = 0  # Счётчик кадров

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        temp_output_path = "temp_output.mp4"
        out = cv2.VideoWriter(temp_output_path, fourcc, fps, (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1  # Увеличиваем номер кадра
            results = self.model(frame)

            det = results[0]
            pred = det.boxes.xyxy.cpu().numpy() 
            classes = det.boxes.cls.cpu().numpy()  
            confs = det.boxes.conf.cpu().numpy() 

            frame_detections = []  # Список объектов на текущем кадре

            for box, cls, conf in zip(pred, classes, confs):
                if conf < self.conf_threshold:
                    continue 
                
                class_name = self.class_names[int(cls)]
                
                if class_name not in self.tracked_classes:
                    continue

                x1, y1, x2, y2 = box
                label = f"{class_name} ({conf:.2f})" 

                # Добавление информации о детекции в список
                frame_detections.append({
                    "class": class_name,
                    "confidence": float(conf),
                    "bbox": [int(x1), int(y1), int(x2), int(y2)]
                })

                background_color = cv2.mean(frame[int(y1):int(y2), int(x1):int(x2)])[0:3]
                text_color = self.get_contrast_color(background_color)

                # Проверка выхода текста за границы кадра
                (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                y_text = int(y1) - 10

                # Смещение текста вверх, если он выходит за верхнюю границу кадра
                if y_text - text_height < 0:
                    y_text = int(y1) + text_height + 10

                # Убедимся, что текст не выходит за правую границу кадра
                if int(x1) + text_width > frame_width:
                    x1 = frame_width - text_width - 10

                cv2.putText(frame, label, (int(x1), y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2)
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

            # Сохранение детекций текущего кадра
            if frame_detections:
                timestamp = frame_count / fps  # Вычисление таймкода в секундах
                self.detections_data.append({
                    "frame": frame_count,
                    "timestamp": round(timestamp, 2),  # Округляем до двух знаков после запятой
                    "detections": frame_detections
                })

            out.write(frame)
            cv2.imshow('Frame with Detections', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        out.release()
        cv2.destroyAllWindows()

        # Сохранение данных о детекциях в JSON файл
        with open(json_output_path, 'w') as f:
            json.dump(self.detections_data, f, indent=4)

        # Добавление аудио к видео
        video_clip = VideoFileClip(temp_output_path)
        original_audio = VideoFileClip(self.video_path).audio
        video_with_audio = video_clip.set_audio(original_audio)

        video_with_audio.write_videofile(output_video_path, codec="libx264", audio_codec="aac")

        print(f"Видео с детекциями и аудио сохранено как {output_video_path}")
        print(f"Детекции сохранены в файл {json_output_path}")

        # Удаляем временный файл
        if os.path.exists(temp_output_path):
            os.remove(temp_output_path)
            print(f"Временный файл {temp_output_path} был удалён.")

class VideoPlayer:
    def __init__(self, video_path, tracked_classes, conf_threshold=0.5):
        self.video_path = video_path
        self.tracked_classes = tracked_classes
        self.conf_threshold = conf_threshold
        self.detector = VideoObjectDetection(video_path, conf_threshold, tracked_classes)

    def play_video_with_detections(self, output_video_path="output_with_detections.mp4", json_output_path="detections.json"):
        self.detector.process_video(output_video_path=output_video_path, json_output_path=json_output_path)


# Пример использования:
video_path = "Pixel_2.mp4"

# "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
# "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
# "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
# "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", 
# "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
# "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
# "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
# "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
# "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
# "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
# "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
# "scissors", "teddy bear", "hair drier", "toothbrush"


# Указываем конкретные классы для отслеживания
tracked_classes = [
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
            "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter",
            "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
            "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", 
            "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
            "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
            "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
            "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
            "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
            "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
            "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
            "scissors", "teddy bear", "hair drier", "toothbrush"
        ]

num_conf_threshold = 0.6
output_video_file = "output_Pixel_2.mp4"  # Укажите имя выходного видео файла
output_json_file = "output_Pixel_2.json"  # Укажите имя выходного JSON файла

video_player = VideoPlayer(video_path, tracked_classes, num_conf_threshold)
video_player.play_video_with_detections(output_video_path=output_video_file, json_output_path=output_json_file)



0: 320x640 1 person, 249.5ms
Speed: 1.0ms preprocess, 249.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 248.8ms
Speed: 1.0ms preprocess, 248.8ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 242.2ms
Speed: 1.0ms preprocess, 242.2ms inference, 1.5ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 cell phone, 255.1ms
Speed: 1.0ms preprocess, 255.1ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 cell phone, 244.6ms
Speed: 1.0ms preprocess, 244.6ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 cell phone, 246.8ms
Speed: 2.5ms preprocess, 246.8ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 cell phone, 243.2ms
Speed: 1.1ms preprocess, 243.2ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 1 cell phone, 246.0ms
Spee

                                                                    

MoviePy - Done.
Moviepy - Writing video output_Pixel_2.mp4



                                                               

Moviepy - Done !
Moviepy - video ready output_Pixel_2.mp4
Видео с детекциями и аудио сохранено как output_Pixel_2.mp4
Детекции сохранены в файл output_Pixel_2.json


PermissionError: [WinError 32] Процесс не может получить доступ к файлу, так как этот файл занят другим процессом: 'temp_output.mp4'

In [4]:
import cv2
import json

class VideoFrameExtractor:
    def __init__(self, video_path, json_path, tracked_classes):
        self.video_path = video_path
        self.json_path = json_path
        self.tracked_classes = tracked_classes

    def extract_frames(self):
        # Загружаем данные из JSON файла
        with open(self.json_path, 'r') as f:
            detections_data = json.load(f)

        # Открываем видео
        cap = cv2.VideoCapture(self.video_path)

        if not cap.isOpened():
            print("Ошибка: не удалось открыть видео.")
            return

        frame_count = 0  # Счётчик кадров

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1  # Увеличиваем номер кадра

            # Проверяем, есть ли детекции для текущего кадра
            for detection in detections_data:
                if detection["frame"] == frame_count:
                    for obj in detection["detections"]:
                        if obj["class"] in self.tracked_classes:
                            # Отображаем кадр, если обнаружен нужный класс
                            cv2.imshow(f'Frame with {obj["class"]}', frame)

                            # Ждем нажатия клавиши, чтобы закрыть окно
                            if cv2.waitKey(0) & 0xFF == ord('q'):
                                break

        cap.release()
        cv2.destroyAllWindows()

# Пример использования:
video_path = "output_Pixel_2.mp4"
json_path = "output_Pixel_2.json"
tracked_classes = ["person", "car", "dog", "cell phone"]  # Указываем классы, которые нужно отслеживать

frame_extractor = VideoFrameExtractor(video_path, json_path, tracked_classes)
frame_extractor.extract_frames()
