In [5]:
import os
import torch
import shutil
import uuid
import subprocess
from ultralytics import YOLO
import cv2
import numpy as np
from sklearn.cluster import KMeans
from collections import defaultdict
from collections import Counter

# Path to the trained YOLO weights that the analyzer loads.
YOLO_MODEL_PATH = "/home/jupyter/datasphere/project/train18_best_model.pt"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Class indices passed to the tracker's `classes=` filter.
# NOTE(review): index 4 is presumably the pedestrian class of this custom model — confirm.
PEDESTRIAN_CLASSES = [4]
# Minimum detection confidence, passed to the tracker as `conf=`.
YOLO_CONFIDENCE_THRESHOLD = 0.3
# NOTE(review): not referenced in the visible part of the notebook; presumably the
# directory where Ultralytics stores tracking runs — confirm or remove.
ULTRALYTICS_OUTPUT_DIR = "/home/jupyter/datasphere/project/runs/track"

# Input video path (the analysis cell further down processes this same file).
temp_input_video_path = "/home/jupyter/datasphere/project/MOT20-01-raw.mp4"

### Подсчет среднего потока пешеходов через кластеризацию.

In [2]:
class YoloVideoFlowAnalyzer:
    """Track pedestrians in a video and group their tracks into flow clusters.

    The pipeline runs the YOLO model with the ByteTrack tracker over the video,
    describes every track by its start-to-end displacement, clusters those
    displacements with K-Means and writes a copy of the video in which boxes
    and recent trajectories are coloured by cluster.
    """

    # Drawing colours per cluster label, in OpenCV's BGR channel order.
    CLUSTER_COLORS = {
        -1: (128, 128, 128),
        0: (0, 255, 0),
        1: (0, 0, 255),
        2: (255, 0, 0),
        3: (255, 255, 0),
        4: (255, 0, 255),
        5: (0, 255, 255),
    }
    # Colour used for tracks without a label or with a label outside CLUSTER_COLORS.
    FALLBACK_COLOR = (255, 255, 255)
    # Number of most recent track points drawn as a trajectory tail.
    TRAIL_LENGTH = 10
    # Frame rate used when the container does not report one.
    DEFAULT_FPS = 30

    def __init__(self):
        """Load the YOLO weights from ``YOLO_MODEL_PATH`` and move the model to ``DEVICE``."""
        self.device = DEVICE
        self.model = YOLO(YOLO_MODEL_PATH)
        self.model.to(self.device)

    def process_and_track_with_clusters(self, input_path: str, n_clusters: int = 2):
        """Track, cluster and render one video.

        Args:
            input_path: Path of the video to analyse.
            n_clusters: Requested number of K-Means clusters (default 2). When
                fewer tracks than clusters are found, the number of clusters is
                reduced to the number of tracks.

        Returns:
            A tuple ``(out_path, trajectories, labels)`` where ``out_path`` is
            the path of the annotated video, ``trajectories`` maps each track id
            to a list of ``(frame_idx, cx, cy)`` box centres, and ``labels``
            maps each track id to its cluster label.

        Raises:
            ValueError: If ``n_clusters`` is smaller than 1.
            OSError: If the input video cannot be opened or the output video
                cannot be created.
        """
        if n_clusters < 1:
            raise ValueError(f"n_clusters must be >= 1, got {n_clusters}")

        fps, frame_size = self._probe_video(input_path)
        trajectories, detections = self._track(input_path)
        labels = self._cluster_tracks(trajectories, n_clusters)

        out_path = self._clustered_output_path(input_path)
        self._render(input_path, out_path, fps, frame_size, detections, labels)
        return out_path, trajectories, labels

    @staticmethod
    def _clustered_output_path(input_path: str) -> str:
        """Return ``<input without extension>_clusters.mp4``.

        Only the real file extension is replaced, so the result can never be
        the input path itself (the writer would otherwise overwrite the source
        video for non-``.mp4`` inputs) and directory names are left untouched.
        """
        root, _ext = os.path.splitext(input_path)
        return f"{root}_clusters.mp4"

    @staticmethod
    def _displacement_features(trajectories):
        """Return ``(track_ids, features)`` with one start-to-end ``(dx, dy)`` row per track.

        ``features`` always has shape ``(n_tracks, 2)``, including ``(0, 2)``
        when there are no tracks.
        """
        track_ids = list(trajectories)
        rows = [
            [pts[-1][1] - pts[0][1], pts[-1][2] - pts[0][2]]
            for pts in trajectories.values()
        ]
        features = np.array(rows, dtype=float).reshape(-1, 2)
        return track_ids, features

    @classmethod
    def _probe_video(cls, input_path: str):
        """Read ``fps`` and ``(width, height)`` from the video, releasing the handle right away."""
        cap = cv2.VideoCapture(input_path)
        try:
            if not cap.isOpened():
                raise OSError(f"Cannot open video: {input_path}")
            fps = cap.get(cv2.CAP_PROP_FPS) or cls.DEFAULT_FPS
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            cap.release()
        return fps, (width, height)

    def _track(self, input_path: str):
        """Run YOLO + ByteTrack and collect per-track centres and per-frame boxes.

        Returns:
            ``(trajectories, detections)``: ``trajectories[track_id]`` is a list
            of ``(frame_idx, cx, cy)``; ``detections[frame_idx]`` is a list of
            ``(track_id, (x1, y1, x2, y2))``. All coordinates are plain ints.
        """
        trajectories = defaultdict(list)
        detections = defaultdict(list)

        results = self.model.track(
            source=input_path,
            tracker='bytetrack.yaml',
            classes=PEDESTRIAN_CLASSES,
            conf=YOLO_CONFIDENCE_THRESHOLD,
            stream=True,
            device=self.device,
            # FP16 inference is a GPU feature; only request it when running on CUDA.
            half=self.device.type == 'cuda',
        )

        # One result is yielded per frame, so the enumeration index is the frame index.
        for frame_idx, res in enumerate(results):
            boxes = res.boxes
            # `boxes.id` is None on frames where the tracker has assigned no ids.
            if boxes is None or boxes.id is None:
                continue

            ids = boxes.id.int().cpu().tolist()
            xyxy = boxes.xyxy.cpu().numpy().astype(int).tolist()
            for track_id, (x1, y1, x2, y2) in zip(ids, xyxy):
                cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                trajectories[track_id].append((frame_idx, cx, cy))
                detections[frame_idx].append((track_id, (x1, y1, x2, y2)))

        return trajectories, detections

    @classmethod
    def _cluster_tracks(cls, trajectories, n_clusters: int):
        """Cluster tracks by displacement; return ``{track_id: cluster_label}`` (empty if no tracks)."""
        track_ids, features = cls._displacement_features(trajectories)
        if not track_ids:
            return {}

        # K-Means cannot fit more clusters than there are samples.
        k = min(n_clusters, len(track_ids))
        kmeans = KMeans(n_clusters=k, random_state=0).fit(features)
        return {tid: int(lbl) for tid, lbl in zip(track_ids, kmeans.labels_)}

    @classmethod
    def _color_for(cls, label):
        """Return the drawing colour for a cluster label (fallback colour if unknown)."""
        return cls.CLUSTER_COLORS.get(label, cls.FALLBACK_COLOR)

    def _render(self, input_path, out_path, fps, frame_size, detections, labels):
        """Write the annotated video: cluster-coloured boxes, track ids and trajectory tails."""
        cap = cv2.VideoCapture(input_path)
        writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
        # Last TRAIL_LENGTH centres of every track seen so far, in order of first
        # appearance. Built incrementally instead of re-scanning whole trajectories
        # on every frame. The tail of a finished track stays on screen, as before.
        trails = {}
        try:
            if not writer.isOpened():
                raise OSError(f"Cannot create output video: {out_path}")

            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                for track_id, (x1, y1, x2, y2) in detections.get(frame_idx, []):
                    color = self._color_for(labels.get(track_id))
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(frame, str(track_id), (x1, y1 - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    trail = trails.setdefault(track_id, [])
                    trail.append(((x1 + x2) // 2, (y1 + y2) // 2))
                    del trail[:-self.TRAIL_LENGTH]

                for track_id, trail in trails.items():
                    color = self._color_for(labels.get(track_id))
                    for start, end in zip(trail, trail[1:]):
                        cv2.line(frame, start, end, color, 2)

                writer.write(frame)
                frame_idx += 1
        finally:
            # Release both handles even when reading, drawing or writing fails.
            cap.release()
            writer.release()

In [3]:
# Build the analyzer (this loads the YOLO weights), then run tracking and
# clustering on the configured input video instead of repeating its path here.
processor = YoloVideoFlowAnalyzer()
video, trajs, labels = processor.process_and_track_with_clusters(temp_input_video_path)
video  # last expression of the cell: shows the path of the annotated video


video 1/1 (frame 1/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 18 persons, 433.9ms
video 1/1 (frame 2/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 15 persons, 20.6ms
video 1/1 (frame 3/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 20 persons, 21.0ms
video 1/1 (frame 4/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 21 persons, 20.7ms
video 1/1 (frame 5/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 24 persons, 20.9ms
video 1/1 (frame 6/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 25 persons, 20.9ms
video 1/1 (frame 7/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 25 persons, 20.9ms
video 1/1 (frame 8/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 27 persons, 21.1ms
video 1/1 (frame 9/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x960 25 persons, 21.5ms
video 1/1 (frame 10/429) /home/jupyter/datasphere/project/MOT20-01-raw.mp4: 544x



'/home/jupyter/datasphere/project/MOT20-01-raw_clusters.mp4'

In [6]:
# Count how many tracked pedestrians were assigned to each cluster.
cluster_counts = Counter(labels.values())

# Build the whole report first, then print it one line per entry.
report_lines = ["Количество пешеходов в каждом кластере:"]
report_lines.extend(
    f"Кластер {cluster_id}: {count} пешеходов"
    for cluster_id, count in cluster_counts.items()
)
print(*report_lines, sep="\n")

Количество пешеходов в каждом кластере:
Кластер 0: 283 пешеходов
Кластер 1: 21 пешеходов
