In [None]:
# Install the `ultralytics` package, which the import cell below pulls `YOLO` from.
!pip install ultralytics




In [None]:
import cv2
import torch
import numpy as np
from google.colab.patches import cv2_imshow
from ultralytics import YOLO


In [None]:

def extract_keypoints(person_kpts):
    """Collapse one person's pose keypoints into a single integer (x, y) anchor.

    The anchor is the mean of up to three reference points: the nose
    (index 0), the shoulder midpoint (indices 5 and 6) and the hip midpoint
    (indices 11 and 12).

    A keypoint counts as visible only when both of its coordinates are > 0
    (presumably the pose model reports undetected joints as (0, 0) -- confirm
    against the model's documentation). A midpoint is computed from the
    visible members of its pair only, so a single missing shoulder or hip no
    longer drags the midpoint halfway toward the image origin.

    Args:
        person_kpts: Indexable sequence of (x, y) keypoints for one person;
            it must have at least 13 entries.

    Returns:
        A tuple ``(x, y)`` of plain Python ints (fractional parts truncated),
        or ``None`` when no reference point is visible or the input is
        missing, too short or otherwise malformed.
    """
    def _is_visible(pt):
        # Both coordinates must be strictly positive for the point to count.
        return pt[0] > 0 and pt[1] > 0

    def _pair_midpoint(first, second):
        # Average only the visible members of a left/right pair.
        visible = [pt for pt in (first, second) if _is_visible(pt)]
        return np.mean(visible, axis=0) if visible else None

    try:
        nose = person_kpts[0]
        mid_shoulder = _pair_midpoint(person_kpts[5], person_kpts[6])
        mid_hip = _pair_midpoint(person_kpts[11], person_kpts[12])

        candidates = [nose if _is_visible(nose) else None, mid_shoulder, mid_hip]
        valid = [pt for pt in candidates if pt is not None]
        if not valid:
            return None

        x, y = np.mean(valid, axis=0).astype(int)
        # Plain ints keep downstream drawing calls independent of NumPy scalar types.
        return (int(x), int(y))
    except (LookupError, TypeError, ValueError):
        # Malformed input is treated as "no usable point"; unlike a bare
        # `except`, this lets KeyboardInterrupt and real bugs propagate.
        return None


## **Single person**

In [None]:
def initialize_tracking():
    """Create a fresh state dictionary for the single-person tracker.

    Returns:
        A new dict with two entries:
        ``'routes'`` -- an empty dict, filled elsewhere with per-track point lists;
        ``'target_id'`` -- ``None`` until a target track is chosen.

    Note:
        A later cell in this notebook defines another function with this same
        name; whichever cell ran most recently is the one that gets called.
    """
    state = dict(routes={}, target_id=None)
    return state

In [None]:
def process_target_detection(track_id, pt, box, tracking_data):
    """Record a detection if it belongs to the single tracked target.

    The first detection seen becomes the target. After that, only detections
    carrying the same track id extend the target's route.

    Args:
        track_id: Tracker id of the current detection.
        pt: Anchor point to append to the target's route.
        box: Bounding box of the detection (unused here; kept for a uniform
            call signature).
        tracking_data: State dict with a ``'routes'`` dict. ``'target_id'``
            is read if present and created on first use, so the function also
            works with a state dict that lacks that key (for example when a
            later cell has redefined ``initialize_tracking``).

    Returns:
        ``True`` if the detection belongs to the target and was recorded,
        ``False`` otherwise.
    """
    routes = tracking_data['routes']

    # `.get` tolerates a state dict built without a 'target_id' entry.
    target_id = tracking_data.get('target_id')
    if target_id is None:
        target_id = tracking_data['target_id'] = track_id

    if track_id != target_id:
        return False

    routes.setdefault(track_id, []).append(pt)
    return True


In [None]:
def draw_target_tracking(frame, track_id, pt, box, routes):
    """Draw the target's bounding box, id label and travelled route on `frame`.

    Args:
        frame: Image passed to the OpenCV drawing calls.
        track_id: Id shown in the label and used to look up the route.
        pt: Current anchor point (not used by this function).
        box: Object whose ``astype(int)`` yields the four box corners
            ``x1, y1, x2, y2``.
        routes: Mapping from track id to the list of points visited so far.
    """
    green = (0, 255, 0)
    left, top, right, bottom = box.astype(int)

    cv2.rectangle(frame, (left, top), (right, bottom), green, 2)
    label = f'ID {track_id}'
    cv2.putText(frame, label, (left, top - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)

    # Connect each consecutive pair of route points with a thick segment.
    trail = routes[track_id]
    for start, end in zip(trail, trail[1:]):
        cv2.line(frame, start, end, green, 4)


In [None]:
def track_target_person(video_path, output_path, model):
    """Track a single person through a video and write an annotated copy.

    Every frame is passed to ``model.track``. The first detection that yields
    a usable anchor point becomes the target; its bounding box, id label and
    route are drawn onto each frame where it is detected. All frames are
    written to `output_path`, annotated or not.

    Args:
        video_path: Path of the input video, opened with ``cv2.VideoCapture``.
        output_path: Path of the output video, written with the ``'mp4v'``
            codec at the input's frame size.
        model: Object exposing ``track(frame, persist=True, classes=[0])``
            whose first result provides ``boxes.id``, ``boxes.xyxy`` and
            ``keypoints.xy`` (``classes=[0]`` presumably restricts detection
            to the person class -- confirm against the model's class list).

    Raises:
        IOError: If the input video cannot be opened.
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        video.release()
        raise IOError(f"Could not open video: {video_path}")

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # A reported frame rate of 0 would give an unplayable output file, so fall back.
    fps = video.get(cv2.CAP_PROP_FPS) or 30.0

    out = cv2.VideoWriter(output_path,
                          cv2.VideoWriter_fourcc(*'mp4v'),
                          fps,
                          (width, height))

    tracking_data = initialize_tracking()
    # `initialize_tracking` is defined twice in this notebook; make sure the
    # single-person key exists whichever definition is currently live.
    tracking_data.setdefault('target_id', None)

    try:
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break
            results = model.track(frame, persist=True, classes=[0])

            if results[0].boxes.id is not None and results[0].keypoints is not None:
                ids = results[0].boxes.id.cpu().numpy()
                boxes = results[0].boxes.xyxy.cpu().numpy()
                keypoints = results[0].keypoints.xy.cpu().numpy()

                for track_id, box, kpts in zip(ids, boxes, keypoints):
                    track_id = int(track_id)
                    pt = extract_keypoints(kpts)
                    if pt is None:
                        continue

                    is_target = process_target_detection(track_id, pt, box, tracking_data)

                    if is_target:
                        draw_target_tracking(frame, track_id, pt, box, tracking_data['routes'])

            out.write(frame)
    finally:
        # Release both handles even if inference fails or the cell is
        # interrupted, so the output file is finalised.
        video.release()
        out.release()

# Load the model from 'yolo11n-pose.pt' and run the single-person tracker on
# 'people.mp4', writing the annotated frames to 'output_video.mp4'.
model = YOLO('yolo11n-pose.pt')
track_target_person('people.mp4', 'output_video.mp4', model)

Streaming output truncated to the last 5000 lines.
0: 384x640 3 persons, 227.0ms
Speed: 6.0ms preprocess, 227.0ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 232.7ms
Speed: 6.8ms preprocess, 232.7ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 217.9ms
Speed: 7.2ms preprocess, 217.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 140.3ms
Speed: 4.5ms preprocess, 140.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 147.3ms
Speed: 5.1ms preprocess, 147.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 146.3ms
Speed: 9.5ms preprocess, 146.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 159.5ms
Speed: 4.9ms preprocess, 159.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 145.9ms
Sp

## **Multiple people**

In [None]:

def initialize_tracking():
    """Create a fresh state dictionary for the multi-person tracker.

    Returns:
        A new dict whose keys ``'routes'``, ``'colors'``, ``'active_ids'`` and
        ``'lost_ids'`` each map to their own empty dict.

    Note:
        An earlier cell in this notebook defines a function with this same
        name; running this cell replaces that definition.
    """
    state_keys = ('routes', 'colors', 'active_ids', 'lost_ids')
    return {key: {} for key in state_keys}


In [None]:
def check_reid(pt, lost_ids, reid_threshold):
    """Find the lost track whose last known position is nearest to `pt`.

    Args:
        pt: (x, y) position of the new detection.
        lost_ids: Mapping from lost track id to that track's last (x, y) point.
        reid_threshold: Maximum Euclidean distance (exclusive) at which a
            lost track may be matched.

    Returns:
        The id of the closest lost track strictly within `reid_threshold`,
        or ``None`` when no lost track is close enough. On an exact distance
        tie the earliest entry in `lost_ids` wins.
    """
    target = np.asarray(pt, dtype=float)
    best_id = None
    best_dist = reid_threshold

    # Choose the nearest candidate, not the first one under the threshold,
    # so nearby lost tracks are not confused with each other.
    for lost_id, last_pt in lost_ids.items():
        dist = np.linalg.norm(np.asarray(last_pt, dtype=float) - target)
        if dist < best_dist:
            best_id, best_dist = lost_id, dist

    return best_id

In [None]:
def process_detection(track_id, pt, box, tracking_data, reid_threshold):
    """Register one detection and return the id under which it is tracked.

    A detection whose id has no route yet is first compared with the lost
    tracks via ``check_reid``; on a match it takes over the lost track's id.
    The resolved track is then marked active (missed-frame counter reset to
    0), removed from the lost set, given a colour if it has none, and its
    route is extended with `pt`.

    Args:
        track_id: Tracker id of the current detection.
        pt: Anchor point to append to the track's route.
        box: Bounding box of the detection (unused here).
        tracking_data: State dict with ``'routes'``, ``'colors'``,
            ``'active_ids'`` and ``'lost_ids'`` dicts; mutated in place.
        reid_threshold: Distance threshold forwarded to ``check_reid``.

    Returns:
        The id the detection was recorded under (the re-identified id when a
        lost track was matched, otherwise `track_id`).
    """
    routes = tracking_data['routes']
    colors = tracking_data['colors']
    active_ids = tracking_data['active_ids']
    lost_ids = tracking_data['lost_ids']

    if track_id not in routes:
        reid_id = check_reid(pt, lost_ids, reid_threshold)
        if reid_id is not None:
            track_id = reid_id

    # The track is visible again, so it must not stay a re-identification
    # candidate for other detections.
    lost_ids.pop(track_id, None)

    routes.setdefault(track_id, [])
    # A known id can lose its colour when the track expires; restore one here
    # so callers indexing `colors[track_id]` never hit a KeyError.
    if track_id not in colors:
        colors[track_id] = tuple(np.random.randint(0, 255, 3).tolist())

    active_ids[track_id] = 0
    routes[track_id].append(pt)

    return track_id

In [None]:
def draw_tracking(frame, track_id, pt, box, color):
    """Draw one tracked person's bounding box, id label and anchor dot.

    Args:
        frame: Image passed to the OpenCV drawing calls.
        track_id: Id shown in the text label.
        pt: Anchor point, drawn as a filled circle of radius 4.
        box: Object whose ``astype(int)`` yields the four box corners
            ``x1, y1, x2, y2``.
        color: Colour used for the box, the label and the dot.
    """
    left, top, right, bottom = box.astype(int)
    label = f'ID {track_id}'
    label_origin = (left, top - 10)  # just above the box's top-left corner

    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
    cv2.putText(frame, label, label_origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    cv2.circle(frame, pt, 4, color, -1)


In [None]:
def update_lost_tracks(seen_ids, tracking_data, max_missed):
    """Age the tracks that were not seen this frame and expire stale ones.

    For every active track missing from `seen_ids` the missed-frame counter
    is incremented. While the counter is at most `max_missed` and the track
    has a non-empty route, its last point is stored in ``'lost_ids'`` as a
    re-identification candidate. Otherwise the track is expired and all of
    its state is dropped, including its route, so that state does not grow
    without bound and a reused id starts from a clean slate. Tracks present
    in `seen_ids` have any leftover lost entry cleared.

    Args:
        seen_ids: Collection of track ids observed in the current frame.
        tracking_data: State dict with ``'routes'``, ``'colors'``,
            ``'active_ids'`` and ``'lost_ids'`` dicts; mutated in place.
        max_missed: Highest missed-frame count at which a track is still kept.
    """
    active_ids = tracking_data['active_ids']
    routes = tracking_data['routes']
    colors = tracking_data['colors']
    lost_ids = tracking_data['lost_ids']

    # Iterate over a snapshot of the keys because entries are removed below.
    for tid in list(active_ids):
        if tid in seen_ids:
            # Visible again: it is no longer a re-identification candidate.
            lost_ids.pop(tid, None)
            continue

        active_ids[tid] += 1
        trail = routes.get(tid)
        if active_ids[tid] <= max_missed and trail:
            lost_ids[tid] = trail[-1]
        else:
            active_ids.pop(tid, None)
            colors.pop(tid, None)
            lost_ids.pop(tid, None)
            routes.pop(tid, None)


In [None]:
def draw_trajectories(frame, tracking_data):
    """Draw the route of every track that currently has a colour assigned.

    Args:
        frame: Image passed to ``cv2.line``.
        tracking_data: State dict providing ``'routes'`` (track id -> list of
            points) and ``'colors'`` (track id -> colour). Routes with no
            colour entry are skipped.
    """
    routes = tracking_data['routes']
    colors = tracking_data['colors']

    for tid, trail in routes.items():
        if tid not in colors:
            continue
        line_color = colors[tid]
        # Join each consecutive pair of points with a thin segment.
        for start, end in zip(trail, trail[1:]):
            cv2.line(frame, start, end, line_color, 2)

In [None]:
def track_people(video_path, output_path, model, max_missed=15, reid_threshold=50):
    """Track every detected person through a video and write an annotated copy.

    Every frame is passed to ``model.track``. Each detection with a usable
    anchor point is registered (and possibly re-identified with a recently
    lost track), then drawn with its box, id label and anchor dot. After the
    detections are processed, unseen tracks are aged and all coloured routes
    are drawn before the frame is written.

    Args:
        video_path: Path of the input video, opened with ``cv2.VideoCapture``.
        output_path: Path of the output video, written with the ``'mp4v'``
            codec at the input's frame size.
        model: Object exposing ``track(frame, persist=True, classes=[0])``
            whose first result provides ``boxes.id``, ``boxes.xyxy`` and
            ``keypoints.xy`` (``classes=[0]`` presumably restricts detection
            to the person class -- confirm against the model's class list).
        max_missed: Forwarded to ``update_lost_tracks``; the highest
            missed-frame count at which a track is still kept.
        reid_threshold: Forwarded to ``process_detection``; distance
            threshold for matching a new detection to a lost track.

    Raises:
        IOError: If the input video cannot be opened.
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        video.release()
        raise IOError(f"Could not open video: {video_path}")

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # A reported frame rate of 0 would give an unplayable output file, so fall back.
    fps = video.get(cv2.CAP_PROP_FPS) or 30.0

    out = cv2.VideoWriter(output_path,
                          cv2.VideoWriter_fourcc(*'mp4v'),
                          fps,
                          (width, height))

    tracking_data = initialize_tracking()

    try:
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            results = model.track(frame, persist=True, classes=[0])
            seen_ids = set()

            if results[0].boxes.id is not None and results[0].keypoints is not None:
                ids = results[0].boxes.id.cpu().numpy()
                boxes = results[0].boxes.xyxy.cpu().numpy()
                kpts = results[0].keypoints.xy.cpu().numpy()

                for track_id, person_kpts, box in zip(ids, kpts, boxes):
                    track_id = int(track_id)
                    # The raw tracker id is recorded as seen before any
                    # re-identification remaps it.
                    seen_ids.add(track_id)

                    pt = extract_keypoints(person_kpts)
                    if pt is None:
                        continue

                    track_id = process_detection(track_id, pt, box, tracking_data, reid_threshold)

                    draw_tracking(frame, track_id, pt, box, tracking_data['colors'][track_id])

            update_lost_tracks(seen_ids, tracking_data, max_missed)

            draw_trajectories(frame, tracking_data)

            out.write(frame)
    finally:
        # Release both handles even if inference fails or the cell is
        # interrupted, so the output file is finalised.
        video.release()
        out.release()

# Load the model from "yolo11n-pose.pt" again (rebinding `model`) and run the
# multi-person tracker on 'people.mp4', writing the annotated frames to 'multiple.mp4'.
model = YOLO("yolo11n-pose.pt")
track_people('people.mp4', 'multiple.mp4', model)




Streaming output truncated to the last 5000 lines.
0: 384x640 3 persons, 143.4ms
Speed: 4.6ms preprocess, 143.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 139.9ms
Speed: 6.1ms preprocess, 139.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 138.9ms
Speed: 7.1ms preprocess, 138.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 142.6ms
Speed: 5.6ms preprocess, 142.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 142.9ms
Speed: 5.5ms preprocess, 142.9ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 143.4ms
Speed: 5.8ms preprocess, 143.4ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 154.1ms
Speed: 5.6ms preprocess, 154.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 137.1ms
Sp