# Show segmentation masks and track objects in a video.

In [2]:
# Show segmentation masks and track objects in a video.
import cv2
import os
import csv
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# === Video and Model Setup ===
video_path = "data/road-pothole-1.MP4"
model = YOLO("runs/train19/weights/best.pt")  # segmentation model
cap = cv2.VideoCapture(video_path)

# === Video Properties ===
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH,
                                       cv2.CAP_PROP_FRAME_HEIGHT,
                                       cv2.CAP_PROP_FPS))

# === Output Video Path ===
base, ext = os.path.splitext(video_path)
new_video_path = f"{base}_seg-tracking_train3{ext}"
out = cv2.VideoWriter(new_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# === Display Window Setup ===
cv2.namedWindow("instance-segmentation-object-tracking", cv2.WINDOW_NORMAL)
cv2.resizeWindow("instance-segmentation-object-tracking", 1280, 720)

# === CSV Logging Setup ===
csv_path = f"{base}_tracking_results.csv"
csv_file = open(csv_path, mode='w', newline='')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["Frame", "Track_ID"])

# === Tracking Variables ===
pot_holes_id = set()
frame_idx = 0

# === Frame Processing Loop ===
while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    annotator = Annotator(im0, line_width=5)
    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None and results[0].masks is not None:
        masks = results[0].masks.xy
        track_ids = results[0].boxes.id.int().cpu().tolist()

        for mask, track_id in zip(masks, track_ids):
            color = colors(int(track_id), True)
            txt_color = annotator.get_txt_color(color)


            # Convert polygon mask to NumPy format and draw it
            mask_np = np.array([mask], dtype=np.int32)
            cv2.fillPoly(im0, mask_np, color)

            # Optional: add label near first point of mask
            x, y = mask[0]
            cv2.putText(im0, str(track_id), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX,
                        0.7, txt_color, 2, cv2.LINE_AA)

            # Track new unique potholes
            if track_id not in pot_holes_id:
                pot_holes_id.add(track_id)

            # Write to CSV
            csv_writer.writerow([frame_idx, track_id])

    # Overlay tracked count on video
    cv2.putText(im0, f"Potholes tracked: {len(pot_holes_id)}", (50, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2, cv2.LINE_AA)

    # Write frame to video and show window
    out.write(im0)
    cv2.imshow("instance-segmentation-object-tracking", im0)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

    frame_idx += 1

# === Cleanup ===
out.release()
cap.release()
csv_file.close()
cv2.destroyAllWindows()

# === Summary Output ===
print("Potholes ID: ", list(pot_holes_id))
print("Number of potholes detected: ", len(pot_holes_id))
print(f"Tracking results saved to: {csv_path}")



0: 384x640 (no detections), 69.0ms
Speed: 3.3ms preprocess, 69.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)
Potholes ID:  []
Number of potholes detected:  0
Tracking results saved to: data/road-pothole-1_tracking_results.csv


# Show bounding box and track ID when detecting a pothole

In [4]:
import cv2
import os
import csv
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# === Video and Model Setup ===
video_path = "data/videos/GX011215_0-7_24fps.MP4"
model = YOLO("runs/train3/weights/best.pt")  # Segmentation or detection model
cap = cv2.VideoCapture(video_path)

# === Video Properties ===
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH,
                                       cv2.CAP_PROP_FRAME_HEIGHT,
                                       cv2.CAP_PROP_FPS))

# === Output Video Path ===
base, ext = os.path.splitext(video_path)
new_video_path = f"{base}_bbox-tracking_train3{ext}"
out = cv2.VideoWriter(new_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# === Display Window Setup ===
cv2.namedWindow("bbox-tracking", cv2.WINDOW_NORMAL)
cv2.resizeWindow("bbox-tracking", 1280, 720)

# === CSV Logging Setup ===
csv_path = f"{base}_bbox_tracking_results.csv"
csv_file = open(csv_path, mode='w', newline='')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["Frame", "Track_ID", "X1", "Y1", "X2", "Y2"])

# === Tracking Variables ===
pot_holes_id = set()
frame_idx = 0
last_frame_per_id = {}  # track_id: (frame image, frame number, bbox)
video_base_name = os.path.splitext(os.path.basename(video_path))[0]

# === Frame Processing Loop ===
while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    annotator = Annotator(im0, line_width=3)
    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None:
        boxes = results[0].boxes.xyxy.cpu().numpy()   # [x1, y1, x2, y2]
        track_ids = results[0].boxes.id.int().cpu().tolist()

        for box, track_id in zip(boxes, track_ids):
            x1, y1, x2, y2 = map(int, box)
            color = colors(int(track_id), True)
            label = str(track_id)

            annotator.box_label([x1, y1, x2, y2], label, color=color)
            pot_holes_id.add(track_id)
            csv_writer.writerow([frame_idx, track_id, x1, y1, x2, y2])
            last_frame_per_id[track_id] = (im0.copy(), frame_idx)

    cv2.putText(im0, f"Potholes tracked: {len(pot_holes_id)}", (30, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2, cv2.LINE_AA)

    out.write(im0)
    cv2.imshow("bbox-tracking", im0)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

    frame_idx += 1

# === Cleanup ===
out.release()
cap.release()
csv_file.close()
cv2.destroyAllWindows()

# === Save full last frame for each track ID ===
output_dir = f"{base}_pothole_frames"
os.makedirs(output_dir, exist_ok=True)

for track_id, (frame_img, frame_num) in last_frame_per_id.items():
    output_filename = f"{video_base_name}_Frame{frame_num}_ID{track_id}.jpg"
    output_path = os.path.join(output_dir, output_filename)
    cv2.imwrite(output_path, frame_img)

print("Potholes ID: ", list(pot_holes_id))
print("Number of potholes detected: ", len(pot_holes_id))
print(f"Tracking results saved to: {csv_path}")
print(f"Saved {len(last_frame_per_id)} full-frame images to: {output_dir}")


FileNotFoundError: [Errno 2] No such file or directory: 'data/videos/GX011215_0-7_24fps_bbox_tracking_results.csv'

# Bounding box with segmentation labels
EDGE_MARGIN = 50  # Pixels away from frame edges to consider as pothole full image.
Estimates GPS coordinates for each saved pothole image using frame timing and embeds them using exiftool.

In [7]:
import cv2
import os
import csv
from datetime import datetime, timedelta
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# === Video and Model Setup ===
video_path = "data/road-pothole-1.MP4"
model = YOLO("runs/train19/weights/best.pt")  # Segmentation or detection model
cap = cv2.VideoCapture(video_path)

# === Video GPS information ===
def get_gps_coords(video_path):
    cmd = f'exiftool -p kml.fmt -ee3 {video_path} > {video_path}.kml'
    os.system(cmd)
    coords = []
    with open(f'{video_path}.kml', 'r') as f:
        lines = f.readlines()
    for line in lines:
        if 'coordinates' in line:
            line = line.replace('<coordinates>', '').replace(',0</coordinates>', '')
            coords.append([float(x) for x in line.split(',')])
    return coords

coords = get_gps_coords(video_path)

# === Video Properties ===
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH,
                                       cv2.CAP_PROP_FRAME_HEIGHT,
                                       cv2.CAP_PROP_FPS))

# Try to get creation time using exiftool
creation_time_str = os.popen(f"exiftool -CreateDate -d '%Y-%m-%d %H:%M:%S' {video_path}").read()
creation_time = None
if "Create Date" in creation_time_str:
    try:
        creation_time = datetime.strptime(creation_time_str.split(': ', 1)[-1].strip(), '%Y-%m-%d %H:%M:%S')
    except:
        creation_time = None

# === Output Video Path ===
base, ext = os.path.splitext(video_path)
new_video_path = f"{base}_pothole-tracking{ext}"
out = cv2.VideoWriter(new_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# === Display Window Setup ===
cv2.namedWindow("pothole-tracking", cv2.WINDOW_NORMAL)
cv2.resizeWindow("pothole-tracking", 1280, 720)

# === Tracking Variables ===
pot_holes_id = set()
frame_idx = 0
last_frame_per_id = {}  # track_id: (frame number, frame image, mask, box)
video_base_name = os.path.splitext(os.path.basename(video_path))[0]

EDGE_MARGIN = 50  # Pixels away from frame edges to consider as safe

# === Frame Processing Loop ===
while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    annotator = Annotator(im0, line_width=3)
    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None:
        boxes = results[0].boxes.xyxy.cpu().numpy()
        track_ids = results[0].boxes.id.int().cpu().tolist()
        masks = results[0].masks.xy if results[0].masks is not None else [None] * len(track_ids)

        for box, track_id, mask in zip(boxes, track_ids, masks):
            x1, y1, x2, y2 = map(int, box)

            if (x1 <= EDGE_MARGIN or y1 <= EDGE_MARGIN or
                x2 >= w - EDGE_MARGIN or y2 >= h - EDGE_MARGIN):
                continue

            color = colors(int(track_id), True)
            label = str(track_id)

            annotator.box_label([x1, y1, x2, y2], label, color=color)
            pot_holes_id.add(track_id)
            last_frame_per_id[track_id] = (frame_idx, im0.copy(), mask, (x1, y1, x2, y2))

    overlay = im0.copy()
    alpha = 0.6
    text1 = f"Total potholes found: {len(pot_holes_id)}"
    text2 = f"Pothole tracked: {pot_holes_id}"

    (tw1, th1), _ = cv2.getTextSize(text1, cv2.FONT_HERSHEY_SIMPLEX, 2, 3)
    (tw2, th2), _ = cv2.getTextSize(text2, cv2.FONT_HERSHEY_SIMPLEX, 2, 3)

    cv2.rectangle(overlay, (25, 65 - th1), (35 + tw1, 105), (255, 255, 255), -1)
    cv2.rectangle(overlay, (25, 5 - th2), (35 + tw2, 45), (255, 255, 255), -1)

    cv2.addWeighted(overlay, alpha, im0, 1 - alpha, 0, im0)

    cv2.putText(im0, text1, (30, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (128, 0, 128), 3, cv2.LINE_AA)
    cv2.putText(im0, text2, (30, 40), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 3, cv2.LINE_AA)

    out.write(im0)
    cv2.imshow("bbox-tracking", im0)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

    frame_idx += 1

# === Cleanup ===
out.release()
cap.release()
cv2.destroyAllWindows()

# === Save last frame and YOLO v11 segmentation labels for each Track ID ===
output_dir = f"{base}_pothole_frames"
os.makedirs(output_dir, exist_ok=True)

# Update CSV to only log final detections with GPS and timestamp
csv_path = f"{base}_pothole_tracking_results.csv"
with open(csv_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Frame", "Track_ID", "X1", "Y1", "X2", "Y2", "Latitude", "Longitude", "Timestamp"])

    for track_id, (frame_num, frame_img, mask, (x1, y1, x2, y2)) in last_frame_per_id.items():
        output_filename = f"{video_base_name}_Frame{frame_num}_ID{track_id}.jpg"
        output_path = os.path.join(output_dir, output_filename)
        cv2.imwrite(output_path, frame_img)

        cur_gps = [None, None]
        if coords and frame_num // int(fps) < len(coords) - 1:
            i = frame_num
            fps_int = int(fps)
            idx = i // fps_int
            frac = (i % fps_int) / fps_int
            GPS_1 = coords[idx]
            GPS_2 = coords[idx + 1]
            cur_gps = [
                GPS_1[0] + (GPS_2[0] - GPS_1[0]) * frac,
                GPS_1[1] + (GPS_2[1] - GPS_1[1]) * frac
            ]

            latitude_ref = 'N' if cur_gps[1] >= 0 else 'S'
            longitude_ref = 'E' if cur_gps[0] >= 0 else 'W'
            cmd = f"exiftool -GPSLatitude={abs(cur_gps[1])} -GPSLongitude={abs(cur_gps[0])} -GPSLatitudeRef={latitude_ref} -GPSLongitudeRef={longitude_ref} {output_path}"
            os.system(cmd)

        timestamp = ""
        if creation_time:
            timestamp_dt = creation_time + timedelta(seconds=(frame_num / fps))
            timestamp = timestamp_dt.strftime('%m-%d-%Y %H:%M:%S')

        csv_writer.writerow([frame_num, track_id, x1, y1, x2, y2, cur_gps[1], cur_gps[0], timestamp])

        if mask is not None:
            class_id = 0  # pothole class
            normalized_coords = [f"{x / w:.6f} {y / h:.6f}" for x, y in mask]
            label_line = f"{class_id} " + " ".join(normalized_coords)
            label_filename = f"{video_base_name}_Frame{frame_num}_ID{track_id}.txt"
            label_path = os.path.join(output_dir, label_filename)
            with open(label_path, 'w') as label_file:
                label_file.write(label_line + "\n")

# === Delete *.jpg_original files ===
for file in os.listdir(output_dir):
    if file.endswith(".jpg_original"):
        os.remove(os.path.join(output_dir, file))

print("Potholes ID: ", list(pot_holes_id))
print("Number of potholes detected: ", len(pot_holes_id))
print(f"Tracking results saved to: {csv_path}")
print(f"Saved {len(last_frame_per_id)} full-frame images and label files to: {output_dir}")





0: 384x640 (no detections), 48.4ms
Speed: 1.8ms preprocess, 48.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 33.3ms
Speed: 2.5ms preprocess, 33.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 38.3ms
Speed: 2.7ms preprocess, 38.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 1.9ms preprocess, 19.6ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 27.1ms
Speed: 2.1ms preprocess, 27.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 44.4ms
Speed: 2.9ms preprocess, 44.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 26.4ms
Speed: 2.1ms preprocess, 26.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 pothole, 16.0ms
Speed: 2.8ms preprocess, 16.0ms inferen

# END