In [None]:
from ultralytics import YOLO
import cv2
import os
from urllib.parse import quote

# Load model
model = YOLO('yolov8x.pt')


# Connect to RTSP stream.
# The camera credentials are read from the environment so the password is
# never stored in the notebook: set RTSP_PASSWORD (and optionally RTSP_USER)
# before running this cell. A missing RTSP_PASSWORD raises KeyError.
rtsp_user = os.environ.get('RTSP_USER', 'admin')
rtsp_password = os.environ['RTSP_PASSWORD']
rtsp_url = (
    f"rtsp://{quote(rtsp_user, safe='')}:{quote(rtsp_password, safe='')}"
    "@192.168.1.108:554/cam/realmonitor?channel=7&subtype=0"
)
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        # Inference
        results = model.predict(source=frame, show=False, conf=0.5)

        # Annotate and display frame
        annotated_frame = results[0].plot()
        cv2.imshow("YOLO Detection", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the stream and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

In [70]:
from ultralytics import YOLO
import cv2
import os
import time
from urllib.parse import quote

# Load YOLO model and move it to GPU
model = YOLO('yolov8x.pt').to('cuda')

# Connect to RTSP stream.
# The camera credentials are read from the environment so the password is
# never stored in the notebook: set RTSP_PASSWORD (and optionally RTSP_USER)
# before running this cell. A missing RTSP_PASSWORD raises KeyError.
rtsp_user = os.environ.get('RTSP_USER', 'admin')
rtsp_password = os.environ['RTSP_PASSWORD']
rtsp_url = (
    f"rtsp://{quote(rtsp_user, safe='')}:{quote(rtsp_password, safe='')}"
    "@192.168.1.108:554/cam/realmonitor?channel=5&subtype=0"
)
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)

# For FPS calculation. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        # Inference on GPU
        results = model.predict(source=frame, device=0, show=False, conf=0.5)

        # Annotate frame
        annotated_frame = results[0].plot()

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Overlay FPS on the frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow("YOLO Detection [GPU]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the stream and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 (no detections), 7.0ms
Speed: 1.6ms preprocess, 7.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 6.8ms
Speed: 2.1ms preprocess, 6.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 7.3ms
Speed: 2.1ms preprocess, 7.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 6.8ms
Speed: 1.8ms preprocess, 6.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 7.4ms
Speed: 1.7ms preprocess, 7.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 7.1ms
Speed: 2.1ms preprocess, 7.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 7.1ms
Speed: 1.6ms preprocess, 7.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 7.1ms
Speed: 1.8ms preprocess, 7.1ms inference, 0.3ms 

In [3]:
from ultralytics import YOLO
import cv2
import time

# Load YOLO model and move to GPU
model = YOLO('yolov5nu.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Downloads\Driver's Eye View - Welsh Highland Railway (Rheilffordd Eryri) - Porthmadog to Caernarfon - YouTube - Google Chrome 2025-06-20 23-12-20.mp4"
cap = cv2.VideoCapture(video_path)

# Alternative source: RTSP stream (credentials redacted - never commit real ones)
#rtsp_url = 'rtsp://<user>:<password>@192.168.1.108:554/cam/realmonitor?channel=1&subtype=0'
#cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)

# FPS timer. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        # Run YOLO inference on GPU
        results = model.predict(source=frame, device=0, conf=0.05)

        # Annotate frame
        annotated_frame = results[0].plot()

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Show FPS on frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display result
        cv2.imshow("YOLO Detection [Video Test]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 31 persons, 4 trains, 1 umbrella, 1 suitcase, 1 kite, 4.5ms
Speed: 1.1ms preprocess, 4.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 31 persons, 4 trains, 1 umbrella, 1 suitcase, 1 kite, 10.1ms
Speed: 1.9ms preprocess, 10.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 31 persons, 5 trains, 1 umbrella, 1 kite, 4.1ms
Speed: 1.8ms preprocess, 4.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 30 persons, 5 trains, 1 umbrella, 1 kite, 4.2ms
Speed: 1.3ms preprocess, 4.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 5 trains, 1 truck, 1 umbrella, 1 kite, 1 skateboard, 4.2ms
Speed: 2.0ms preprocess, 4.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 5 trains, 2 backpacks, 1 umbrella, 1 kite, 6.6ms
Speed: 1.2ms preprocess, 6.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0

In [None]:
from ultralytics import YOLO
import cv2
import time
from collections import Counter

# Load YOLO model and move to GPU
model = YOLO('yolov5su.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Downloads\20250327_045214.mp4"
cap = cv2.VideoCapture(video_path)

# FPS timer. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

frame_count = 0
names = model.names  # class-id -> label map; constant for the whole run

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        frame_count += 1

        # Run YOLO inference on GPU (verbose=False: this cell prints its own log)
        results = model.predict(source=frame, device=0, conf=0.5, verbose=False)

        # FPS is measured between consecutive post-inference timestamps, i.e.
        # over one full loop iteration.
        end_time = time.perf_counter()
        elapsed = end_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = end_time

        # Get result info
        result = results[0]
        boxes = result.boxes

        # Count each detected class (Counter keeps first-seen order)
        counts = Counter()
        if boxes is not None:
            counts.update(names[int(cls_id)] for cls_id in boxes.cls.tolist())

        # Construct detection string like "1 person, 2 cups"
        detection_str = ", ".join(f"{n} {label}" for label, n in counts.items())
        if not detection_str:
            detection_str = "no detections"

        # Print output like YOLO
        print(f"{frame_count}: {result.orig_shape[0]}x{result.orig_shape[1]} {detection_str}")
        print(f"FPS: {fps:.2f}")
        print(f"Speed: {result.speed['preprocess']:.1f}ms preprocess, "
              f"{result.speed['inference']:.1f}ms inference, "
              f"{result.speed['postprocess']:.1f}ms postprocess per image "
              f"at shape {tuple(result.orig_shape[::-1])}\n")
finally:
    # Release the video even if inference raises or the kernel is interrupted.
    cap.release()


In [None]:
from ultralytics import YOLO
import cv2
import time

# Load YOLO model and move to GPU
model = YOLO('yolov5su.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Downloads\20250327_045214.mp4"
cap = cv2.VideoCapture(video_path)

# Detections are drawn only on every Nth frame; other frames are shown raw.
PLOT_EVERY_N_FRAMES = 20

# FPS timer. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()
frame_count = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        # Run YOLO inference on GPU (on every frame, regardless of plotting)
        results = model.predict(source=frame, device=0, conf=0.5)

        # Plot only every PLOT_EVERY_N_FRAMES frames
        if frame_count % PLOT_EVERY_N_FRAMES == 0:
            annotated_frame = results[0].plot()
        else:
            annotated_frame = frame

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Show FPS on frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display result
        cv2.imshow("YOLO Detection [Video Test]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_count += 1
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
from ultralytics import YOLO
import cv2
import time

# Load YOLO model and move to GPU
model = YOLO('yolo11n.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Downloads\20250327_045214.mp4"
cap = cv2.VideoCapture(video_path)
# cap = cv2.VideoCapture(1)  # alternative: capture device index 1

# FPS tracking variables. perf_counter() is monotonic and high resolution,
# unlike time.time(), whose coarse ticks can make consecutive readings equal.
prev_time = time.perf_counter()
total_time = 0
frame_count = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        # Run YOLO inference on GPU
        results = model.predict(source=frame, device=0, conf=0.5)

        # Annotate frame
        annotated_frame = results[0].plot()

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        frame_time = curr_time - prev_time
        prev_time = curr_time
        fps = 1 / frame_time if frame_time > 0 else 0.0  # never divide by zero

        total_time += frame_time
        frame_count += 1

        # Show FPS on frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display result
        cv2.imshow("YOLO Detection [Video Test]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

# Print average FPS (frames processed / wall-clock time spent in the loop)
if total_time > 0 and frame_count > 0:
    avg_fps = frame_count / total_time
    print(f"\n✅ Average FPS: {avg_fps:.2f} over {frame_count} frames")
else:
    print("⚠️ Not enough frames to calculate average FPS.")


In [None]:
from ultralytics import YOLO
import cv2
import os
import time
from urllib.parse import quote

# One-off export of the TensorRT engine (kept for reference)
#model = YOLO('yolo11n.pt')
#model.export(format="engine")

trt_model = YOLO("yolo11x.engine")

# Connect to RTSP stream.
# The camera credentials are read from the environment so the password is
# never stored in the notebook: set RTSP_PASSWORD (and optionally RTSP_USER)
# before running this cell. A missing RTSP_PASSWORD raises KeyError.
rtsp_user = os.environ.get('RTSP_USER', 'admin')
rtsp_password = os.environ['RTSP_PASSWORD']
rtsp_url = (
    f"rtsp://{quote(rtsp_user, safe='')}:{quote(rtsp_password, safe='')}"
    "@192.168.1.108:554/cam/realmonitor?channel=5&subtype=0"
)
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)


# Alternative source: local video file
#video_path = r"C:\Users\User\Downloads\20250327_045214.mp4"
#cap = cv2.VideoCapture(video_path)

# FPS calculation. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        # Inference
        results = trt_model.predict(source=frame, show=False, conf=0.5)

        # Annotate frame
        annotated_frame = results[0].plot()

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Put FPS text on frame
        cv2.putText(annotated_frame, f"FPS: {fps:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow("YOLO Detection", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the stream and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

In [13]:
from ultralytics import YOLO
import cv2
import time
import numpy as np

# Load YOLO model and move to GPU
model = YOLO('yolov5nu.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Downloads\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
cap = cv2.VideoCapture(video_path)

# FPS timer. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        # Run YOLO inference on GPU
        results = model.predict(source=frame, device=0, conf=0.05)

        # Copy the frame for annotation
        annotated_frame = frame.copy()

        # Draw bounding boxes without class or confidence.
        # .tolist() yields plain Python ints, which OpenCV always accepts as
        # point coordinates.
        boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
        for x1, y1, x2, y2 in boxes.tolist():
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Show FPS on frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display result
        cv2.imshow("YOLO Detection [Video Test]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 29 persons, 2 trains, 2 backpacks, 8 umbrellas, 2 handbags, 4.9ms
Speed: 1.1ms preprocess, 4.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 2 trains, 1 traffic light, 1 backpack, 9 umbrellas, 1 handbag, 1 chair, 6.1ms
Speed: 1.4ms preprocess, 6.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 2 trains, 2 backpacks, 9 umbrellas, 1 handbag, 5.3ms
Speed: 1.3ms preprocess, 5.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 31 persons, 1 car, 2 trains, 5 umbrellas, 4.7ms
Speed: 1.2ms preprocess, 4.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 1 car, 2 trains, 6 umbrellas, 4.9ms
Speed: 1.7ms preprocess, 4.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 28 persons, 1 car, 1 train, 6 umbrellas, 6.7ms
Speed: 1.3ms preprocess, 6.7ms inference, 1.6ms postprocess per image at shape (1, 3

In [None]:
from ultralytics import YOLO
import cv2
import time
import torch

# Load YOLO model and move to GPU
model = YOLO('yolov5n.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Videos\2025-06-20 22-48-51.mp4"
cap = cv2.VideoCapture(video_path)

# Zero-based class index to hide. Index 6 is "train" in the 80-class COCO
# label map (0 person, 1 bicycle, 2 car, 3 motorcycle, 4 airplane, 5 bus,
# 6 train, 7 truck). Check model.names if different weights are loaded.
TRAIN_CLASS_ID = 6

# FPS timer. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        # Run YOLO inference on GPU
        results = model.predict(source=frame, device=0, conf=0.05)

        # Get first frame result
        result = results[0]

        # Boolean mask that keeps every detection except the excluded class
        classes = result.boxes.cls
        mask = classes != TRAIN_CLASS_ID

        # Apply mask to filter out 'train' class
        result.boxes = result.boxes[mask]

        # Annotate frame
        annotated_frame = result.plot()

        # Calculate and display FPS over the whole loop iteration
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show frame
        cv2.imshow("YOLO Detection [Video Test]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()


In [45]:
#With frame CAP


from ultralytics import YOLO
import cv2
import time
import numpy as np

# Load YOLO model and move to GPU
model = YOLO('yolov5nu.pt').to('cuda')

# Path to your local video file
video_path = r"C:\Users\User\Documents\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
cap = cv2.VideoCapture(video_path)

# Set to True to throttle playback to roughly the source frame rate.
# Off by default, so the loop runs as fast as inference allows.
CAP_TO_SOURCE_FPS = False

# Get source video FPS; the per-frame time budget is 0.9 of the source frame
# period, falling back to 1/30 s when the container reports no FPS.
video_fps = cap.get(cv2.CAP_PROP_FPS)
frame_duration = 0.9 / video_fps if video_fps > 0 else 1.0 / 30

# FPS timer. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or failed to read frame.")
            break

        # Start of the per-frame processing budget (used by the optional cap)
        start_time = time.perf_counter()

        # Run YOLO inference on GPU
        results = model.predict(source=frame, device=0, conf=0.05)

        # Copy the frame for annotation
        annotated_frame = frame.copy()

        # Draw bounding boxes without class or confidence.
        # .tolist() yields plain Python ints, which OpenCV always accepts as
        # point coordinates.
        boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
        for x1, y1, x2, y2 in boxes.tolist():
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Show FPS on frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display result
        cv2.imshow("YOLO Detection [Video Test]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Cap the FPS to match the source video (only when enabled)
        if CAP_TO_SOURCE_FPS:
            spent = time.perf_counter() - start_time
            time.sleep(max(0, frame_duration - spent))
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 28 persons, 2 trains, 1 traffic light, 2 backpacks, 8 umbrellas, 2 handbags, 5.1ms
Speed: 1.6ms preprocess, 5.1ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 2 trains, 2 backpacks, 9 umbrellas, 1 handbag, 5.6ms
Speed: 1.5ms preprocess, 5.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 31 persons, 1 car, 2 trains, 5 umbrellas, 7.1ms
Speed: 1.4ms preprocess, 7.1ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 1 car, 3 trains, 1 backpack, 8 umbrellas, 7.5ms
Speed: 1.7ms preprocess, 7.5ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 3 cars, 2 trains, 1 traffic light, 6 umbrellas, 8.0ms
Speed: 1.6ms preprocess, 8.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 1 car, 2 trains, 1 truck, 2 fire hydrants, 6 umbrellas, 2 handbags, 6.9ms
Speed: 1.2ms preprocess, 6.9ms inference

In [43]:
import torch
import cv2
import time
import numpy as np

# Load trained YOLOv5 model
model = torch.hub.load(r"C:\Users\User\yolov5", 'custom',
                       path=r"C:\Users\User\yolov5\runs\train\railway_model3\weights\best.pt",
                       source='local')
model.conf = 0.05  # Confidence threshold
model.to('cuda')   # Use GPU

# Set to True to throttle playback to roughly the source frame rate.
# Off by default, so the loop runs as fast as inference allows.
CAP_TO_SOURCE_FPS = False

# Open video
video_path = r"C:\Users\User\Documents\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_duration = 0.9 / fps if fps > 0 else 1.0 / 30
# perf_counter() is monotonic and high resolution, unlike time.time(), whose
# coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

# Output video writer. Fall back to 30 FPS when the container reports none,
# so the writer is never opened with a frame rate of 0.
output_path = r"C:\Users\User\Documents\railway_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer_fps = fps if fps > 0 else 30.0
out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Start of the per-frame processing budget (used by the optional cap)
        start_time = time.perf_counter()

        # Run inference
        results = model(frame)
        boxes = results.xyxy[0].cpu().numpy()  # [x1, y1, x2, y2, conf, cls]

        annotated_frame = frame.copy()

        if len(boxes) > 0:
            # Keep only the single detection with the highest confidence
            best_box = boxes[np.argmax(boxes[:, 4])]
            x1, y1, x2, y2 = (int(v) for v in best_box[:4])
            conf = best_box[4]

            # Draw bounding box and confidence
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, f"{conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Show FPS, measured over the whole loop iteration
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps_display = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        cv2.putText(annotated_frame, f"FPS: {fps_display:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        prev_time = curr_time

        # Display frame
        cv2.imshow("YOLOv5 Detection", annotated_frame)

        # Save frame to output video
        out.write(annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Cap FPS (only when enabled)
        if CAP_TO_SOURCE_FPS:
            spent = time.perf_counter() - start_time
            time.sleep(max(0, frame_duration - spent))
finally:
    # Always finalise the output file and free the capture/window, even if
    # inference raises or the kernel is interrupted mid-loop.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


YOLOv5  v7.0-421-g79c4c31d Python-3.10.9 torch-2.1.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Ti, 8192MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


In [42]:
from ultralytics import YOLO
import cv2
import time
import numpy as np

# Load YOLOv8 segmentation model
model = YOLO(r"C:\vs\Railway Seg.v1i.yolov8\runs\segment\train2\weights\best.pt")

# Set to True to throttle playback to roughly the source frame rate.
# Off by default, so the loop runs as fast as inference allows.
CAP_TO_SOURCE_FPS = False

# Open video
video_path = r"C:\Users\User\Documents\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_duration = 0.9 / fps if fps > 0 else 1.0 / 30
# perf_counter() is monotonic and high resolution, unlike time.time(), whose
# coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

# Output video writer. Fall back to 30 FPS when the container reports none,
# so the writer is never opened with a frame rate of 0.
output_path = r"C:\Users\User\Documents\railway_seg_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer_fps = fps if fps > 0 else 30.0
out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Start of the per-frame processing budget (used by the optional cap)
        start_time = time.perf_counter()

        # Run segmentation inference
        results = model(frame)

        # Get segmentation mask and draw
        annotated_frame = results[0].plot()  # this draws the mask(s)

        # Show FPS, measured over the whole loop iteration
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps_display = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        cv2.putText(annotated_frame, f"FPS: {fps_display:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        prev_time = curr_time

        # Display
        cv2.imshow("YOLOv8 Segmentation", annotated_frame)

        # Save frame to output video
        out.write(annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Cap FPS (only when enabled)
        if CAP_TO_SOURCE_FPS:
            spent = time.perf_counter() - start_time
            time.sleep(max(0, frame_duration - spent))
finally:
    # Always finalise the output file and free the capture/window, even if
    # inference raises or the kernel is interrupted mid-loop.
    cap.release()
    out.release()
    cv2.destroyAllWindows()



0: 384x640 1 railway, 7.5ms
Speed: 1.7ms preprocess, 7.5ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.4ms
Speed: 1.3ms preprocess, 7.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.9ms
Speed: 1.8ms preprocess, 8.9ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.1ms
Speed: 1.9ms preprocess, 8.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.2ms
Speed: 1.8ms preprocess, 7.2ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.0ms
Speed: 1.5ms preprocess, 7.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.0ms
Speed: 1.9ms preprocess, 8.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.9ms
Speed: 1.2ms preprocess, 6.9ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)


In [40]:
import cv2
import time
import numpy as np
from ultralytics import YOLO

# Load YOLOv8 segmentation model
model = YOLO(r"C:\vs\Railway Seg.v1i.yolov8\runs\segment\train2\weights\best.pt")
model.to("cuda")  # Use GPU if available
print(model.device)  # should say: cuda:0

# Overlay appearance: 50/50 blend of the frame with solid blue (BGR order)
MASK_ALPHA = 0.5
MASK_COLOR_BGR = np.array((255, 0, 0), dtype=np.float64)

# Open video
video_path = r"C:\Users\User\Documents\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# perf_counter() is monotonic and high resolution, unlike time.time(), whose
# coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

# Output video writer. Fall back to 30 FPS when the container reports none,
# so the writer is never opened with a frame rate of 0.
output_path = r"C:\Users\User\Documents\railway_seg_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer_fps = fps if fps > 0 else 30.0
out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLOv8 segmentation
        results = model(frame)
        masks = results[0].masks

        # Copy original frame
        annotated_frame = frame.copy()

        if masks is not None:
            for mask in masks.data:
                # NOTE(review): masks.data may be at the model's letterboxed
                # input resolution; a plain resize could then be slightly
                # misaligned with the frame - confirm against the Ultralytics docs.
                mask_resized = cv2.resize(mask.cpu().numpy(), (frame_width, frame_height))

                # Blend only the masked pixels in place. Same result as
                # compositing full-frame layers, without the extra
                # frame-sized temporaries per mask.
                region = mask_resized > 0.5
                annotated_frame[region] = (
                    MASK_ALPHA * MASK_COLOR_BGR
                    + (1 - MASK_ALPHA) * annotated_frame[region]
                ).astype(np.uint8)

        # Show FPS, measured over the whole loop iteration
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps_display = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        cv2.putText(annotated_frame, f"FPS: {fps_display:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        prev_time = curr_time

        # Show and save
        cv2.imshow("YOLOv8 Segmentation", annotated_frame)
        out.write(annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # FPS capping was intentionally removed from this cell: the loop runs
        # as fast as inference allows.
finally:
    # Always finalise the output file and free the capture/window, even if
    # inference raises or the kernel is interrupted mid-loop.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


cuda:0

0: 384x640 1 railway, 5.9ms
Speed: 1.2ms preprocess, 5.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.4ms
Speed: 1.7ms preprocess, 7.4ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.6ms
Speed: 1.2ms preprocess, 7.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.1ms
Speed: 1.3ms preprocess, 7.1ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.4ms
Speed: 1.2ms preprocess, 6.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.1ms
Speed: 1.2ms preprocess, 6.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 5.8ms
Speed: 1.7ms preprocess, 5.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 5.6ms
Speed: 1.5ms preprocess, 5.6ms inference, 2.3ms postprocess per image at shape (1, 3, 384

In [58]:
from ultralytics import YOLO
import cv2
import time
import numpy as np

# Load YOLOv8 segmentation model
model = YOLO(r"C:\vs\Railway Seg.v1i.yolov8\runs\segment\train2\weights\best.pt")
model.to("cuda")  # Use GPU if available

# Overlay appearance: 50/50 blend of the frame with solid blue (BGR order)
MASK_ALPHA = 0.5
MASK_COLOR_BGR = np.array((255, 0, 0), dtype=np.float64)
# A mask is drawn only if it covers at least this fraction of the central strip
MIN_CENTER_OVERLAP = 0.05

# Set to True to throttle playback to roughly the source frame rate.
# Off by default, so the loop runs as fast as inference allows.
CAP_TO_SOURCE_FPS = False

# Open video
video_path = r"C:\Users\User\Documents\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_duration = 0.9 / fps if fps > 0 else 1.0 / 30
# perf_counter() is monotonic and high resolution, unlike time.time(), whose
# coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

# Central vertical strip (40%-60% of the width) used to select rail masks;
# it depends only on the frame size, so compute it once.
center_x1 = int(frame_width * 0.4)
center_x2 = int(frame_width * 0.6)

# Output video writer. Fall back to 30 FPS when the container reports none,
# so the writer is never opened with a frame rate of 0.
output_path = r"C:\Users\User\Documents\railway_seg_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer_fps = fps if fps > 0 else 30.0
out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Start of the per-frame processing budget (used by the optional cap)
        start_time = time.perf_counter()

        # Run YOLOv8 segmentation
        results = model(frame)
        masks = results[0].masks

        # Copy original frame
        annotated_frame = frame.copy()

        if masks is not None:
            for mask in masks.data:
                # NOTE(review): masks.data may be at the model's letterboxed
                # input resolution; a plain resize could then be slightly
                # misaligned with the frame - confirm against the Ultralytics docs.
                mask_resized = cv2.resize(mask.cpu().numpy(), (frame_width, frame_height))
                region = mask_resized > 0.5

                # Fraction of the central strip covered by this mask
                overlap_ratio = np.mean(region[:, center_x1:center_x2])

                if overlap_ratio < MIN_CENTER_OVERLAP:
                    # skip masks that don't significantly overlap center
                    continue

                # Blend only the masked pixels in place. Same result as
                # compositing full-frame layers, without the extra
                # frame-sized temporaries per mask.
                annotated_frame[region] = (
                    MASK_ALPHA * MASK_COLOR_BGR
                    + (1 - MASK_ALPHA) * annotated_frame[region]
                ).astype(np.uint8)

        # Show FPS, measured over the whole loop iteration
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps_display = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        cv2.putText(annotated_frame, f"FPS: {fps_display:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        prev_time = curr_time

        # Show and save
        cv2.imshow("YOLOv8 Segmentation", annotated_frame)
        out.write(annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Cap FPS (only when enabled)
        if CAP_TO_SOURCE_FPS:
            spent = time.perf_counter() - start_time
            time.sleep(max(0, frame_duration - spent))
finally:
    # Always finalise the output file and free the capture/window, even if
    # inference raises or the kernel is interrupted mid-loop.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 384x640 1 railway, 6.7ms
Speed: 2.8ms preprocess, 6.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.4ms
Speed: 2.5ms preprocess, 7.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.5ms
Speed: 1.9ms preprocess, 6.5ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.7ms
Speed: 2.3ms preprocess, 6.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.5ms
Speed: 1.8ms preprocess, 6.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.7ms
Speed: 2.0ms preprocess, 6.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 5.8ms
Speed: 2.2ms preprocess, 5.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.5ms
Speed: 1.6ms preprocess, 6.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)


In [78]:
from ultralytics import YOLO
import cv2
import time

# Load the railway segmentation model and move it to GPU
model = YOLO(r"C:\vs\Railway Seg.v1i.yolov8\runs\segment\train2\weights\best.pt").to('cuda')

# Open the local video file (this cell reads a recording, not an RTSP stream)
video_path = r"C:\Users\User\Downloads\Drivers view Thailand, Thon Buri to Sala Thammasop, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-35-45.mp4"
cap = cv2.VideoCapture(video_path)

# For FPS calculation. perf_counter() is monotonic and high resolution, unlike
# time.time(), whose coarse ticks can make two consecutive readings equal.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        # Inference on GPU
        results = model.predict(source=frame, device=0, show=False, conf=0.5)

        # Annotate frame
        annotated_frame = results[0].plot()

        # Calculate FPS over the whole loop iteration (read + infer + display)
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time

        # Overlay FPS on the frame
        cv2.putText(annotated_frame, f'FPS: {fps:.2f}', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow("YOLO Detection [GPU]", annotated_frame)

        # Press 'q' in the preview window to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the video and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 1 railway, 6.7ms
Speed: 2.3ms preprocess, 6.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.8ms
Speed: 2.7ms preprocess, 7.8ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.9ms
Speed: 2.5ms preprocess, 7.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.1ms
Speed: 2.0ms preprocess, 7.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.4ms
Speed: 2.3ms preprocess, 8.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.3ms
Speed: 2.5ms preprocess, 8.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 6.5ms
Speed: 2.1ms preprocess, 6.5ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.3ms
Speed: 2.4ms preprocess, 7.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


In [140]:
from ultralytics import YOLO
import cv2
import time
import numpy as np

# Segment the railway in a recorded driver's-view video, keep the mask that
# best covers the central column band of the frame, widen it with a
# perspective-aware horizontal dilation (wider near the bottom of the frame),
# and show it as a translucent blue overlay together with an FPS counter.
model = YOLO(r"C:\vs\Railway Seg.v1i.yolov8\runs\segment\train2\weights\best.pt").to("cuda")

cap = cv2.VideoCapture(
    r"C:\Users\User\Documents\Drivers view Thailand, Wongwian Yai to Maha Chai, Feb 2025 - YouTube - Google Chrome 2025-06-21 14-12-05.mp4"
)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
center_x = frame_width // 2

# --- Tunables ----------------------------------------------------------------
CONF_THRESHOLD = 0.5          # minimum detection confidence passed to predict()
MASK_THRESHOLD = 0.5          # mask value above which a pixel counts as "on"
MIN_CENTER_OVERLAP = 0.05     # best mask must fill > 5% of the central band
BAND_ROWS = 5                 # dilate in 5-row horizontal bands to save FPS
MAX_DILATION_WIDTH = 500      # kernel width (px) extrapolated to the bottom edge
MIN_DILATION_WIDTH = -500     # kernel width at the top edge; negative on purpose
                              # so the widening fades out around mid-frame
OVERLAY_ALPHA = 0.5           # opacity of the blue overlay

# --- Per-video invariants (hoisted out of the frame loop) --------------------
# Column range of the central 40%-60% band used to score each mask.
center_x1 = int(frame_width * 0.4)
center_x2 = int(frame_width * 0.6)

# Pre-build one (1 x k) rectangular kernel per band. The width grows linearly
# with the band's top row y; bands whose odd-rounded width is <= 1 need no
# dilation and are simply not listed.
dilation_bands = []
for y in range(0, frame_height, BAND_ROWS):
    dilation_width = int(
        MIN_DILATION_WIDTH + (MAX_DILATION_WIDTH - MIN_DILATION_WIDTH) * (y / frame_height)
    )
    kernel_size = dilation_width if dilation_width % 2 == 1 else dilation_width + 1
    if kernel_size > 1:
        dilation_bands.append(
            (y, cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, 1)))
        )

# perf_counter() is monotonic and high-resolution; time.time() may return the
# same value on consecutive reads and break the FPS division.
prev_time = time.perf_counter()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = model.predict(source=frame, device=0, conf=CONF_THRESHOLD)
        masks = results[0].masks
        annotated_frame = frame.copy()

        if masks is not None:
            best_overlap = 0
            best_mask_resized = None

            # Single device-to-host transfer for all masks of this frame.
            # presumably shaped (num_masks, h, w) — TODO confirm
            masks_np = masks.data.cpu().numpy()

            # NOTE(review): the masks are stretched straight to the frame size.
            # If they are produced at a letterboxed inference shape (the log
            # shows 384x640), the padding rows get stretched too — confirm
            # the alignment against the video.
            for mask in masks_np:
                mask_resized = cv2.resize(
                    mask, (frame_width, frame_height), interpolation=cv2.INTER_NEAREST
                )

                # Fraction of the central band covered by this mask.
                central_region = mask_resized[:, center_x1:center_x2]
                overlap_ratio = np.mean(central_region > MASK_THRESHOLD)

                if overlap_ratio > best_overlap:
                    best_overlap = overlap_ratio
                    best_mask_resized = mask_resized

            if best_overlap > MIN_CENTER_OVERLAP:
                binary_mask = (best_mask_resized > MASK_THRESHOLD).astype(np.uint8)

                # Perspective-aware dilation (bottom = wider, top = narrower).
                # Start from a copy so rows that receive no dilation keep the
                # detected mask; a dilation must never remove pixels.
                dilated_mask = binary_mask.copy()
                for y, kernel in dilation_bands:
                    band = binary_mask[y:y + BAND_ROWS, :]
                    dilated_mask[y:y + BAND_ROWS, :] = cv2.dilate(band, kernel, iterations=1)

                # Colorize mask (blue): OpenCV frames are BGR, channel 0 = blue.
                color_mask = np.zeros_like(annotated_frame, dtype=np.uint8)
                color_mask[:, :, 0] = 255

                # Blend the whole frame once, then keep the blended pixels only
                # where the (broadcast) mask is set.
                blended = cv2.addWeighted(
                    color_mask, OVERLAY_ALPHA, annotated_frame, 1 - OVERLAY_ALPHA, 0
                )
                annotated_frame = np.where(
                    dilated_mask[:, :, np.newaxis] == 1, blended, annotated_frame
                )

        # FPS from the wall time between two consecutive frames
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0  # never divide by zero
        prev_time = curr_time
        cv2.putText(annotated_frame, f"FPS: {fps:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow("YOLOv8 Segmentation Optimized", annotated_frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture and close the window, even when the loop is
    # interrupted or an exception is raised mid-frame.
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 1 railway, 7.1ms
Speed: 2.0ms preprocess, 7.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.2ms
Speed: 2.0ms preprocess, 8.2ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.4ms
Speed: 2.1ms preprocess, 8.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.7ms
Speed: 1.8ms preprocess, 7.7ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 8.0ms
Speed: 2.9ms preprocess, 8.0ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.9ms
Speed: 3.1ms preprocess, 7.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.4ms
Speed: 1.7ms preprocess, 7.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 railway, 7.0ms
Speed: 2.4ms preprocess, 7.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
