<a href="https://colab.research.google.com/github/haysnairpa/stairvision/blob/main/stairvision_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ultralytics opencv-python-headless numpy

Collecting ultralytics
  Downloading ultralytics-8.3.177-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.15-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [25]:
# Attach Google Drive to this runtime; the model and video paths used by the
# later cells all live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [58]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

In [59]:
# Checkpoint files on the mounted Drive: one for the segmentation model,
# one for the pose model.
seg_model_path = "/content/drive/MyDrive/stairvision/model/best_stair_handrail_model.pt"
pose_model_path = "/content/drive/MyDrive/stairvision/model/best_pose_model.pt"

# Instantiate both YOLO models (segmentation first, then pose).
seg_model, pose_model = (
    YOLO(weights_file) for weights_file in (seg_model_path, pose_model_path)
)

In [60]:
# Source clip to analyse, and where the annotated video is written.
video_path, output_path = (
    "/content/drive/MyDrive/stairvision/dataset/east/videos/Copy of Copy of IMG_3087.MOV",
    "/content/drive/MyDrive/stairvision/dataset/east/videos/output_video/output_handrail_pose2.mp4",
)

In [61]:
# Open the input clip and read the stream properties that the writer and the
# processing loop depend on.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # VideoCapture does not raise on a bad path; without this check the failure
    # only surfaces later as a 0x0 frame size and a loop that processes nothing.
    raise IOError(f"Could not open input video: {video_path}")

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # The container did not report a usable frame rate (0 or NaN). Fall back to
    # a conventional value so the output writer still gets a valid rate.
    print(f"Warning: input reports FPS={fps!r}; falling back to 30.0")
    fps = 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [62]:
# Create the destination folder first: VideoWriter will not create it, and a
# writer that failed to open silently discards every frame written to it.
os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

# MP4 container with the 'mp4v' codec, same frame rate and size as the input.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    raise IOError(f"Could not open output video for writing: {output_path}")

In [63]:
import time
import cv2
import numpy as np

# Per-frame pipeline: segment the handrail, build a distance-to-handrail map,
# run pose estimation, then mark each tested arm keypoint as near/far from the
# rail and label every person as holding / not holding. Annotated frames are
# appended to `out`.
start_time = time.time()

# --- Tunables ---------------------------------------------------------------
DILATE_KERNEL_SIZE = 13  # side of the square kernel used to grow the mask
CLOSE_KERNEL_SIZE = 7    # side of the square kernel used to close gaps in the mask
# A keypoint counts as "on the rail" when it is within this many pixels of the
# (already dilated) mask: 5% of the longer frame side, but at least 25 px.
WRIST_DISTANCE_PX = max(25, int(0.05 * max(width, height)))
KP_CONF_THRESH = 0.25    # keypoints below this confidence are ignored
HANDRAIL_CLASS_ID = 0    # segmentation class whose polygons are drawn into the mask
# Keypoint indices tested against the rail.
# NOTE(review): in the COCO-17 layout these would be elbows (7, 8) and
# wrists (9, 10) -- confirm against the pose model's keypoint order.
hand_kpts_idx = [7, 8, 9, 10]

# The structuring elements never change, so build them once instead of per frame.
close_kernel = np.ones((CLOSE_KERNEL_SIZE, CLOSE_KERNEL_SIZE), np.uint8)
dilate_kernel = np.ones((DILATE_KERNEL_SIZE, DILATE_KERNEL_SIZE), np.uint8)

frame_num = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Separate copy handed to the pose model.
        frame_orig = frame.copy()

        # 1) Rasterise every handrail polygon into a binary mask.
        seg_results = seg_model.predict(frame, conf=0.5, verbose=False)
        handrail_mask = np.zeros((height, width), dtype=np.uint8)
        for r in seg_results:
            if r.masks is None:
                continue
            for mask_poly, cls in zip(r.masks.xy, r.boxes.cls):
                if int(cls) != HANDRAIL_CLASS_ID:
                    continue
                poly = np.array(mask_poly, dtype=np.int32)
                if len(poly) == 0:
                    # An empty polygon contributes nothing to the mask.
                    continue
                cv2.fillPoly(handrail_mask, [poly], 255)

        # 2) Close small gaps, then grow the mask to add tolerance around the rail.
        handrail_mask = cv2.morphologyEx(handrail_mask, cv2.MORPH_CLOSE, close_kernel)
        handrail_mask = cv2.dilate(handrail_mask, dilate_kernel, iterations=2)

        # 3) distanceTransform measures distance to the nearest zero pixel, so
        #    invert the mask: rail pixels become 0 and dist_map[y, x] is the
        #    distance from (x, y) to the rail.
        inv_mask = cv2.bitwise_not(handrail_mask)
        dist_map = cv2.distanceTransform(inv_mask, cv2.DIST_L2, 5)

        pose_results = pose_model.predict(frame_orig, conf=0.25, verbose=False)

        # 4) Tint the rail region ((255, 0, 0) is blue in BGR) and blend it in.
        overlay = frame.copy()
        overlay[handrail_mask > 0] = (255, 0, 0)
        frame_vis = cv2.addWeighted(overlay, 0.35, frame, 0.65, 0)

        # 5) Classify and annotate every detected person.
        for r in pose_results:
            if r.keypoints is None:
                continue

            kpts_xy = r.keypoints.xy.cpu().numpy()
            conf_tensor = r.keypoints.conf
            if conf_tensor is None:
                # No per-keypoint confidence available: treat every keypoint as
                # fully confident. An explicit check (instead of a blanket
                # `except Exception`) lets genuine errors surface.
                kpts_conf = np.ones(kpts_xy.shape[:2])
            else:
                kpts_conf = conf_tensor.cpu().numpy()

            for person_idx in range(kpts_xy.shape[0]):
                person_kpts = kpts_xy[person_idx]
                person_conf = kpts_conf[person_idx]
                holding = False

                for idx in hand_kpts_idx:
                    if idx >= person_kpts.shape[0]:
                        continue
                    if person_conf[idx] < KP_CONF_THRESH:
                        continue
                    kx, ky = person_kpts[idx][0], person_kpts[idx][1]
                    if kx == 0 and ky == 0:
                        # NOTE(review): Ultralytics appears to zero the coordinates
                        # of keypoints it treats as not visible -- confirm. A real
                        # hand exactly at the top-left pixel is not plausible, so
                        # skip it rather than test the image corner.
                        continue
                    x, y = int(kx), int(ky)
                    if not (0 <= x < width and 0 <= y < height):
                        continue
                    if dist_map[y, x] <= WRIST_DISTANCE_PX:
                        holding = True
                        cv2.circle(frame_vis, (x, y), 6, (0, 255, 0), -1)  # green: near rail
                    else:
                        cv2.circle(frame_vis, (x, y), 6, (0, 0, 255), -1)  # red: away from rail

                # Anchor the label just above the first keypoint, clamped so the
                # text stays inside the frame (a first keypoint at (0, 0) would
                # otherwise put the label above the top edge).
                if person_kpts.shape[0] > 0:
                    label_pos = (
                        max(int(person_kpts[0][0]), 10),
                        max(int(person_kpts[0][1]) - 10, 30),
                    )
                else:
                    label_pos = (10, 30)

                if holding:
                    cv2.putText(frame_vis, "Holding Handrail", label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
                else:
                    cv2.putText(frame_vis, "Not Holding", label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)

        out.write(frame_vis)
        frame_num += 1

        # Progress report; the remaining count is clamped because the container's
        # reported frame count can be lower than the number of frames decoded.
        elapsed = time.time() - start_time
        fps_proc = frame_num / elapsed if elapsed > 0 else 0
        remaining = max(total_frames - frame_num, 0)
        eta = remaining / fps_proc if fps_proc > 0 else 0
        print(f"Frame {frame_num}/{total_frames} | {fps_proc:.2f} FPS | ETA: {eta/60:.1f} min", flush=True)
finally:
    # Always release both handles, including on KeyboardInterrupt or a mid-run
    # error; an unreleased writer can leave the output file unfinalised.
    cap.release()
    out.release()

print(f"✅ Video saved to: {output_path}")

Frame 1/383 | 0.22 FPS | ETA: 29.0 min
Frame 2/383 | 0.28 FPS | ETA: 22.8 min
Frame 3/383 | 0.33 FPS | ETA: 19.1 min
Frame 4/383 | 0.37 FPS | ETA: 17.2 min
Frame 5/383 | 0.39 FPS | ETA: 16.1 min
Frame 6/383 | 0.41 FPS | ETA: 15.3 min
Frame 7/383 | 0.43 FPS | ETA: 14.7 min
Frame 8/383 | 0.42 FPS | ETA: 15.1 min
Frame 9/383 | 0.42 FPS | ETA: 14.9 min
Frame 10/383 | 0.43 FPS | ETA: 14.6 min
Frame 11/383 | 0.43 FPS | ETA: 14.3 min
Frame 12/383 | 0.44 FPS | ETA: 14.0 min
Frame 13/383 | 0.45 FPS | ETA: 13.8 min
Frame 14/383 | 0.45 FPS | ETA: 13.6 min
Frame 15/383 | 0.44 FPS | ETA: 13.9 min
Frame 16/383 | 0.44 FPS | ETA: 13.8 min
Frame 17/383 | 0.45 FPS | ETA: 13.6 min
Frame 18/383 | 0.45 FPS | ETA: 13.5 min
Frame 19/383 | 0.46 FPS | ETA: 13.3 min
Frame 20/383 | 0.46 FPS | ETA: 13.2 min
Frame 21/383 | 0.46 FPS | ETA: 13.2 min
Frame 22/383 | 0.45 FPS | ETA: 13.3 min
Frame 23/383 | 0.46 FPS | ETA: 13.2 min
Frame 24/383 | 0.46 FPS | ETA: 13.1 min
Frame 25/383 | 0.46 FPS | ETA: 13.0 min
Frame 26/

KeyboardInterrupt: 