<a href="https://colab.research.google.com/github/bonosa/robot_arm_demo/blob/main/mediapipe_hand_robotics_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MediaPipe Hand Tracking → Robotics (Colab Notebook)
End‑to‑end pipeline: capture 21 hand landmarks with MediaPipe, convert them to joint angles, and optionally log a dataset for ML.

In [4]:
!pip install -q "protobuf<4" mediapipe==0.10.9 opencv-python h5py >log.txt

In [5]:
import mediapipe as mp, cv2, numpy as np
print("MediaPipe version:", mp.__version__)


MediaPipe version: 0.10.9


In [3]:
print (error.stderr)


NameError: name 'error' is not defined

## Initialise tracker

In [6]:
import cv2, mediapipe as mp, numpy as np

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    model_complexity=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6,
)
print("MediaPipe Hands initialised ✔️")

MediaPipe Hands initialised ✔️


## Helper: extract 21 × 3 landmark array from a frame

In [7]:
def get_landmarks(frame):
    """Return a (21, 3) ndarray of (x, y, z) or None if no hand detected."""
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    res = hands.process(rgb)
    if res.multi_hand_landmarks:
        lm = res.multi_hand_landmarks[0]
        return np.array([(p.x, p.y, p.z) for p in lm.landmark], dtype=np.float32)
    return None

## Helper: convert landmarks → finger‑joint angles (degrees)

In [8]:
# Mapping of finger bones: (proximal idx, joint idx, distal idx)
_triplets = {
    'thumb_MCP':  (0,  2,  4),
    'index_MCP':  (0,  5,  8),
    'index_PIP':  (5,  6,  8),
    'index_DIP':  (6,  7,  8),
    'middle_MCP': (0,  9, 12),
    'middle_PIP': (9, 10, 12),
    'middle_DIP': (10,11,12),
    'ring_MCP':   (0, 13, 16),
    'ring_PIP':   (13,14,16),
    'ring_DIP':   (14,15,16),
    'pinky_MCP':  (0, 17, 20),
    'pinky_PIP':  (17,18,20),
    'pinky_DIP':  (18,19,20),
}

def bone_angle(a, b, c):
    """Return angle ABC in degrees."""
    ba, bc = a - b, c - b
    cosang = np.clip(
        np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-8),
        -1.0, 1.0
    )
    return float(np.degrees(np.arccos(cosang)))

def landmarks_to_angles(lm):
    """Map 21 × 3 landmarks → dict of named joint angles."""
    return {name: bone_angle(lm[i], lm[j], lm[k]) for name, (i, j, k) in _triplets.items()}

## (Optional) HDF5 logger: store landmarks + joint angles

In [9]:
import h5py, time

def create_session(path="teleop_sessions.h5"):
    h5 = h5py.File(path, "a")
    grp = h5.create_group(f"session_{int(time.time())}")
    grp.create_dataset("landmarks", (0,21,3), maxshape=(None,21,3), chunks=True)
    grp.create_dataset("joint_angles", (0,len(_triplets)), maxshape=(None,len(_triplets)), chunks=True)
    return h5, grp

def append_frame(grp, lm, ang):
    ls, js = grp["landmarks"], grp["joint_angles"]
    idx = ls.shape[0]
    ls.resize(idx+1, axis=0)
    js.resize(idx+1, axis=0)
    ls[idx] = lm
    js[idx] = [ang[k] for k in _triplets.keys()]

In [11]:
import cv2

# --- adjust if your frames are a different size or FPS
FRAME_SIZE = (640, 480)
FPS        = 30.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')          # MP4 codec
out    = cv2.VideoWriter('annotated.mp4',
                         fourcc,
                         FPS,
                         FRAME_SIZE)
print("VideoWriter ready ✔️")


VideoWriter ready ✔️


## Demo: upload a short video, extract data

In [22]:
from google.colab import files
import cv2

# ── Upload a clip ─────────────────────────
uploaded   = files.upload()
video_path = next(iter(uploaded))          # first file
cap        = cv2.VideoCapture(video_path)

# ── Prepare the MP4 writer ───────────────
FRAME_SIZE = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
FPS        = cap.get(cv2.CAP_PROP_FPS) or 30.0          # fallback

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out    = cv2.VideoWriter('annotated.mp4', fourcc, FPS, FRAME_SIZE)

# Optional: open your HDF5 session (unchanged)
h5, grp = create_session()

print(f"Source FPS ≈ {FPS:.1f}, resolution {FRAME_SIZE} → writer ready ✔️")


Saving WIN_20250619_19_48_33_Pro.mp4 to WIN_20250619_19_48_33_Pro.mp4
Source FPS ≈ 19.9, resolution (1280, 720) → writer ready ✔️


In [23]:
import mediapipe as mp, numpy as np
from mediapipe.framework.formats import landmark_pb2   # ← NEW import

mp_drawing = mp.solutions.drawing_utils
mp_hands   = mp.solutions.hands

while True:
    ret, frame = cap.read()
    if not ret:
        break

    lm = get_landmarks(frame)
    if lm is None:
        continue

    # ── build an in-memory NormalizedLandmarkList ──────────────────
    lm_list = landmark_pb2.NormalizedLandmarkList()
    for x, y, z in lm:
        lmk = lm_list.landmark.add()
        lmk.x, lmk.y, lmk.z = float(x), float(y), float(z)

    # ── draw the green skeleton ────────────────────────────────────
    mp_drawing.draw_landmarks(
        frame,
        lm_list,
        mp_hands.HAND_CONNECTIONS)

    # log + save (unchanged)
    ang = landmarks_to_angles(lm)
    append_frame(grp, lm, ang)
    out.write(frame)

print("Processing finished ✔️")


Processing finished ✔️


In [24]:
# --- tidy-up ---------------------------------------------------
cap.release()     # stop reading frames
out.release()     # finish writing annotated.mp4
h5.close()        # flush & close teleop_sessions.h5

print("✅  Files closed.")
print("• Dataset : teleop_sessions.h5")
print("• Video    : annotated.mp4")


✅  Files closed.
• Dataset : teleop_sessions.h5
• Video    : annotated.mp4


In [25]:
from google.colab import files
files.download('annotated.mp4')
files.download('teleop_sessions.h5')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

OSError: MoviePy error: the file /mnt/data/annotated_twitter.mp4 could not be found!
Please check that you entered the correct path.

> **Local Jupyter?** Replace the `files.upload()` block with `cap = cv2.VideoCapture(0)` to stream from your webcam.