In [1]:
import cv2
import mediapipe as mp
import time # to check the frame rate

In [2]:
cap = cv2.VideoCapture(0) #this opens the default webcam (index 0)
mphands = mp.solutions.hands #give access to hand tracking sol
hands = mphands.Hands() #creates an instance to detect hands 
# this sets up a model that detects and tracks up to 2 hands in the image 

mpdraw = mp.solutions.drawing_utils

ptime = 0
ctime = 0

mp.solutions.Hands: Gives access to the Hand Tracking solution.

mpHands.Hands(): Creates an instance to detect hands.

This sets up a model that detects and tracks up to 2 hands in the image.

In [None]:
while True : #starts frame by frame video processing
    success, img = cap.read() #reads a frame from the webcam 
    imgrgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = hands.process(imgrgb)
    # print(results.multi_hand_landmarks)

    if results.multi_hand_landmarks :
        for handLms in results.multi_hand_landmarks :
            for id, lm in enumerate(handLms.landmark):
                # print(id, lm) #these are the ratios of image we need to multiply width and height for getting the pixel vals
                h, w, c = img.shape #height width and the channels
                cx, cy = int(lm.x*w), int(lm.y*h)
                print(id, cx, cy) #prints the id along with the landmarks position pixels

                if id == 0 : #draw the circle for landmark 1
                    cv2.circle(img, (cx,cy), 25, (255,0,255), cv2.FILLED)

            mpdraw.draw_landmarks(img, handLms, mphands.HAND_CONNECTIONS)

    ctime = time.time() #current time
    fps = 1/(ctime-ptime) #fps formula
    ptime = ctime   #update previous time 

    cv2.putText(
        img,
        str(int(fps)),
        (10,70), #position
        cv2.FONT_HERSHEY_PLAIN, #font
        3,
        (255,0,255), #color
        3 #thickness
    )

    cv2.imshow("image", img)
    cv2.waitKey(1)

0 105 491
1 163 485
2 207 462
3 235 430
4 253 388
5 187 398
6 215 336
7 228 289
8 240 249
9 144 391
10 162 325
11 175 273
12 188 228
13 101 399
14 114 343
15 126 299
16 139 261
17 61 418
18 55 373
19 52 340
20 48 308
0 100 501
1 162 493
2 208 466
3 238 433
4 255 389
5 189 398
6 218 336
7 234 292
8 249 252
9 148 390
10 167 320
11 181 266
12 195 216
13 103 400
14 115 340
15 128 297
16 142 260
17 60 421
18 54 373
19 51 340
20 49 309
0 110 486
1 173 476
2 221 442
3 243 396
4 266 351
5 199 361
6 224 302
7 239 262
8 254 226
9 157 351
10 171 284
11 182 238
12 196 199
13 114 359
14 126 295
15 136 251
16 149 212
17 71 382
18 63 329
19 61 292
20 63 258
0 113 492
1 174 481
2 223 445
3 246 399
4 268 354
5 202 361
6 227 302
7 243 263
8 257 230
9 160 350
10 176 276
11 190 224
12 202 179
13 115 358
14 127 291
15 138 249
16 151 211
17 71 382
18 64 329
19 62 293
20 62 260
0 122 484
1 183 466
2 228 425
3 249 376
4 276 334
5 206 340
6 232 279
7 248 240
8 262 206
9 164 329
10 180 253
11 192 203
12 204 161

KeyboardInterrupt: 

: 

Success :TRUE if the frame was read successfully.

img: The actual image (frame) captured.

OpenCV captures in BGR, but MediaPipe needs RGB, so you convert it.

Processes the frame to detect hand landmarks.

results contains all the information like number of hands, landmarks, handedness, etc.

prints the landmark coordinates if hands are detected, else None.

imshow: Shows the live webcam feed in a window titled "image".

waitKey(1): Waits 1 ms before moving to the next frame (keeps window responsive).

mpdraw = mp.solutions.drawing_utils is a helper module for visualization 
it contains ready made functions to draw landmarks and connections on images, like skeletons for hands, face mesh etc 

mphands.HAND_CONNECTIONS is used to make connection lines 