### All imports

In [1]:
import cv2
from youtube_transcript_api import YouTubeTranscriptApi
from pytube import YouTube
import collections
import cv2


from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

import nltk  


### engine for pose estimation

In [11]:
base_options = python.BaseOptions(model_asset_path='../models/pose_landmarker.task')
options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    output_segmentation_masks=True)
mp_pose = mp.solutions.pose
detector = mp_pose.Pose(static_image_mode=False,min_detection_confidence=0.8,min_tracking_confidence=0.8)
#detector = vision.PoseLandmarker.create_from_options(options)

In [4]:
from mediapipe.python.solutions import drawing_utils, pose


def draw_landmarks_on_image(rgb_image, detection_result):
    pose_landmarks_list = detection_result.pose_landmarks.landmark
    annotated_image = np.copy(rgb_image)

    # Create NormalizedLandmarkList to hold the detected landmarks
    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
        landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
        for landmark in pose_landmarks_list
    ])

    # Draw landmarks on the image
    drawing_utils.draw_landmarks(
        annotated_image,
        pose_landmarks_proto,
        pose.POSE_CONNECTIONS,  # Ensure POSE_CONNECTIONS matches the number of landmarks detected
        drawing_utils.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2),
        drawing_utils.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)
    )

    return annotated_image


def calculate_angle(p1, p2):
    x_diff = p2[0] - p1[0]
    y_diff = p2[1] - p1[1]
    return np.degrees(np.arctan2(y_diff, x_diff))

# Funkcja do obliczania kąta obrotu sylwetki
def calculate_body_rotation_angle(landmarks):
    if landmarks is None:
      return None
    # Wykryte punkty charakterystyczne dla ramion (np. 11 i 12 dla lewego i prawego ramienia)
    left_shoulder = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x,
                     landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].y]
    right_shoulder = [landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].x,
                      landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].y]

    # Oblicz kąt nachylenia linii ramion
    angle = calculate_angle(left_shoulder, right_shoulder)
    return angle

def calculate_angle_3d(p1, p2):
    x_diff = p2[0] - p1[0]
    y_diff = p2[1] - p1[1]
    z_diff = p2[2] - p1[2]
    # Obliczamy kąt w płaszczyźnie xy (poziomej)
    angle_xy = np.degrees(np.arctan2(y_diff, x_diff))
    # Obliczamy kąt w płaszczyźnie yz (pionowej)
    angle_yz = np.degrees(np.arctan2(z_diff, y_diff))
    return angle_xy, angle_yz

# Funkcja do obliczania kąta obrotu sylwetki w 3D
def calculate_body_rotation_angle_3d(landmarks_3d):
    if landmarks_3d is None:
        return None
    # Wykryte punkty charakterystyczne dla ramion w 3D
    left_shoulder_3d = [landmarks_3d['left_shoulder_x'],
                        landmarks_3d['left_shoulder_y'],
                        landmarks_3d['left_shoulder_z']]
    right_shoulder_3d = [landmarks_3d['right_shoulder_x'],
                         landmarks_3d['right_shoulder_y'],
                         landmarks_3d['right_shoulder_z']]

    # Oblicz kąt nachylenia linii ramion w płaszczyźnie xy i yz
    angle_xy, angle_yz = calculate_angle_3d(left_shoulder_3d, right_shoulder_3d)
    return angle_xy, angle_yz

In [4]:
#xwyPjhRoeNc
#nhoikoUEI8U
video_id = "nhoikoUEI8U"
subtitles = YouTubeTranscriptApi.get_transcript(video_id)
print(len(subtitles))

yt = YouTube(f"https://www.youtube.com/watch?v={video_id}")
stream = yt.streams.filter(res="720p").first()
#print(yt.streams.filter(res="720p").first())
destination_path = "../videos" 

video_file = stream.download(output_path=destination_path)


def cv2_to_mediapipe_image(cv2_image):
    rgb_image = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)
    image = mp.solutions.mediapipe.python.solution_base.Image(
        width=rgb_image.shape[1],
        height=rgb_image.shape[0],
        rgb_data=np.frombuffer(rgb_image.tobytes(), dtype=np.uint8)
    )

    return image

129


In [10]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np

# Funkcja do skalowania obrazu
def resize_image(image, target_size):
    height, width = image.shape[:2]
    if height > width:
        scale = target_size / height
    else:
        scale = target_size / width
    new_height = int(height * scale)
    new_width = int(width * scale)
    resized_image = cv2.resize(image, (new_width, new_height))
    padded_image = np.zeros((target_size, target_size, 3), dtype=np.uint8)
    padded_image[:new_height, :new_width, :] = resized_image
    return padded_image

# Wczytaj model MoveNet
model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
movenet = model.signatures['serving_default']

# Wczytaj wideo
cap = cv2.VideoCapture(video_file)
current_frame = 0
last_frame = 0
while cap.isOpened():
    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
    ret, frame = cap.read()
    if not ret:
        break
    if current_frame !=last_frame:

        # Skaluj obraz do rozmiaru akceptowalnego przez MoveNet
        input_image = resize_image(frame, 192)

        # Przygotuj obraz do przetwarzania przez model
        input_image = tf.image.convert_image_dtype(input_image, dtype=tf.float32)
        input_image = tf.expand_dims(input_image, axis=0)
        input_image = tf.cast(input_image, dtype=tf.int32)

        # Wykrywanie postury
        keypoints_with_scores = movenet(input_image)['output_0'].numpy()

        # Przetwarzanie wyników
        keypoints = keypoints_with_scores[0, 0, :, :2]
        scores = keypoints_with_scores[0, 0, :, 2]
        print(scores)
        print(keypoints)
        # Rysowanie wykrytych kluczowych punktów
        for keypoint, score in zip(keypoints, scores):
            #if score > 0.3:  # Próg ufności
                x, y = int(keypoint[1] * frame.shape[1]), int(keypoint[0] * frame.shape[0])
                cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)

        # Wyświetlanie obrazu z wykrytymi kluczowymi punktami
    cv2.imshow('MoveNet Pose Detection', frame)
    key = cv2.waitKey(30)  # Adjust the delay as needed (milliseconds)
    last_frame = current_frame
    if key == 27:  # ESC key to exit
        break
    elif key == 83 or key == 100:
        current_frame += 1
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72336835]
 [0.46516585 0.3843128 ]
 [0.549841   0.620606  ]
 [0.572905   0.44732767]
 [0.35410318 0.57292455]
 [0.35767597 0.4610269 ]
 [0.37261423 0.5195781 ]
 [0.37208122 0.48017925]
 [0.54765284 0.49333712]
 [0.54775256 0.52329355]]
[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72

[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72336835]
 [0.46516585 0.3843128 ]
 [0.549841   0.620606  ]
 [0.572905   0.44732767]
 [0.35410318 0.57292455]
 [0.35767597 0.4610269 ]
 [0.37261423 0.5195781 ]
 [0.37208122 0.48017925]
 [0.54765284 0.49333712]
 [0.54775256 0.52329355]]
[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72

[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72336835]
 [0.46516585 0.3843128 ]
 [0.549841   0.620606  ]
 [0.572905   0.44732767]
 [0.35410318 0.57292455]
 [0.35767597 0.4610269 ]
 [0.37261423 0.5195781 ]
 [0.37208122 0.48017925]
 [0.54765284 0.49333712]
 [0.54775256 0.52329355]]
[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72

[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72336835]
 [0.46516585 0.3843128 ]
 [0.549841   0.620606  ]
 [0.572905   0.44732767]
 [0.35410318 0.57292455]
 [0.35767597 0.4610269 ]
 [0.37261423 0.5195781 ]
 [0.37208122 0.48017925]
 [0.54765284 0.49333712]
 [0.54775256 0.52329355]]
[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72

[0.04262676 0.05331732 0.04193732 0.03090977 0.02437305 0.00703724
 0.01519093 0.00877611 0.01292016 0.00954487 0.07706343 0.06094895
 0.06675536 0.00880223 0.01231342 0.0078762  0.01601369]
[[0.36509916 0.4536764 ]
 [0.36259505 0.4952658 ]
 [0.3673496  0.44698083]
 [0.35495397 0.47495458]
 [0.45285174 0.54249907]
 [0.40176702 0.7070281 ]
 [0.40055117 0.4580385 ]
 [0.446836   0.70369434]
 [0.43529993 0.3809557 ]
 [0.5311186  0.6026074 ]
 [0.33395267 0.02946509]
 [0.37342125 0.5705134 ]
 [0.36012778 0.46162516]
 [0.3977078  0.52014494]
 [0.37366855 0.4798549 ]
 [0.5667834  0.50836194]
 [0.54808694 0.52312416]]
[0.04226888 0.05299651 0.04157915 0.03067237 0.0242344  0.00704715
 0.01518809 0.00875572 0.01294492 0.0095055  0.07725534 0.06094558
 0.06672024 0.00880925 0.01231705 0.00786787 0.01601429]
[[0.3650834  0.45355198]
 [0.36259526 0.495111  ]
 [0.36735135 0.4468426 ]
 [0.35497352 0.47474748]
 [0.4529775  0.54240793]
 [0.40197417 0.7069966 ]
 [0.40057814 0.45804068]
 [0.4468249  0.70

[0.04189413 0.05264891 0.04133147 0.02686218 0.0244113  0.00813626
 0.01545093 0.00890186 0.01287976 0.00958982 0.07780266 0.06103292
 0.06681222 0.0088812  0.01252531 0.00793207 0.01616305]
[[0.36533862 0.45310864]
 [0.36302364 0.49455446]
 [0.3677402  0.446452  ]
 [0.37942713 0.5019617 ]
 [0.45312223 0.5418203 ]
 [0.4001145  0.684024  ]
 [0.40085626 0.4575079 ]
 [0.44667527 0.70407474]
 [0.43501967 0.38094908]
 [0.53117585 0.6027133 ]
 [0.33404702 0.0293706 ]
 [0.373594   0.5706854 ]
 [0.36037755 0.46170282]
 [0.39765126 0.51993144]
 [0.37370703 0.47967935]
 [0.56683457 0.5082468 ]
 [0.54813987 0.52312994]]
[0.04216561 0.05276872 0.04155019 0.02662631 0.02438004 0.00806713
 0.0153904  0.00885277 0.01293653 0.00957374 0.07740647 0.0610985
 0.06678517 0.00885438 0.01246527 0.00791435 0.01614879]
[[0.36528358 0.45334798]
 [0.36291227 0.4948103 ]
 [0.36764297 0.4466858 ]
 [0.37928784 0.50219727]
 [0.4530687  0.5420569 ]
 [0.40018654 0.68413067]
 [0.40077624 0.4577381 ]
 [0.44670153 0.703

[0.04325195 0.05400963 0.04268695 0.03126504 0.02452799 0.00711797
 0.01522162 0.00882416 0.013034   0.0095474  0.07359337 0.06111651
 0.06686931 0.0087458  0.0122601  0.00785749 0.01604213]
[[0.36519343 0.4537902 ]
 [0.36267352 0.4954315 ]
 [0.36740384 0.4471557 ]
 [0.3547929  0.47537804]
 [0.45291123 0.54252243]
 [0.4020399  0.706983  ]
 [0.40041763 0.45811626]
 [0.44668838 0.70362276]
 [0.43544668 0.38117355]
 [0.5310211  0.6026685 ]
 [0.3335821  0.03004571]
 [0.37335438 0.5705358 ]
 [0.3599617  0.46154532]
 [0.39743906 0.5201713 ]
 [0.37335813 0.4799144 ]
 [0.5667259  0.5084184 ]
 [0.5479846  0.52322435]]
[0.04343118 0.05407448 0.04283143 0.03119869 0.02471011 0.00727843
 0.01528103 0.00893895 0.01308757 0.00958482 0.07283583 0.06175528
 0.06699    0.00879124 0.01229978 0.00786824 0.0161212 ]
[[0.3651082  0.4541176 ]
 [0.36256263 0.49569842]
 [0.36731142 0.4475058 ]
 [0.35471317 0.4756224 ]
 [0.4528154  0.5427965 ]
 [0.40234202 0.70705265]
 [0.40039465 0.4582996 ]
 [0.44696617 0.70

[0.0356307  0.03508768 0.04163165 0.03405374 0.02465758 0.0076005
 0.0146767  0.01185297 0.00912147 0.02070284 0.01818988 0.03451045
 0.03911826 0.0081738  0.00955514 0.01386502 0.03237128]
[[0.37593335 0.5416272 ]
 [0.34821767 0.5797878 ]
 [0.35533923 0.50156915]
 [0.37606567 0.47805667]
 [0.40186048 0.47340167]
 [0.30872416 0.69790554]
 [0.37528905 0.43088806]
 [0.37332225 0.74219453]
 [0.3351648  0.30328143]
 [0.43191752 0.6322707 ]
 [0.42911464 0.39425763]
 [0.33238792 0.6056578 ]
 [0.31317148 0.47684023]
 [0.3052715  0.5396651 ]
 [0.3294388  0.49944174]
 [0.5262664  0.510608  ]
 [0.48501086 0.5303884 ]]
[0.0356559  0.03510805 0.04207506 0.03398537 0.02474998 0.00676056
 0.01476456 0.01182071 0.00933211 0.02072511 0.01831901 0.03457338
 0.03956256 0.00825759 0.00963147 0.01388941 0.03235656]
[[0.3759727  0.5418087 ]
 [0.3483057  0.57997215]
 [0.35539696 0.50162697]
 [0.37602788 0.4778291 ]
 [0.40189493 0.47339377]
 [0.28610668 0.6942043 ]
 [0.37522468 0.43087143]
 [0.37329555 0.742

[0.03567246 0.03353423 0.04238598 0.03352674 0.02447145 0.00680235
 0.01405881 0.01160808 0.00882671 0.02061221 0.01828464 0.03413723
 0.03870602 0.00823225 0.00665619 0.01389682 0.03195759]
[[0.37561533 0.5428385 ]
 [0.34782037 0.5811659 ]
 [0.35480198 0.5027063 ]
 [0.37569508 0.4794707 ]
 [0.4011625  0.47412366]
 [0.2861134  0.6942827 ]
 [0.37620574 0.40892887]
 [0.37348533 0.74213386]
 [0.3355109  0.30338785]
 [0.43207324 0.63233   ]
 [0.42940134 0.3941315 ]
 [0.33241147 0.6051998 ]
 [0.3125947  0.4760786 ]
 [0.30514386 0.5395893 ]
 [0.30384403 0.47882065]
 [0.526626   0.51070964]
 [0.48545563 0.5304191 ]]
[0.03588153 0.03368832 0.04252854 0.03366143 0.02462433 0.00677529
 0.01416045 0.01162872 0.00875264 0.02063388 0.01828663 0.03400189
 0.03848208 0.00820141 0.00665996 0.01392035 0.03207856]
[[0.37563607 0.5428138 ]
 [0.3478462  0.5811044 ]
 [0.3548144  0.50276756]
 [0.37576434 0.47962904]
 [0.40116033 0.47417092]
 [0.28605393 0.6942085 ]
 [0.37631983 0.40902337]
 [0.37357378 0.74

[0.03557407 0.03325818 0.04198276 0.03369907 0.02453977 0.00689333
 0.01419219 0.01208729 0.00886245 0.02076773 0.01831501 0.03332339
 0.03838868 0.00809117 0.00665996 0.01395485 0.03225623]
[[0.37576625 0.5430155 ]
 [0.3480389  0.5812342 ]
 [0.35494548 0.50295156]
 [0.3758758  0.47951752]
 [0.40128165 0.47432   ]
 [0.28610373 0.69485945]
 [0.3762805  0.4091736 ]
 [0.37341842 0.74238324]
 [0.33500546 0.30352446]
 [0.43201134 0.63238126]
 [0.42919788 0.39413053]
 [0.33229527 0.6056143 ]
 [0.31245106 0.4762294 ]
 [0.30497986 0.53969216]
 [0.30371794 0.47883862]
 [0.52644324 0.51061743]
 [0.48530954 0.53018236]]
[0.03564369 0.05750372 0.04215525 0.03417173 0.02473464 0.00690022
 0.01450581 0.01191308 0.0090921  0.02079842 0.01857239 0.03363709
 0.0387557  0.00819193 0.0067428  0.01404409 0.03236118]
[[0.37585235 0.5431489 ]
 [0.35463816 0.5245334 ]
 [0.355098   0.50310355]
 [0.37602895 0.47927922]
 [0.40140748 0.47435135]
 [0.28626984 0.6949662 ]
 [0.37637937 0.40928388]
 [0.373433   0.74

[0.03418813 0.05983014 0.04153493 0.03281183 0.0241876  0.00968632
 0.01224653 0.01095827 0.00864999 0.01975664 0.01782424 0.03502869
 0.04756119 0.00826606 0.00965285 0.01356572 0.03106388]
[[0.37680757 0.54099584]
 [0.3559745  0.5225077 ]
 [0.38091213 0.5051217 ]
 [0.3774649  0.47717148]
 [0.40322894 0.47307143]
 [0.33212146 0.7024213 ]
 [0.35570186 0.4057536 ]
 [0.37257606 0.74173546]
 [0.33649898 0.30314076]
 [0.43170103 0.6318753 ]
 [0.42966837 0.39452797]
 [0.3335296  0.6061118 ]
 [0.33623546 0.47802758]
 [0.30531445 0.53963304]
 [0.3296013  0.49993855]
 [0.52722704 0.51084495]
 [0.48585266 0.5305791 ]]
[0.03407038 0.06053694 0.04146554 0.02995076 0.02463556 0.00953968
 0.01262757 0.01203164 0.00905951 0.0194766  0.01786787 0.03445292
 0.0478326  0.00848723 0.00997698 0.01354239 0.03092899]
[[0.37710246 0.54089165]
 [0.35653302 0.5221641 ]
 [0.38130736 0.50499237]
 [0.37544376 0.50318456]
 [0.4037488  0.47277272]
 [0.33198065 0.7016911 ]
 [0.3560344  0.4059814 ]
 [0.3697827  0.72

[0.03350342 0.05992989 0.04097951 0.02957263 0.0241736  0.00740962
 0.01263607 0.01228769 0.00923584 0.01940121 0.0177975  0.03510025
 0.04843155 0.00865055 0.01013538 0.01347076 0.03063503]
[[0.3770401  0.5409592 ]
 [0.35650584 0.5222046 ]
 [0.38124248 0.50515395]
 [0.37548965 0.50306845]
 [0.4037516  0.47295427]
 [0.30955246 0.6967699 ]
 [0.35594878 0.40601227]
 [0.3694468  0.7200632 ]
 [0.33608854 0.30334216]
 [0.43139586 0.6312462 ]
 [0.4288734  0.39470237]
 [0.33375877 0.605981  ]
 [0.3365335  0.47816288]
 [0.305678   0.5393778 ]
 [0.3300217  0.50026786]
 [0.52722216 0.51086414]
 [0.48553252 0.53051573]]
[0.03295239 0.05888686 0.04007182 0.02860729 0.02351161 0.00719908
 0.01206704 0.01218014 0.00880593 0.01929373 0.0174396  0.03501659
 0.04771217 0.00837458 0.00976985 0.01339981 0.0304058 ]
[[0.3770331  0.5407853 ]
 [0.35639843 0.5220725 ]
 [0.38127697 0.5050342 ]
 [0.37532017 0.5031752 ]
 [0.40376624 0.47303468]
 [0.30949342 0.69654894]
 [0.3558236  0.40569502]
 [0.36926267 0.72

[0.03142162 0.02996963 0.03881392 0.0283585  0.02349669 0.009717
 0.01250061 0.01181843 0.00898685 0.01869764 0.01739034 0.03358641
 0.04740942 0.00856543 0.00722108 0.01343095 0.03041567]
[[0.3766904  0.5419091 ]
 [0.3743893  0.5817234 ]
 [0.38100395 0.5062469 ]
 [0.37587735 0.5034948 ]
 [0.40392756 0.47362602]
 [0.33243287 0.7012057 ]
 [0.35593963 0.40689915]
 [0.369593   0.72025985]
 [0.33598253 0.3038698 ]
 [0.43162054 0.63154185]
 [0.42922816 0.39494476]
 [0.33340523 0.60564685]
 [0.33589178 0.47812375]
 [0.30592    0.53947395]
 [0.3052316  0.48003566]
 [0.5276081  0.5109405 ]
 [0.48584875 0.5304505 ]]
[0.0319433  0.03038387 0.03945117 0.02828219 0.02366211 0.00975842
 0.01262659 0.01182646 0.00879578 0.01896505 0.01744719 0.03453129
 0.04742541 0.00853874 0.00712583 0.01350985 0.0305828 ]
[[0.3765437  0.5419312 ]
 [0.37414882 0.581855  ]
 [0.380763   0.5063482 ]
 [0.3756188  0.50400084]
 [0.40363085 0.4738091 ]
 [0.33228326 0.7010769 ]
 [0.35593233 0.40704554]
 [0.36937422 0.7203

[0.03128764 0.0549761  0.03877696 0.0284213  0.02349487 0.00649729
 0.01214541 0.01115235 0.008765   0.01828425 0.01720523 0.03347505
 0.04560141 0.00842424 0.01005225 0.01348119 0.02455596]
[[0.37688825 0.54252714]
 [0.35680294 0.52359277]
 [0.3570993  0.50319123]
 [0.3760961  0.5034933 ]
 [0.4039877  0.47349125]
 [0.3068243  0.6749601 ]
 [0.35579705 0.40640637]
 [0.36741385 0.72005713]
 [0.3365378  0.3026619 ]
 [0.4312213  0.63035154]
 [0.42895776 0.39470762]
 [0.33327174 0.60419846]
 [0.33676454 0.45952472]
 [0.30558687 0.5179512 ]
 [0.33005536 0.5002918 ]
 [0.5286808  0.4923781 ]
 [0.50639707 0.5297402 ]]
[0.03129701 0.0546472  0.0387355  0.02801637 0.02322712 0.00647317
 0.01181577 0.0113675  0.00833667 0.01820394 0.01700243 0.03308058
 0.04666561 0.0081594  0.00978193 0.01336654 0.0296258 ]
[[0.37687808 0.5418564 ]
 [0.3566094  0.5233612 ]
 [0.38116068 0.5062043 ]
 [0.37595212 0.5038022 ]
 [0.40386635 0.47332335]
 [0.30674258 0.674237  ]
 [0.35588333 0.40630543]
 [0.36758786 0.72

[0.03205236 0.05554737 0.03886128 0.02779139 0.023362   0.00633338
 0.01377823 0.01132342 0.00833724 0.01846058 0.01723667 0.03219795
 0.03656362 0.00796243 0.00951938 0.01345545 0.02996271]
[[0.37729225 0.5414803 ]
 [0.35696    0.5230113 ]
 [0.3816495  0.505831  ]
 [0.3758416  0.5036597 ]
 [0.404103   0.47314677]
 [0.30685058 0.67414236]
 [0.37794614 0.40818703]
 [0.36794525 0.7199139 ]
 [0.33743793 0.30232173]
 [0.43169934 0.6308587 ]
 [0.4296797  0.39471182]
 [0.3328601  0.6043532 ]
 [0.3148752  0.45883057]
 [0.30495363 0.51802075]
 [0.3294033  0.500137  ]
 [0.5286132  0.49238938]
 [0.48616108 0.53129286]]
[0.03151044 0.05476684 0.03836447 0.02765547 0.02315992 0.0064055
 0.01381179 0.0112865  0.00854431 0.01835848 0.01718195 0.03232058
 0.04659259 0.00802656 0.0096191  0.01344134 0.02989776]
[[0.37727314 0.5417063 ]
 [0.35702455 0.52312815]
 [0.38166818 0.50605583]
 [0.3760212  0.50352955]
 [0.40423235 0.47328123]
 [0.3070575  0.67432   ]
 [0.37807542 0.4083287 ]
 [0.36795947 0.719

[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72336835]
 [0.46516585 0.3843128 ]
 [0.549841   0.620606  ]
 [0.572905   0.44732767]
 [0.35410318 0.57292455]
 [0.35767597 0.4610269 ]
 [0.37261423 0.5195781 ]
 [0.37208122 0.48017925]
 [0.54765284 0.49333712]
 [0.54775256 0.52329355]]
[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72

[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72336835]
 [0.46516585 0.3843128 ]
 [0.549841   0.620606  ]
 [0.572905   0.44732767]
 [0.35410318 0.57292455]
 [0.35767597 0.4610269 ]
 [0.37261423 0.5195781 ]
 [0.37208122 0.48017925]
 [0.54765284 0.49333712]
 [0.54775256 0.52329355]]
[0.03953465 0.05106837 0.0391318  0.02702235 0.02108929 0.00550704
 0.0131146  0.00643582 0.00956721 0.00844087 0.00526361 0.05762986
 0.06546496 0.00806069 0.01015033 0.00997389 0.01497809]
[[0.387168   0.45799077]
 [0.36115587 0.4984883 ]
 [0.39446506 0.42194182]
 [0.37890172 0.47758657]
 [0.4479012  0.5755491 ]
 [0.401615   0.70891804]
 [0.39847502 0.46018812]
 [0.47459847 0.72

[0.04690622 0.05411669 0.04029455 0.02947778 0.0224339  0.00644942
 0.01436907 0.00866329 0.01312564 0.01104776 0.00687383 0.06085986
 0.06709197 0.00890211 0.01150375 0.00842806 0.01542213]
[[0.34218848 0.4581343 ]
 [0.36051902 0.50409937]
 [0.36540884 0.45529878]
 [0.38059172 0.45702112]
 [0.445803   0.5788962 ]
 [0.40236542 0.7102735 ]
 [0.3984792  0.46317166]
 [0.45083377 0.72534704]
 [0.43835592 0.38290367]
 [0.5298248  0.6454648 ]
 [0.55273604 0.46679306]
 [0.37325907 0.5704437 ]
 [0.359019   0.4612758 ]
 [0.3725699  0.51937217]
 [0.37115723 0.45720872]
 [0.56550825 0.49138865]
 [0.54676735 0.52335846]]
[0.04719909 0.05457353 0.04040567 0.02954682 0.02252055 0.00646076
 0.0143744  0.00864555 0.0131074  0.01105634 0.00687342 0.06093914
 0.06707118 0.00888521 0.01149209 0.00842691 0.01541888]
[[0.34221077 0.45794398]
 [0.36052883 0.503922  ]
 [0.36541352 0.45511925]
 [0.38056594 0.4569107 ]
 [0.44577774 0.5787965 ]
 [0.40236065 0.71021336]
 [0.39849037 0.4630891 ]
 [0.4509083  0.72

[0.04805556 0.05558065 0.04077397 0.03128223 0.02273374 0.00646087
 0.01429555 0.00858186 0.01100313 0.0110713  0.05950038 0.06119501
 0.06710201 0.00886708 0.01145534 0.00841671 0.01548864]
[[0.34223956 0.457179  ]
 [0.36060736 0.50313914]
 [0.3654908  0.4542782 ]
 [0.35441756 0.457326  ]
 [0.44594407 0.57832855]
 [0.40228838 0.7100686 ]
 [0.39851052 0.46272498]
 [0.45109567 0.7251862 ]
 [0.46538723 0.38630515]
 [0.52990425 0.6454741 ]
 [0.33248127 0.03073619]
 [0.37335145 0.5704488 ]
 [0.3591382  0.46134505]
 [0.372593   0.51926816]
 [0.37117046 0.45714062]
 [0.56537473 0.49146947]
 [0.546721   0.52332485]]
[0.04789431 0.05549232 0.04068947 0.03124694 0.02271272 0.00649098
 0.01433696 0.00857254 0.01102508 0.01106864 0.05944091 0.06130827
 0.06713028 0.00886483 0.01146847 0.00841815 0.01548287]
[[0.34225732 0.45723617]
 [0.36063683 0.50320137]
 [0.36551553 0.4543354 ]
 [0.35447544 0.4573613 ]
 [0.44596273 0.5783561 ]
 [0.40233535 0.71003026]
 [0.3985779  0.46270025]
 [0.4511022  0.72

[0.0451558  0.05500259 0.04049546 0.03118243 0.0225916  0.00625482
 0.01394215 0.00844891 0.01050488 0.00886625 0.05847329 0.06037217
 0.06672721 0.00873746 0.01125558 0.00840348 0.01549872]
[[0.36425585 0.4590741 ]
 [0.36085433 0.5022465 ]
 [0.36574385 0.45310375]
 [0.3545521  0.4563244 ]
 [0.44624287 0.57800615]
 [0.40224147 0.7101943 ]
 [0.39834866 0.46228155]
 [0.45131168 0.72516954]
 [0.46528697 0.38569757]
 [0.5484618  0.6211505 ]
 [0.33225203 0.03094205]
 [0.3731125  0.5706905 ]
 [0.3587615  0.46130762]
 [0.3724608  0.51931953]
 [0.37096354 0.45702046]
 [0.56535816 0.49154708]
 [0.54669535 0.5232349 ]]
[0.04505465 0.05493241 0.04042997 0.03113757 0.02256879 0.00625046
 0.01393222 0.00844829 0.01050097 0.00886251 0.05847846 0.06037875
 0.06675573 0.00873632 0.01125392 0.00839962 0.01548556]
[[0.3642432  0.45908654]
 [0.36083996 0.5022627 ]
 [0.36573327 0.4531147 ]
 [0.35454768 0.4563105 ]
 [0.44623178 0.5780233 ]
 [0.40224326 0.710206  ]
 [0.39835155 0.46228212]
 [0.45130122 0.72

In [15]:
model_name = "movenet_lightning"

if "tflite" in model_name:
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

# Define the edges between keypoints to draw the skeleton
KEYPOINT_EDGES = {
    (0, 1): 'm', (0, 2): 'c', (1, 3): 'y', (2, 4): 'y',
    (0, 5): 'm', (0, 6): 'c', (5, 7): 'm', (7, 9): 'y',
    (6, 8): 'c', (8, 10): 'y', (5, 6): 'c', (5, 11): 'm',
    (6, 12): 'c', (11, 12): 'y', (11, 13): 'm', (13, 15): 'y',
    (12, 14): 'c', (14, 16): 'y'
}

def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draws the keypoints and the skeleton on the image."""
    y, x, _ = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))

    # Draw keypoints
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 6, (0, 255, 0), -1)

    # Draw skeleton
    for edge, color in KEYPOINT_EDGES.items():
        p1, p2 = edge
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]

        if c1 > confidence_threshold and c2 > confidence_threshold:
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

def process_video(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error opening video stream or file")
        return
    current_frame = 0
    last_frame = 0
    while cap.isOpened():
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)

        ret, frame = cap.read()
        if not ret:
            break
        if current_frame!=last_frame:
            input_image = tf.image.resize_with_pad(np.expand_dims(frame, axis=0), input_size, input_size)
            keypoints_with_scores = movenet(input_image)

            draw_keypoints(frame, keypoints_with_scores[0][0], 0.3)
        
        # Adjust the delay as needed (milliseconds)
        cv2.imshow('MoveNet Lightning', frame)
        last_frame = current_frame
        key = cv2.waitKey(30) 
        if key == 27:  # ESC key to exit
            break
        elif key == 83 or key == 100:
            current_frame += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

process_video(video_file)

In [16]:
#@title Helper functions for visualization

# Dictionary that maps from joint names to keypoint indices.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Maps bones to a matplotlib color name.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Returns high confidence keypoints and edges for visualization.

  Args:
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: minimum confidence score for a keypoint to be
      visualized.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) containing:
      * the coordinates of all keypoints of all detected entities;
      * the coordinates of all skeleton edges of all detected entities;
      * the colors in which the edges should be plotted.
  """
  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []
  num_instances, _, _, _ = keypoints_with_scores.shape
  for idx in range(num_instances):
    kpts_x = keypoints_with_scores[0, idx, :, 1]
    kpts_y = keypoints_with_scores[0, idx, :, 0]
    kpts_scores = keypoints_with_scores[0, idx, :, 2]
    kpts_absolute_xy = np.stack(
        [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
    kpts_above_thresh_absolute = kpts_absolute_xy[
        kpts_scores > keypoint_threshold, :]
    keypoints_all.append(kpts_above_thresh_absolute)

    for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if (kpts_scores[edge_pair[0]] > keypoint_threshold and
          kpts_scores[edge_pair[1]] > keypoint_threshold):
        x_start = kpts_absolute_xy[edge_pair[0], 0]
        y_start = kpts_absolute_xy[edge_pair[0], 1]
        x_end = kpts_absolute_xy[edge_pair[1], 0]
        y_end = kpts_absolute_xy[edge_pair[1], 1]
        line_seg = np.array([[x_start, y_start], [x_end, y_end]])
        keypoint_edges_all.append(line_seg)
        edge_colors.append(color)
  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    keypoints_xy = np.zeros((0, 17, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: A dictionary that defines the coordinates of the bounding box
      of the crop region in normalized coordinates (see the init_crop_region
      function below for more detail). If provided, this function will also
      draw the bounding box on the image.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy array with shape [out_height, out_width, channel] representing the
    image overlaid with keypoint predictions.
  """
  height, width, channel = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  im = ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  # Turn off tick labels
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width)

  line_segments.set_segments(keypoint_edges)
  line_segments.set_color(edge_colors)
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  if crop_region is not None:
    xmin = max(crop_region['x_min'] * width, 0.0)
    ymin = max(crop_region['y_min'] * height, 0.0)
    rec_width = min(crop_region['x_max'], 0.99) * width - xmin
    rec_height = min(crop_region['y_max'], 0.99) * height - ymin
    rect = patches.Rectangle(
        (xmin,ymin),rec_width,rec_height,
        linewidth=1,edgecolor='b',facecolor='none')
    ax.add_patch(rect)

  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(
      fig.canvas.get_width_height()[::-1] + (3,))
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot

def to_gif(images, duration):
  """Converts image sequence (4D numpy array) to gif."""
  imageio.mimsave('./animation.gif', images, duration=duration)
  return embed.embed_file('./animation.gif')

def progress(value, max=100):
  return HTML("""
      
          {value}
      
  """.format(value=value, max=max))

In [None]:
model_name = "movenet_lightning" #@param ["movenet_lightning", "movenet_thunder", "movenet_lightning_f16.tflite", "movenet_thunder_f16.tflite", "movenet_lightning_int8.tflite", "movenet_thunder_int8.tflite"]

if "tflite" in model_name:
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

In [17]:
#@title Cropping Algorithm

# Confidence score to determine whether a keypoint prediction is reliable.
MIN_CROP_KEYPOINT_SCORE = 0.2

def init_crop_region(image_height, image_width):
  """Defines the default crop region.

  The function provides the initial crop region (pads the full image from both
  sides to make it a square image) when the algorithm cannot reliably determine
  the crop region from the previous frame.
  """
  if image_width > image_height:
    box_height = image_width / image_height
    box_width = 1.0
    y_min = (image_height / 2 - image_width / 2) / image_height
    x_min = 0.0
  else:
    box_height = 1.0
    box_width = image_height / image_width
    y_min = 0.0
    x_min = (image_width / 2 - image_height / 2) / image_width

  return {
    'y_min': y_min,
    'x_min': x_min,
    'y_max': y_min + box_height,
    'x_max': x_min + box_width,
    'height': box_height,
    'width': box_width
  }

def torso_visible(keypoints):
  """Checks whether there are enough torso keypoints.

  This function checks whether the model is confident at predicting one of the
  shoulders/hips which is required to determine a good crop region.
  """
  return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >
           MIN_CROP_KEYPOINT_SCORE or
          keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >
           MIN_CROP_KEYPOINT_SCORE) and
          (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >
           MIN_CROP_KEYPOINT_SCORE or
          keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >
           MIN_CROP_KEYPOINT_SCORE))

def determine_torso_and_body_range(
    keypoints, target_keypoints, center_y, center_x):
  """Calculates the maximum distance from each keypoints to the center location.

  The function returns the maximum distances from the two sets of keypoints:
  full 17 keypoints and 4 torso keypoints. The returned information will be
  used to determine the crop size. See determineCropRegion for more detail.
  """
  torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']
  max_torso_yrange = 0.0
  max_torso_xrange = 0.0
  for joint in torso_joints:
    dist_y = abs(center_y - target_keypoints[joint][0])
    dist_x = abs(center_x - target_keypoints[joint][1])
    if dist_y > max_torso_yrange:
      max_torso_yrange = dist_y
    if dist_x > max_torso_xrange:
      max_torso_xrange = dist_x

  max_body_yrange = 0.0
  max_body_xrange = 0.0
  for joint in KEYPOINT_DICT.keys():
    if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:
      continue
    dist_y = abs(center_y - target_keypoints[joint][0]);
    dist_x = abs(center_x - target_keypoints[joint][1]);
    if dist_y > max_body_yrange:
      max_body_yrange = dist_y

    if dist_x > max_body_xrange:
      max_body_xrange = dist_x

  return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]

def determine_crop_region(
      keypoints, image_height,
      image_width):
  """Determines the region to crop the image for the model to run inference on.

  The algorithm uses the detected joints from the previous frame to estimate
  the square region that encloses the full body of the target person and
  centers at the midpoint of two hip joints. The crop size is determined by
  the distances between each joints and the center point.
  When the model is not confident with the four torso joint predictions, the
  function returns a default crop which is the full image padded to square.
  """
  target_keypoints = {}
  for joint in KEYPOINT_DICT.keys():
    target_keypoints[joint] = [
      keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,
      keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width
    ]

  if torso_visible(keypoints):
    center_y = (target_keypoints['left_hip'][0] +
                target_keypoints['right_hip'][0]) / 2;
    center_x = (target_keypoints['left_hip'][1] +
                target_keypoints['right_hip'][1]) / 2;

    (max_torso_yrange, max_torso_xrange,
      max_body_yrange, max_body_xrange) = determine_torso_and_body_range(
          keypoints, target_keypoints, center_y, center_x)

    crop_length_half = np.amax(
        [max_torso_xrange * 1.9, max_torso_yrange * 1.9,
          max_body_yrange * 1.2, max_body_xrange * 1.2])

    tmp = np.array(
        [center_x, image_width - center_x, center_y, image_height - center_y])
    crop_length_half = np.amin(
        [crop_length_half, np.amax(tmp)]);

    crop_corner = [center_y - crop_length_half, center_x - crop_length_half];

    if crop_length_half > max(image_width, image_height) / 2:
      return init_crop_region(image_height, image_width)
    else:
      crop_length = crop_length_half * 2;
      return {
        'y_min': crop_corner[0] / image_height,
        'x_min': crop_corner[1] / image_width,
        'y_max': (crop_corner[0] + crop_length) / image_height,
        'x_max': (crop_corner[1] + crop_length) / image_width,
        'height': (crop_corner[0] + crop_length) / image_height -
            crop_corner[0] / image_height,
        'width': (crop_corner[1] + crop_length) / image_width -
            crop_corner[1] / image_width
      }
  else:
    return init_crop_region(image_height, image_width)

def crop_and_resize(image, crop_region, crop_size):
  """Crops and resize the image to prepare for the model input."""
  boxes=[[crop_region['y_min'], crop_region['x_min'],
          crop_region['y_max'], crop_region['x_max']]]
  output_image = tf.image.crop_and_resize(
      image, box_indices=[0], boxes=boxes, crop_size=crop_size)
  return output_image

def run_inference(movenet, image, crop_region, crop_size):
  """Runs model inference on the cropped region.

  The function runs the model inference on the cropped region and updates the
  model output to the original image coordinate system.
  """
  image_height, image_width, _ = image.shape
  input_image = crop_and_resize(
    tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)
  # Run model inference.
  keypoints_with_scores = movenet(input_image)
  # Update the coordinates.
  for idx in range(17):
    keypoints_with_scores[0, 0, idx, 0] = (
        crop_region['y_min'] * image_height +
        crop_region['height'] * image_height *
        keypoints_with_scores[0, 0, idx, 0]) / image_height
    keypoints_with_scores[0, 0, idx, 1] = (
        crop_region['x_min'] * image_width +
        crop_region['width'] * image_width *
        keypoints_with_scores[0, 0, idx, 1]) / image_width
  return keypoints_with_scores

In [18]:
!wget -q -O dance.gif https://github.com/tensorflow/tfjs-models/raw/master/pose-detection/assets/dance_input.gif


In [19]:
image_path = 'dance.gif'
image = tf.io.read_file(image_path)
image = tf.image.decode_gif(image)

In [23]:
import imageio
from IPython.display import HTML, display
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

In [24]:
# Load the input image.
num_frames, image_height, image_width, _ = image.shape
crop_region = init_crop_region(image_height, image_width)

output_images = []
bar = display(progress(0, num_frames-1), display_id=True)
for frame_idx in range(num_frames):
  keypoints_with_scores = run_inference(
      movenet, image[frame_idx, :, :, :], crop_region,
      crop_size=[input_size, input_size])
  output_images.append(draw_prediction_on_image(
      image[frame_idx, :, :, :].numpy().astype(np.int32),
      keypoints_with_scores, crop_region=None,
      close_figure=True, output_image_height=300))
  crop_region = determine_crop_region(
      keypoints_with_scores, image_height, image_width)
  bar.update(progress(frame_idx, num_frames-1))

# Prepare gif visualization.
output = np.stack(output_images, axis=0)
to_gif(output, duration=100)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)

  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)


NameError: name 'embed' is not defined

In [34]:
from IPython.display import display, HTML,clear_output
from moviepy.editor import ImageSequenceClip
def to_mp4(output_images, filename, fps=30):
    clip = ImageSequenceClip([image[..., ::-1] for image in output_images], fps=fps)
    clip.write_videofile(filename, codec='libx264')

video_path = video_file
cap = cv2.VideoCapture(video_path)
num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)
image_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
image_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

crop_region = init_crop_region(image_height, image_width)
cv2.namedWindow('Video with Subtitles', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Video with Subtitles', 800, 600)
output_images = []
bar = display(progress(0, num_frames-1), display_id=True)
last_frame = 0
frame_idx = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    clear_output(wait=True)
    print(frame_idx)
    if last_frame!=frame_idx:
        keypoints_with_scores = run_inference(
            movenet, frame, crop_region,
            crop_size=[input_size, input_size])
        img = draw_prediction_on_image(
            frame.astype(np.int32),
            keypoints_with_scores, crop_region=None,
            close_figure=True, output_image_height=300)

        #output_images.append(img)

        # Display the current image
        cv2.imshow('Video with Subtitles', img)


        crop_region = determine_crop_region(
            keypoints_with_scores, image_height, image_width)
        bar.update(progress(frame_idx, num_frames-1))
    last_frame = frame_idx
    key = cv2.waitKey(30)  # Adjust the delay as needed (milliseconds)

    if key == 27:  # ESC key to exit
        break
    elif key == 83 or key == 100:
        frame_idx += 1


cap.release()
cv2.destroyAllWindows()
# Prepare MP4 visualization.
to_mp4(output_images, 'output.mp4', fps=fps)

0


IndexError: list index out of range

### This is optional tool to run main loop faster without processing text

In [12]:
cap = cv2.VideoCapture(video_file)
dq = collections.deque()
cv2.namedWindow('Video with Subtitles', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Video with Subtitles', 800, 600)
last_frame = 0 
current_frame = 0 
fps = cap.get(cv2.CAP_PROP_FPS)
curr_sub_start = 0
import os
clear = lambda: os.system('cls')

try:
    while True:
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
        current_time = current_frame / fps

        ret, frame = cap.read()

        if not ret:
            break
        if last_frame != current_frame:
            while subtitles[curr_sub_start]['start'] < current_time:
                print(subtitles[curr_sub_start]['text'])
                dq.append(curr_sub_start)
                curr_sub_start += 1
            if len(dq) > 0:
                while subtitles[dq[0]]['start'] + subtitles[dq[0]]['duration'] < current_time:
                    dq.popleft()

            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            edges = cv2.Canny(gray_frame, threshold1=100, threshold2=200)  

            sub_index = 0
            for x in dq:
                cv2.putText(frame, subtitles[x]['text'], (50, 50 + 50 * sub_index), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
                sub_index += 1

            #img = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
            detection_result = detector.process(frame)
            if(detection_result.pose_landmarks):
                clear()
                print("lewe ramie: ")
                print(detection_result.pose_world_landmarks.landmark[11])
                print("prawe ramie: ")
                print(detection_result.pose_world_landmarks.landmark[12])
                print("lewe biodro: ")
                print(detection_result.pose_world_landmarks.landmark[23])
                print("prawe biodro: ")
                print(detection_result.pose_world_landmarks.landmark[24])
                #print(detection_result.pose_landmarks[11])
                body_angle = calculate_body_rotation_angle(detection_result.pose_landmarks.landmark)
                body_angle_3d = calculate_body_rotation_angle(detection_result.pose_world_landmarks.landmark)
                print("kat na zdjeciu: ")
                print(body_angle)
                print("kat w 3d: ")
                print(body_angle_3d)

                # wheter human is front or back to camera 
                front = False if detection_result.pose_world_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP].x > 0 else True
                left = True if detection_result.pose_world_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP].z < detection_result.pose_world_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP].z else False
                print("przod:")
                print(front)
                print("lewo:")
                print(front)
                #cv2.putText(frame, body_angle, (50, 500), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
                annotated_image = draw_landmarks_on_image(frame, detection_result)
                bgr_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
                cv2.imshow('Video with Subtitles', bgr_image)
            else:
               cv2.imshow('Video with Subtitles', frame) 

        #cv2.imshow('Video with Subtitles', frame)

        #cv2.imshow(cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))    # Wait for user input (right arrow key to go to the next frame)
        key = cv2.waitKey(30)  # Adjust the delay as needed (milliseconds)
        last_frame = current_frame
        if key == 27:  # ESC key to exit
            break
        elif key == 83 or key == 100:
            current_frame += 1
finally:
    cap.release()
    cv2.destroyAllWindows()

we will approach the squat in two phases
lewe ramie: 
x: -0.10126257
y: -0.38646343
z: 0.25926033
visibility: 0.9979285

prawe ramie: 
x: 0.16045028
y: -0.42304328
z: 0.1250287
visibility: 0.9999043

lewe biodro: 
x: -0.054906376
y: 0.015925106
z: 0.062126763
visibility: 0.99943036

prawe biodro: 
x: 0.05485576
y: -0.015774563
z: -0.061007895
visibility: 0.99995184

kat na zdjeciu: 
-2.5454905818027704
kat w 3d: 
-7.956737334590427
przod:
False
lewo:
False
lewe ramie: 
x: -0.09328172
y: -0.40218708
z: 0.26579478
visibility: 0.9968651

prawe ramie: 
x: 0.1601431
y: -0.42502868
z: 0.12521593
visibility: 0.99986684

lewe biodro: 
x: -0.056926046
y: 0.015943814
z: 0.06404112
visibility: 0.99944574

prawe biodro: 
x: 0.056657165
y: -0.01579237
z: -0.0630135
visibility: 0.9999524

kat na zdjeciu: 
-7.048258708727123
kat w 3d: 
-5.150247842948468
przod:
False
lewo:
False
lewe ramie: 
x: -0.076876715
y: -0.41313618
z: 0.2614864
visibility: 0.9963455

prawe ramie: 
x: 0.16003326
y: -0.4452573
z

### divide text into sentences and add punctuation with ml model

In [None]:

text =''
for obj in subtitles:
    text+=obj['text']

print(len(text))
print(text)


from deepmultilingualpunctuation import PunctuationModel
model = PunctuationModel()

result = model.restore_punctuation(text)
print(len(result))



4705
we will approach the squat in two phasesfirst unloaded to solve problemsassociated with the bottom position andthen loaded to learn how to apply thebottom position to the hip drive usedfor heavier weights since the majorityof the problems with the squat happenedat the bottom this method expedites theprocess quite effectively we will use afairly neutral foot placement with theheels about shoulder width apart and thetoes pointed out at about 30 degreesmany people will assume a stance withtoes pointed too forward so you may needto point them out more than you want tonext you're going to assume the positionyou will be in at the bottom of a squatwithout the barsquat down all the way to a position inwhich the apex of the hip crease dropsjust below the top of the patella putyour elbows against your knees with thepalms of your hands together and shoveyour knees out notice your feet are flaton the floor your knees are shoved outto where they are in a parallel linewith your feet and just a 

  from .autonotebook import tqdm as notebook_tqdm







4790


### removing artificial connections in words (auto generating subtitles from yt isn't ideal)
### also pos-tags are added here

In [None]:
sents = nltk.sent_tokenize(result)

import wordsegment
from wordsegment import load, segment
load()
from nltk.tokenize import word_tokenize

# segment powoduje również tokenizacje zdania dlatego ten etap(tokenizacji) zostanie pominięty.
sents = [(segment(sent)) for sent in sents]
#porter = nltk.PorterStemmer()
#sents = [[porter.stem(t) for t in sent] for sent in sents]
sents = [nltk.pos_tag(sent) for sent in sents]
grammar = r"""
  NP: {<DT|PP\$>?<JJ>*<NN>} 
      {<NNP>+}               
"""
# grammar = r"""
#   NP: {<DT>?<JJ>*<NN>}
#   VP: {<VB.*><NP|PP>*}
#   PP: {<IN><NP>}
#   ADJP: {<JJ>}
#   ADVP: {<RB.*>}
# """
cp = nltk.RegexpParser(grammar) 
	
# class ConsecutiveNPChunkTagger(nltk.TaggerI): 

#     def __init__(self, train_sents):
#         train_set = []
#         for tagged_sent in train_sents:
#             untagged_sent = nltk.tag.untag(tagged_sent)
#             history = []
#             for i, (word, tag) in enumerate(tagged_sent):
#                 featureset = npchunk_features(untagged_sent, i, history) 
#                 train_set.append( (featureset, tag) )
#                 history.append(tag)
#         self.classifier = nltk.MaxentClassifier.train( 
#             train_set, algorithm='megam', trace=0)

#     def tag(self, sentence):
#         history = []
#         for i, word in enumerate(sentence):
#             featureset = npchunk_features(sentence, i, history)
#             tag = self.classifier.classify(featureset)
#             history.append(tag)
#         return zip(sentence, history)

# class ConsecutiveNPChunker(nltk.ChunkParserI):
#     def __init__(self, train_sents):
#         tagged_sents = [[((w,t),c) for (w,t,c) in
#                          nltk.chunk.tree2conlltags(sent)]
#                         for sent in train_sents]
#         self.tagger = ConsecutiveNPChunkTagger(tagged_sents)

#     def parse(self, sentence):
#         tagged_sents = self.tagger.tag(sentence)
#         conlltags = [(w,t,c) for ((w,t),c) in tagged_sents]
#         return nltk.chunk.conlltags2tree(conlltags)
    
# def npchunk_features(sentence, i, history):
#      word, pos = sentence[i]
#      return {"pos": pos}
# chunker = ConsecutiveNPChunker(train_sents)
# print(chunker.evaluate(test_sents))


# sents = [cp.parse(sent) for sent in sents]





In [None]:
# nltk.download('maxent_ne_chunker')
# nltk.download('treebank')
# print(sents[30])
sent = nltk.corpus.treebank.tagged_sents()[22]
#print(sent)
print(sents[15])
#print(nltk.ne_chunk(sent))
print(nltk.ne_chunk(sents[25]))

# sentence = [("the", "DT"), ("little", "JJ"), ("yellow", "JJ"),
# ("dog", "NN"), ("barked", "VBD"), ("at", "IN"),  ("the", "DT"), ("cat", "NN")]

# grammar = "NP: {<DT>?<JJ>*<NN>}" 

# cp = nltk.RegexpParser(grammar) 
# result = cp.parse(sentence) 
# print(result) 
grammar = r"NP: {<[CDJNP].*>+}"
cp = nltk.RegexpParser(grammar)
# print(cp.evaluate(sents))
# result.draw() 

[('measured', 'VBN'), ('from', 'IN'), ('the', 'DT'), ('markings', 'NNS'), ('placed', 'VBN'), ('on', 'IN'), ('the', 'DT'), ('bar', 'NN'), ('for', 'IN'), ('this', 'DT'), ('purpose', 'NN'), ('a', 'DT'), ('standard', 'JJ'), ('powerbar', 'NN'), ('has', 'VBZ'), ('16to17', 'CD'), ('inches', 'NNS'), ('between', 'IN'), ('the', 'DT'), ('ends', 'NNS'), ('of', 'IN'), ('the', 'DT'), ('inside', 'JJ'), ('neural', 'JJ'), ('and', 'CC'), ('32', 'CD'), ('inches', 'NNS'), ('between', 'IN'), ('the', 'DT'), ('finger', 'NN'), ('marks', 'NNS')]
(S
  again/RB
  heels/NNS
  should/MD
  be/VB
  about/IN
  shoulder/NN
  width/NNS
  apart/RB
  with/IN
  toes/NNS
  pointed/VBN
  out/RP
  about/IN
  30/CD
  degrees/NNS
  at/IN
  this/DT
  point/NN
  you/PRP
  are/VBP
  ready/JJ
  to/TO
  squat/VB
  with/IN
  the/DT
  empty/JJ
  bar/NN)


### Finding sentences with technique rules(unfinished) (regexp: noun(body part) and verb)

In [None]:
text = "When performing squats with a barbell, ensure your back is straight, knees do not extend beyond your toes, and the barbell rests securely on your shoulders."

from nltk.corpus import wordnet as wn
import nltk 
nltk.download('wordnet')
part = wn.synsets('body_part')[0]

def is_body_part(candidate):
    for ss in wn.synsets(candidate):
        # only get those where the synset matches exactly
        name = ss.name().split(".", 1)[0]
        if name != candidate:
            continue
        hit = part.lowest_common_hypernyms(ss)
        if hit and hit[0] == part:
            return True
    return False

# for word in sents[0]:
#     print(is_body_part(word[0]), word[0], sep="\t")

# Procesowanie każdego zdania
# for sentence in sents:
#     if any(is_body_part(t[0].lower()) for t in sentence):
#         print(f"Zdanie zawiera część ciała: {sentence}")

import nltk
from nltk import CFG
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk import pos_tag
from nltk.parse import ChartParser

# Lista części ciała
body_parts = ["head", "arm", "leg", "hand", "foot", "eye", "ear", "nose", "mouth", "shoulder", "knee", "elbow"]

# Definicja gramatyki bezkontekstowej z użyciem POS tags
grammar = CFG.fromstring("""
  S -> NP VP
  NP -> DT JJNN | JJNN
  VP -> VBZ NP | VBZ ADJP | VBZ PP
  DT -> 'the' | 'a' | 'his' | 'her'
  JJNN -> JJ NN | JJNN JJ NN
  JJ -> 'badly' | 'quickly' | 'slowly' | 'fast'
  NN -> 'head' | 'arm' | 'leg' | 'hand' | 'foot' | 'eye' | 'ear' | 'nose' | 'mouth' | 'shoulder' | 'knee' | 'elbow'
  VBZ -> VB
  ADJP -> JJ NP
  NP -> DT JJNN
  PP -> IN NP
  IN -> 'in'
""")

# Tworzenie parsera
parser = ChartParser(grammar)

# Tokenizacja tekstu na zdania
for sentence in sents:
    # Sprawdzanie czy zdanie pasuje do gramatyki
    #print(sentence)
    words = [word for word, tag in sentence]
    try:
        for tree in parser.parse(words):
            # # Sprawdzanie czy pierwsza fraza rzeczownikowa jest częścią ciała
            # np = tree[0]
            # if np.label() == 'NP' and np[0][0].lower() in body_parts:
            #     print(f"Zdanie zawiera część ciała jako podmiot: {sentence}")
            tree.pretty_print()
    except ValueError:
            # Jeżeli parser nie znajdzie pasującego drzewa, przechodzi do następnego zdania
        #print("nie ma drzewa")    
        continue
import re 

#pattern = r'(head|arm|leg|hand|foot|eye|ear|nose|back|mouth|shoulder|knee|elbow)'
pattern = r'\b(head|arm|leg|hand|foot|eye|ear|nose|back|mouth|shoulder|knee|elbow)\b\s+(\w+)'

sents_for_regexp = nltk.sent_tokenize(result)
sents_for_regexp = [(segment(sent)) for sent in sents_for_regexp]
for sentence in sents_for_regexp:
    #print(sentence)
    sem = ' '.join([str(elem) for elem in sentence])
    #print(sem)
    match = re.search(pattern,sem)
    if match:
        print(sem)
        #print(match.string)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Damian\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


we will use a fairly neutral foot placement with the heels about shoulder width apart and the toes pointed out at about 30 degrees many people will assume a stance with toes pointed too forward so you may need to point them out more than you want
your back should be as flat as you can get it
also notice that your back is inclined at about a45 degree angle not at all vertical and your eyes are looking down at the floor a few feet in front of you
this movement keeps your weight solidly over the whole foot instead of letting it shift to the toes
grip width for the squat will vary with shoulder width and flexibility but in general the hands will be between these two markings
the elbows should be lifted up to trap the bar between the hands and the back elbows should be up but not high
with your grip in place and your hands and thumbs on top of the bar dip your head under the bar and come up into position with the bar on your back just below the spine of the scapula the bone you feel at the 

In [None]:
for sent in sents:
    for tuple in sent:
        print(tuple[0],end = " ")
    print("\n")    

we will approach the squat in two phases first unloaded to solve problems associated with the bottom position and then loaded to learn how to apply the bottom position to the hip drive used for heavier weights 

since the majority of the problems with the squat happened at the bottom this method expedites the process quite effectively 

we will use a fairly neutral foot placement with the heels about shoulder width apart and the toes pointed out at about 30 degrees many people will assume a stance with toes pointed too forward so you may need to point them out more than you want 

to next your e going to assume the position you will be in at the bottom of a squat without the bar squat down all the way to a position in which the apex of the hip crease drops just below the top of the patella 

put your elbows against your knees with the palms of your hands together and shove your knees out 

notice your feet are flat on the floor 

your knees are shoved out to where they are in a paralle

### main loop of the program

In [None]:
cap = cv2.VideoCapture(video_file)
dq = collections.deque()
cv2.namedWindow('Video with Subtitles', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Video with Subtitles', 800, 600)

current_frame = 0 
fps = cap.get(cv2.CAP_PROP_FPS)
curr_sub_start = 0
while True:
    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame)
    current_time = current_frame / fps

    ret, frame = cap.read()

    if not ret:
        break


    while(subtitles[curr_sub_start]['start']<current_time):
        print(subtitles[curr_sub_start]['text'])
        dq.append(curr_sub_start)
        curr_sub_start=curr_sub_start+1
    if(len(dq) >0):
        while(subtitles[dq[0]]['start'] + subtitles[dq[0]]['duration']<current_time):
            dq.popleft()
    

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    edges = cv2.Canny(gray_frame, threshold1=100, threshold2=200)  

    sub_index=0
    for x in dq:
        cv2.putText(frame, subtitles[x]['text'], (50, 50+50*sub_index), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
        sub_index+=1


    img = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

    detection_result = detector.detect(img)
    # body_angle = calculate_body_rotation_angle(detection_result.pose_landmarks.landmark)
    # cv2.putText(frame, body_angle, (50, 500), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)


    annotated_image = draw_landmarks_on_image(img.numpy_view(), detection_result)
    bgr_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)

    # Display the image using OpenCV
    #cv2.imshow('Video with Subtitles', frame)
    cv2.imshow('Video with Subtitles', bgr_image)
    #cv2.imshow(cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))    # Wait for user input (right arrow key to go to the next frame)
    key = cv2.waitKey(30)  # Adjust the delay as needed (milliseconds)
    if key == 27:  # ESC key to exit
        break
    elif key == 83 or key == 100:
        current_frame += 1
cap.release()
cv2.destroyAllWindows()


AttributeError: 'list' object has no attribute 'landmark'