In [4]:
import time
import cv2
from cvzone.HandTrackingModule import HandDetector
from cvzone.PoseModule import PoseDetector


def webcum():
    """Print hand and pose landmarks from the default webcam every two seconds.

    Opens capture device 0, then repeatedly grabs a frame, runs the cvzone
    hand detector and pose detector on it, and prints the detected hands and
    the pose landmark list. The loop ends when the camera stops delivering
    frames; otherwise it runs until the kernel is interrupted. The capture
    device is always released on the way out so the camera is not left locked.
    """
    cap = cv2.VideoCapture(0)
    hands_detector = HandDetector()
    pose_detector = PoseDetector()
    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame was delivered (camera missing, busy or unplugged);
                # the detectors cannot work on an empty image, so stop here.
                break
            hands, _ = hands_detector.findHands(img)
            print(hands)
            # findPose must run before findPosition so the detector has
            # results for this frame; its returned image is not needed.
            pose_detector.findPose(img)
            lmList, _ = pose_detector.findPosition(img, bboxWithHands=False)
            print(lmList)
            print()
            time.sleep(2)
            # Optional live preview (disabled):
            # cv2.imshow("CUM", img)
            # cv2.waitKey(1)
    finally:
        # Runs on normal exit, on errors and on KeyboardInterrupt alike.
        cap.release()

In [2]:
# Start the webcam landmark-printing loop; interrupt the kernel to stop it.
webcum()

dict_keys(['05237', '69422', '10899', '10898', '10893', '10892', '10896', '10895', '10894', '51069', '51068', '51064', '51067', '51066', '51061', '51060', '66779', '66778', '65278', '43180', '06365', '69395', '69396', '49185', '57290', '57291', '69370', '56839', '56838', '56837', '56835', '06335', '06334', '06337', '06336', '06331', '06330', '06333', '06332', '06339', '06338', '11768', '11769', '11767', '30849', '64298', '64299', '64296', '64297', '64294', '64295', '64292', '64293', '64290', '64291', '65504', '65507', '65506', '65503', '45442', '45443', '45440', '45441', '05734', '05735', '05736', '05737', '05730', '05731', '05732', '05733', '70244', '70245', '70246', '70247', '05739', '70242', '42838', '05229', '17728', '17729', '17724', '17725', '17726', '17727', '17720', '17721', '17722', '17723', '68178', '68171', '68177', '14624', '14625', '26973', '26972', '26975', '14621', '26977', '26976', '26978', '14628', '31766', '53277', '14685', '68288', '68770', '65029', '63208', '14680',

In [32]:
import torch
from torch.utils.data import Dataset
from cvzone.HandTrackingModule import HandDetector
from cvzone.PoseModule import PoseDetector
import cv2

class ASLDataset(Dataset):
    """Dataset that turns sign-language videos into per-frame landmark vectors.

    Each item is read from ``<video_folder>/<video name>.mp4``. For every
    frame inside the annotated action window, the hand and pose detectors
    are run and their landmarks are packed into one flat vector:

    * ``[0, 63)``    - 21 landmarks x 3 values of a hand not typed 'Left'
    * ``[63, 126)``  - 21 landmarks x 3 values of the hand typed 'Left'
    * ``[126, 225)`` - 33 pose landmarks x 3 values

    Slots of anything that was not detected stay 0.

    ``name_with_label`` maps a video name to a dict whose ``'action'`` entry
    is indexed as ``[label, start_frame, end_frame]``. Frames are counted
    from 1 and both bounds are inclusive.
    """

    # Layout of the flat per-frame vector.
    HAND_LANDMARKS = 21
    POSE_LANDMARKS = 33
    COORDS = 3
    HAND_SIZE = HAND_LANDMARKS * COORDS
    POSE_OFFSET = 2 * HAND_SIZE
    NUM_FEATURES = POSE_OFFSET + POSE_LANDMARKS * COORDS

    def __init__(self, video_folder, name_with_label, classes, transform=None):
        """Store the configuration and create the landmark detectors.

        Args:
            video_folder: Directory that contains the ``.mp4`` files.
            name_with_label: Mapping from video name to its annotation dict.
            classes: Class lookup; stored but not used by this class.
            transform: Optional callable applied to every frame before
                landmark detection.
        """
        self.video_folder = video_folder
        self.name_with_label = name_with_label
        self.videos_names = list(self.name_with_label.keys())
        self.classes = classes
        self.transform = transform
        self.hands_detector = HandDetector()
        self.pose_detector = PoseDetector()

    def __len__(self):
        """Return the number of annotated videos."""
        return len(self.videos_names)

    def _frame_to_points(self, frame):
        """Run both detectors on one frame and return its flat landmark list."""
        points = [0] * self.NUM_FEATURES

        # Hands: a hand typed 'Left' goes to the second hand slot, any other
        # hand to the first one.
        # NOTE(review): two hands reported with the same type overwrite each
        # other here - confirm whether that can happen with this detector.
        hands, _ = self.hands_detector.findHands(frame)
        for hand in hands:
            offset = self.HAND_SIZE if hand.get('type') == 'Left' else 0
            for j, landmark in enumerate(hand.get('lmList')):
                for k in range(self.COORDS):
                    points[offset + j * self.COORDS + k] = landmark[k]

        # Pose: findPose must run first so findPosition has results for this
        # frame. Entry 0 of every pose landmark is skipped and entries 1..3
        # are stored (presumably entry 0 is the landmark id - verify against
        # the installed cvzone version).
        self.pose_detector.findPose(frame)
        lm_list, _ = self.pose_detector.findPosition(frame, bboxWithHands=False)
        for i, landmark in enumerate(lm_list):
            for j in range(1, self.COORDS + 1):
                points[self.POSE_OFFSET + i * self.COORDS + j - 1] = landmark[j]
        return points

    def __getitem__(self, index):
        """Return ``(landmarks, label)`` for the video at ``index``.

        ``landmarks`` is a tensor with one row of ``NUM_FEATURES`` values per
        frame of the action window. If the video cannot be opened or has no
        frame inside the window, an empty ``(0, NUM_FEATURES)`` tensor is
        returned so the second dimension is the same for every sample.
        """
        name = self.videos_names[index]
        action = self.name_with_label[name]['action']
        label, start_frame, end_frame = action[0], action[1], action[2]

        video = cv2.VideoCapture(self.video_folder + '/' + name + '.mp4')
        frames_points = []
        frame_cnt = 0
        try:
            # Stop as soon as the action window is passed instead of decoding
            # the remainder of the video for nothing.
            while video.isOpened() and frame_cnt < end_frame:
                ret, frame = video.read()
                if not ret:
                    break
                frame_cnt += 1
                if frame_cnt < start_frame:
                    continue
                if self.transform is not None:
                    frame = self.transform(frame)
                frames_points.append(self._frame_to_points(frame))
        finally:
            # Release the capture even when a detector or transform raises.
            video.release()

        if not frames_points:
            return torch.empty((0, self.NUM_FEATURES), dtype=torch.long), label
        return torch.tensor(frames_points), label

In [33]:
import json

# Video name -> annotation mapping. The file is opened in a `with` block so
# the handle is closed as soon as the JSON has been parsed.
with open('dataset/nslt_100.json') as nslt_file:
    nslt = json.load(nslt_file)

# Class index -> class name, one tab-separated pair per line.
classes = {}
with open('dataset/wlasl_class_list.txt', 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split('\t')
        key = int(fields[0])
        value = fields[1]
        classes[key] = value

dataset = ASLDataset('dataset/videos', nslt, classes)

In [34]:
# Fetch sample 10: runs landmark extraction on that video and displays the
# returned (per-frame landmarks, label) pair.
# NOTE(review): the recorded output below is a nested list rather than a
# tensor, so it looks like it came from an earlier version - re-run to confirm.
dataset[10]

([[93,
   96,
   0,
   97,
   91,
   0,
   104,
   88,
   0,
   109,
   88,
   0,
   112,
   89,
   0,
   107,
   85,
   0,
   115,
   85,
   0,
   121,
   85,
   -2,
   125,
   85,
   -2,
   108,
   87,
   -1,
   116,
   86,
   -2,
   122,
   86,
   -3,
   127,
   85,
   -4,
   108,
   91,
   -2,
   117,
   89,
   -4,
   122,
   88,
   -5,
   127,
   87,
   -5,
   108,
   96,
   -4,
   115,
   96,
   -5,
   119,
   96,
   -5,
   122,
   96,
   -5,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   150,
   50,
   -246,
   155,
   43,
   -237,
   158,
   43,
   -237,
   160,
   43,
   -237,
   147,
   44,
   -233,
   144,
   44,
   -233,
   142,
   45,
   -233,
 