In [8]:
import cv2
import mediapipe as mp
import os

In [9]:
# Short aliases for the MediaPipe sub-modules used by the cells below:
# `mp_drawing` renders landmarks onto frames; `mp_hands` supplies the `Hands`
# detector class and the `HAND_CONNECTIONS` topology passed to the renderer.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [10]:
# Single detector instance shared by every cell that calls `hands.process`.
hands = mp_hands.Hands(
    max_num_hands=2,               # report at most two hands per image
    static_image_mode=False,       # video mode: detections are tracked across successive calls
    min_tracking_confidence=0.5,   # below this, tracking is dropped and detection re-runs
    min_detection_confidence=0.5,  # minimum score for the initial hand detection
)

In [11]:
def detect_landmarks(image):
    """Run the shared `hands` detector on `image` and return the detected hands.

    Args:
        image: Image handed straight to `hands.process` (the caller in this
            notebook passes an RGB frame).

    Returns:
        `multi_hand_landmarks` from the detector result when it is truthy,
        otherwise an empty list, so callers can always iterate over the result.
    """
    detected = hands.process(image).multi_hand_landmarks
    # Fall back to a fresh empty list when the detector reports no hands.
    return detected or []

In [12]:
def process_frame(frame, size=(640, 480), mirror=True):
    """Resize a BGR frame, optionally mirror it, and draw detected hand landmarks.

    Args:
        frame: BGR image as produced by `cv2.imread` or `VideoCapture.read`.
        size: Target `(width, height)` given to `cv2.resize`. Defaults to
            `(640, 480)`.
        mirror: When True (the default) the frame is flipped horizontally
            before detection. Pass False to keep the original orientation.

    Returns:
        The resized (and possibly flipped) BGR frame with the landmarks of
        every detected hand drawn onto it.

    Raises:
        ValueError: If `frame` is None or has no pixels. `cv2.imread` returns
            None for an unreadable file, which would otherwise surface as an
            opaque error inside `cv2.resize`.
    """
    if frame is None or getattr(frame, "size", 1) == 0:
        raise ValueError("process_frame() received an empty frame (None or zero-sized image)")

    frame = cv2.resize(frame, size)
    if mirror:
        frame = cv2.flip(frame, 1)  # flip code 1 = horizontal flip

    # Detection runs on an RGB copy; drawing happens on the BGR frame that is returned.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    for hand_landmarks in detect_landmarks(frame_rgb):
        mp_drawing.draw_landmarks(
            frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    return frame

In [13]:
# Root of the word-level frame images, relative to the notebook's working directory.
# Later cells read it as <word_data_path>/<class name>/<frame image>.
word_data_path = 'ISL_CSLRT_Corpus/Frames_Word_Level'

In [17]:
# Sanity check: run the landmark overlay on one dataset frame and show it.
sample_path = os.path.join(word_data_path, 'CHAT/CHAT_(5).jpg')
img = cv2.imread(sample_path)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {sample_path}")

img = process_frame(img)
try:
    cv2.imshow('Processed Image', img)
    cv2.waitKey(0)  # block until a key is pressed
finally:
    # Close the window even if displaying fails or the cell is interrupted.
    cv2.destroyAllWindows()

In [18]:
# Live preview: overlay hand landmarks on frames from the default camera (index 0).
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        # Previously this case ended the loop silently on the first failed read.
        print("Could not open the webcam (device index 0); skipping live preview.")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = process_frame(frame)

            cv2.imshow('Hand Landmarks', frame)
            if cv2.waitKey(1) & 0xFF == 27:  # Press 'Esc' to exit
                break
finally:
    # Always free the camera and close the window, including on a kernel
    # interrupt or an exception raised while processing a frame.
    cap.release()
    cv2.destroyAllWindows()

In [19]:
import tensorflow as tf
import numpy as np

In [20]:
# One class per sub-directory of the dataset root. Only directories are kept so
# that stray files in the root (e.g. .DS_Store, Thumbs.db) cannot become labels;
# labels are later taken from each image's parent directory name.
with os.scandir(word_data_path) as entries:
    classes = np.unique([entry.name for entry in entries if entry.is_dir()])
num_classes = len(classes)
print("Number of classes:", num_classes)

Number of classes: 114


In [21]:
# Maps a class-name string to its integer index in `classes`.
# There is no mask token and no out-of-vocabulary bucket, so every label fed to
# this layer must be one of the names in `classes`.
label_lookup = tf.keras.layers.StringLookup(
    vocabulary=classes,
    num_oov_indices=0,
    mask_token=None,
)

In [22]:
tf.data.experimental.enable_debug_mode()
total_features = 126  # 21 keypoints * 3 (x, y, z) * 2 hands

# Functions for feature extraction and augmentation
def process_image_with_augmentation(image_path, augment_factor):
  """Load one frame, extract its hand keypoints and encode its class label.

  Args:
    image_path: Scalar string tensor of the form `.../<class name>/<file>`.
    augment_factor: Currently unused; kept so existing callers keep working.
      TODO: wire in keypoint augmentation.

  Returns:
    `(features, encoded_label)`. `features` is the one-element list returned
    by `tf.py_function`; its tensor is float32 with shape
    `(1, total_features)` when at least one hand was found and
    `(0, total_features)` when the image could not be decoded or no hand was
    detected. `encoded_label` is `label_lookup` applied to the parent
    directory name.
  """
  # The class name is the parent directory. Normalise backslashes first so the
  # split works for '/'-separated, '\\'-separated and mixed paths alike.
  normalized_path = tf.strings.regex_replace(image_path, r'\\', '/')
  label = tf.strings.split(normalized_path, '/')[-2]
  encoded_label = label_lookup(label)

  def extract_features(path):
    """Eagerly read and decode `path`, run MediaPipe, and return keypoint rows."""
    no_rows = np.zeros((0, total_features), dtype=np.float32)

    # Decoding happens inside the py_function so a corrupt file is caught here
    # regardless of whether the surrounding map runs eagerly or as a graph.
    try:
      img = tf.image.decode_jpeg(tf.io.read_file(path), channels=3)
    except tf.errors.OpError:
      return no_rows

    img = tf.image.resize(img, [128, 128])
    img = tf.cast(img, dtype=tf.uint8).numpy()

    # NOTE(review): `hands` is created with static_image_mode=False, i.e. video
    # tracking mode, while these are unrelated still images. Consider a separate
    # detector with static_image_mode=True for dataset extraction.
    results = hands.process(img)
    if not results.multi_hand_landmarks:
      return no_rows

    # `multi_hand_landmarks` holds one entry per detected hand; each entry
    # exposes its points through `.landmark`.
    coords = [
      (landmark.x, landmark.y, landmark.z)
      for hand_landmarks in results.multi_hand_landmarks
      for landmark in hand_landmarks.landmark
    ]
    flat = np.asarray(coords, dtype=np.float32).flatten()[:total_features]

    # Zero-pad so one detected hand still yields a `total_features`-wide row.
    # Hands fill the slots in detection order, not by left/right handedness.
    row = np.zeros((1, total_features), dtype=np.float32)
    row[0, :flat.size] = flat
    return row

  features = tf.py_function(extract_features, [image_path], [tf.float32])
  return features, encoded_label

def split_points(features, labels):
  """Expand one `(features, labels)` element into a dataset of per-row samples.

  `features` is reshaped to rows of `total_features` values and `labels` is
  repeated once per resulting row, so each row is paired with a label.
  """
  rows = tf.reshape(features, shape=[-1, total_features])
  row_count = tf.shape(rows)[0]
  row_labels = tf.repeat(labels, row_count)
  return tf.data.Dataset.from_tensor_slices((rows, row_labels))

# Build the keypoint dataset from frames stored as <path>/<class name>/<image>
def extract_keypoints_dataset_with_augmentation(path, augment_factor=0):
  """Return a dataset of `(features, encoded_label)` for every file under `path`.

  Files matching `<path>/*/*` are listed in a fixed (unshuffled) order, mapped
  through `process_image_with_augmentation`, and elements whose features have
  a zero-length second axis are dropped.

  Args:
    path: Dataset root directory.
    augment_factor: Forwarded unchanged to `process_image_with_augmentation`.
  """
  def to_features(image_path):
    return process_image_with_augmentation(image_path, augment_factor)

  def has_rows(features, label):
    # NOTE(review): axis 1 is tested, which presumes the mapped features carry
    # a leading wrapper axis from the py_function list output. Confirm against
    # process_image_with_augmentation.
    return tf.shape(features)[1] > 0

  image_paths = tf.data.Dataset.list_files(f'{path}/*/*', shuffle=False)
  keypoints = image_paths.map(to_features, num_parallel_calls=tf.data.AUTOTUNE)
  return keypoints.filter(has_rows)

In [28]:
# Absolute form of the dataset root with backslashes replaced by forward slashes.
# The bare expression on the next line displays the value as the cell output.
abs_path = os.path.abspath(word_data_path).replace('\\', '/')
abs_path
# dataset = tf.data.Dataset.list_files(f'{abs_path}/*/*', shuffle=False)
# for data in dataset.take(5):
#   # Convert backslashes to forward slashes for cross-platform compatibility
#   file_path = data.numpy().decode('utf-8').replace('\\', '/')
#   print(file_path)

'c:/Users/hp/OneDrive/Desktop/anmol/AI‑Assisted Sign Language Translator/Sign-Language-Translator/ISL_CSLRT_Corpus/Frames_Word_Level'

In [25]:
# Cache the extracted keypoints first, then shuffle. Caching after the shuffle
# would store one fixed order and replay it on every pass.
# `list_files` raises InvalidArgumentError when the pattern matches nothing, so
# the working directory must contain `word_data_path` laid out as <class>/<image>.
train_dataset = (
  extract_keypoints_dataset_with_augmentation(word_data_path, 4)
  .cache()
  .shuffle(1000)
)
for data in train_dataset.take(5):
  print("Features data shape:", data[0].numpy().shape, "Label:", data[1])

InvalidArgumentError: Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'No files matched pattern: ISL_CSLRT_Corpus/Frames_Word_Level/*/*'

In [21]:
# NOTE(review): `scaled_train_dataset` is not defined in the cells visible here;
# confirm it exists on a fresh kernel run.
# NOTE(review): the recorded error shows elements whose leading dimension differs
# (4 vs 5). `batch` needs identically shaped elements, so such elements presumably
# have to be flattened to one sample each before this step. Verify upstream.
# `prefetch` is placed last so it overlaps producing the repeated, batched stream
# with consumption; the elements produced are unchanged.
train_dataset_batched = (
  scaled_train_dataset
  .batch(256)
  .repeat()
  .prefetch(tf.data.AUTOTUNE)
)

InvalidArgumentError: {{function_node __wrapped__IteratorGetNext_output_types_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Cannot batch tensors with different shapes in component 0. First element had shape [4,224,224,3] and element 9 had shape [5,224,224,3]. [Op:IteratorGetNext] name: 