In [2]:
import os
import tensorflow as tf
import numpy as np
import glob
from tqdm import tqdm
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import matplotlib.pyplot as plt
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

# Build the MediaPipe hand-landmark detector once at module level; the
# process_image() helpers in this notebook read the global `detector`.
# The model file is given as a relative path, so it is presumably resolved
# against the notebook's working directory -- TODO confirm where the .task file lives.
base_options = python.BaseOptions(model_asset_path="hand_landmarker.task")
# num_hands=2 because downstream code stores one "Right" and one "Left" hand per image.
options = vision.HandLandmarkerOptions(base_options=base_options,
                                     num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

def print_landmarks_coordinates(hand_landmarks_list, handedness_list):
    """Pack detected hand landmarks into a fixed-size (2, 21, 3) array.

    Despite the name, nothing is printed. Each hand whose top handedness
    category is "Right" is stored in row 0 and each "Left" hand in row 1,
    as ``[x, y, z]`` per landmark. Rows without a matching hand stay zero;
    if several hands share a label, the last one wins.

    Args:
        hand_landmarks_list: One sequence of landmarks per detected hand;
            each landmark exposes ``x``, ``y`` and ``z``.
        handedness_list: Parallel sequence; entry ``[i][0].category_name``
            is the label of hand ``i``.

    Returns:
        A ``numpy`` array of shape (2, 21, 3) with NumPy's default float dtype.
    """
    slot_for_label = {"Right": 0, "Left": 1}
    packed = np.zeros((2, 21, 3))

    for hand_idx, hand_landmarks in enumerate(hand_landmarks_list):
        label = handedness_list[hand_idx][0].category_name
        slot = slot_for_label.get(label)
        if slot is None:
            # Any other label is ignored, leaving the zero rows untouched.
            continue
        packed[slot] = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks])

    return packed

def process_image(frame):
    """Run hand-landmark detection on an image file and return the packed landmarks.

    Args:
        frame: Path to an image file. The parameter name is historical: the
            value is handed to ``cv2.imread``, so it must be a path, not pixels.

    Returns:
        The array produced by ``print_landmarks_coordinates`` for the hands
        detected in the image.

    Raises:
        OSError: If OpenCV cannot load the image (missing file, unreadable
            file, or unsupported/corrupt image data).
    """
    image_bgr = cv2.imread(frame)
    if image_bgr is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the colour conversion below fails with an
        # opaque cv2.error that does not mention the offending path.
        raise OSError(f"Could not read image: {frame!r}")

    # OpenCV loads BGR; the MediaPipe image below is declared as SRGB.
    rgb_frame = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # Create MediaPipe image from the RGB frame
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

    # Detect hand landmarks with the module-level detector
    detection_result = detector.detect(mp_image)

    # Pack the per-hand landmarks into a fixed-size array
    coordinates = print_landmarks_coordinates(detection_result.hand_landmarks, detection_result.handedness)

    return coordinates


In [None]:
import os
import cv2
import glob
import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

import mediapipe as mp



def print_landmarks_coordinates(hand_landmarks_list, handedness_list):
    """Pack detected hand landmarks into a (2, 21, 3) float32 array without depth.

    Despite the name, nothing is printed. A hand labelled "Right" fills
    row 0 and a hand labelled "Left" fills row 1, each landmark stored as
    ``[x, y, 0]`` (the z coordinate is deliberately replaced by zero).
    Rows with no matching hand remain all zeros; when several hands carry
    the same label, the last one overwrites the earlier ones.

    Args:
        hand_landmarks_list: One sequence of landmarks per detected hand;
            each landmark exposes ``x`` and ``y``.
        handedness_list: Parallel sequence; entry ``[i][0].category_name``
            is the label of hand ``i``.

    Returns:
        A ``numpy`` float32 array of shape (2, 21, 3).
    """
    hands_xy0 = np.zeros((2, 21, 3), dtype=np.float32)

    for hand_idx, hand_landmarks in enumerate(hand_landmarks_list):
        side = handedness_list[hand_idx][0].category_name
        if side not in ("Right", "Left"):
            continue
        row = 0 if side == "Right" else 1
        hands_xy0[row] = np.array(
            [[point.x, point.y, 0] for point in hand_landmarks],
            dtype=np.float32,
        )

    return hands_xy0

def process_image(image_path):
    """Run hand-landmark detection on an image file and return the packed landmarks.

    Args:
        image_path: Path to the image file to load with OpenCV.

    Returns:
        A float32 ``numpy`` array of shape (2, 21, 3) as produced by
        ``print_landmarks_coordinates``; both properties are asserted
        before returning.

    Raises:
        OSError: If OpenCV cannot load the image (missing file, unreadable
            file, or unsupported/corrupt image data).
        AssertionError: If the packed array has an unexpected shape or dtype.
    """
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the colour conversion below fails with an
        # opaque cv2.error that does not mention the offending path.
        raise OSError(f"Could not read image: {image_path!r}")

    # OpenCV loads BGR; the MediaPipe image below is declared as SRGB.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Create MediaPipe image from the RGB frame
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

    # Detect hand landmarks with the module-level detector
    detection_result = detector.detect(mp_image)

    # Pack the per-hand landmarks into a fixed-size array
    coordinates = print_landmarks_coordinates(detection_result.hand_landmarks, detection_result.handedness)

    # Guard the layout that the TFRecord writer/reader in this notebook rely on.
    assert coordinates.shape == (2,21,3), f"BAD SHAPE: {coordinates.shape} for {image_path}"
    assert coordinates.dtype == np.float32, f"BAD DTYPE: {coordinates.dtype} for {image_path}"
    return coordinates

# --- Paths ---
# NOTE: machine-specific absolute path; edit WORKSPACE_PATH when running elsewhere.
WORKSPACE_PATH = "E:/Folder_mata_kuliah/Semester_7/PRA TA/Python prep/workspace_jauh"
# Root folder scanned by create_tfrecord(): one sub-folder per class label.
DATASET_PATH = os.path.join(WORKSPACE_PATH, "Dataset")
# TFRecord file written by create_tfrecord() and read back by read_tfrecord().
OUTPUT_TFRECORD = os.path.join(WORKSPACE_PATH, "dataset_without_mirror_without_z_also_21class.tfrecords")

def create_tf_example(processed_data, label_index):
    """Wrap one landmark array and its class index in a ``tf.train.Example``.

    Args:
        processed_data: Array of landmark coordinates; it is flattened and
            stored as a float list under the key ``'coordinates'``.
        label_index: Integer class id, stored as a single-element int64
            list under the key ``'label'``.

    Returns:
        The assembled ``tf.train.Example`` (not yet serialized).
    """
    coordinates_feature = tf.train.Feature(
        float_list=tf.train.FloatList(value=processed_data.flatten())
    )
    label_feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=[label_index])
    )
    example_features = tf.train.Features(
        feature={'coordinates': coordinates_feature, 'label': label_feature}
    )
    return tf.train.Example(features=example_features)

def create_tfrecord():
    """Convert the image dataset into a TFRecord file plus a label-mapping text file.

    Every sub-folder of ``DATASET_PATH`` is treated as one class label and
    every ``*.jpg`` file inside it as one sample. Each image goes through
    ``process_image`` and the resulting array is written, together with
    its encoded label, to ``OUTPUT_TFRECORD``. Label ids come from a
    scikit-learn ``LabelEncoder`` fitted on the folder names. After all
    records are written, ``dataset_labels.txt`` is created in
    ``WORKSPACE_PATH`` with one ``"<id>: <name>"`` line per class.

    Records are emitted class by class in sorted order, so consumers must
    shuffle the dataset themselves.
    """
    label_folders = sorted(
        entry for entry in os.listdir(DATASET_PATH)
        if os.path.isdir(os.path.join(DATASET_PATH, entry))
    )

    label_encoder = LabelEncoder()
    label_encoder.fit(label_folders)

    image_paths = []
    labels = []
    for label in label_folders:
        label_path = os.path.join(DATASET_PATH, label)
        # glob returns matches in filesystem-dependent order; sorting makes
        # the record order in the output file reproducible across runs/machines.
        image_files = sorted(glob.glob(os.path.join(label_path, "*.jpg")))
        image_paths.extend(image_files)
        labels.extend([label] * len(image_files))

    encoded_labels = label_encoder.transform(labels)

    with tf.io.TFRecordWriter(OUTPUT_TFRECORD) as writer:
        for image_path, label_index in tqdm(zip(image_paths, encoded_labels), total=len(image_paths)):
            # Images with no detected hand yield an all-zero array; they are
            # intentionally still written rather than skipped.
            processed_data = process_image(image_path)
            example = create_tf_example(processed_data, label_index)
            writer.write(example.SerializeToString())

    label_mapping = {i: label for i, label in enumerate(label_encoder.classes_)}
    with open(os.path.join(WORKSPACE_PATH, "dataset_labels.txt"), "w") as f:
        for idx, name in label_mapping.items():
            f.write(f"{idx}: {name}\n")
    print(f"TFRecord and label mapping saved. Classes: {label_mapping}")

def read_tfrecord(num_classes):
    """Open ``OUTPUT_TFRECORD`` as a ``tf.data`` dataset of (coordinates, one-hot label).

    Each serialized example is parsed into a float32 tensor reshaped to
    (2, 21, 3) and a one-hot label vector of length ``num_classes``.
    As a sanity check, the first three parsed records are printed.

    Args:
        num_classes: Depth of the one-hot label encoding.

    Returns:
        The parsed (unshuffled, unbatched) ``tf.data.Dataset``.
    """
    flat_len = 2 * 21 * 3  # flattened 2x21x3 tensor
    record_schema = {
        'coordinates': tf.io.FixedLenFeature([flat_len], tf.float32),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }

    def _decode_example(serialized):
        fields = tf.io.parse_single_example(serialized, record_schema)
        landmarks = tf.reshape(fields['coordinates'], (2, 21, 3))
        one_hot_label = tf.one_hot(fields['label'], depth=num_classes)
        return landmarks, one_hot_label

    raw_records = tf.data.TFRecordDataset(OUTPUT_TFRECORD)
    dataset = raw_records.map(_decode_example)

    print("Sample records:")
    preview = dataset.take(3)
    for i, (data, label) in enumerate(preview):
        print(f"Sample {i+1}: data shape {data.shape}, label shape {label.shape}, label {label.numpy()}")
    return dataset

if __name__ == "__main__":
    # Rebuild the TFRecord and label file from the image folders.
    create_tfrecord()

    # One line per class was written to dataset_labels.txt, so the line
    # count is the number of classes.
    with open(os.path.join(WORKSPACE_PATH, "dataset_labels.txt")) as f:
        lines = f.readlines()
    num_classes = len(lines)

    parsed_dataset = read_tfrecord(num_classes)

    # Records are stored class by class, so a small shuffle buffer would
    # produce batches dominated by a single class. tf.data only shuffles
    # uniformly when the buffer holds the whole dataset; the samples are
    # small (2x21x3 floats), so buffering all of them is cheap.
    num_records = sum(1 for _ in parsed_dataset)
    shuffle_buffer = max(num_records, 1)  # shuffle() rejects a buffer size of 0

    train_dataset = parsed_dataset.shuffle(shuffle_buffer).batch(16).prefetch(tf.data.AUTOTUNE)
    for data_batch, label_batch in train_dataset.take(1):
        print("Batch data shape:", data_batch.shape)
        print("Batch label shape:", label_batch.shape)

100%|██████████| 24971/24971 [21:45<00:00, 19.13it/s]


TFRecord and label mapping saved. Classes: {0: 'apa', 1: 'bagus', 2: 'berapa', 3: 'bicara', 4: 'bisa', 5: 'buruk', 6: 'iya', 7: 'kapan', 8: 'kasih', 9: 'kau', 10: 'kita', 11: 'maaf', 12: 'perlu', 13: 'saya', 14: 'sedih', 15: 'senang', 16: 'terima', 17: 'tidak', 18: 'tidak_ada', 19: 'tolong', 20: 'tunggu'}
Sample records:
Sample 1: data shape (2, 21, 3), label shape (21,), label [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Sample 2: data shape (2, 21, 3), label shape (21,), label [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Sample 3: data shape (2, 21, 3), label shape (21,), label [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Batch data shape: (16, 2, 21, 3)
Batch label shape: (16, 21)
