In [1]:
import cv2
import mediapipe as mp
import pandas as pd
from PIL import Image, ImageOps

import numpy as np
import glob, os

import matplotlib.pyplot as plt

# Working directory at the time this cell runs; the data paths further down
# are built from it and the directory helper changes back to it when done.
initial_dir = os.getcwd()

# One dict per processed image is collected here.
frames = list()
# Empty placeholder frame; `df` is reassigned from `frames` later on.
df = pd.DataFrame()

In [2]:
# Short aliases for the MediaPipe Tasks classes used to build the detector.
_vision = mp.tasks.vision
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = _vision.HandLandmarker
HandLandmarkerOptions = _vision.HandLandmarkerOptions
VisionRunningMode = _vision.RunningMode

# Detector configuration: IMAGE running mode, num_hands=2, and the model
# bundle read from the relative path 'hand_landmarker.task'.
options = HandLandmarkerOptions(
    num_hands=2,
    running_mode=VisionRunningMode.IMAGE,
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
)

# Detector instance shared by the extraction code below.
hands = HandLandmarker.create_from_options(options)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
def convert_to_matplotlib_coords(coord, image_height):
    """Flip the vertical component of a point against ``image_height``.

    Args:
        coord: Two-item iterable ``(x, y)``.
        image_height: Value the ``y`` component is subtracted from.

    Returns:
        The tuple ``(x, image_height - y)``.
    """
    horizontal, vertical = coord
    flipped_vertical = image_height - vertical
    return (horizontal, flipped_vertical)

def normalize_coordinates(coordinates, target):
    """Scale each axis of a set of 2-D points by that axis' maximum value.

    Every ``x`` is divided by the largest ``x`` in ``coordinates`` and every
    ``y`` by the largest ``y``. The minimum is not subtracted, so this is a
    max-scaling, not a min-max normalization.

    Args:
        coordinates: Iterable of ``(x, y)`` pairs.
        target: Label of the sample. Not used by the computation; kept so
            existing callers keep working.

    Returns:
        A new list of ``[x, y]`` lists in input order. An empty input gives an
        empty list. An axis whose maximum is 0 cannot be scaled and is
        returned with its values divided by 1 (i.e. unchanged).
    """
    # Materialize once so one-shot iterables (generators) are supported too.
    points = [(x, y) for x, y in coordinates]
    if not points:
        return []

    max_x = max(x for x, _ in points)
    max_y = max(y for _, y in points)

    # Guard against ZeroDivisionError when every value on an axis is <= 0
    # with a maximum of exactly 0.
    scale_x = max_x if max_x != 0 else 1
    scale_y = max_y if max_y != 0 else 1

    return [[x / scale_x, y / scale_y] for x, y in points]

def visualize_data(normalized_coordinates, target):
    """Scatter-plot a set of normalized ``(x, y)`` points.

    Args:
        normalized_coordinates: Iterable of ``(x, y)`` pairs.
        target: Label of the sample; shown as the legend title.

    Returns:
        None. Nothing is drawn when ``normalized_coordinates`` is empty.
    """
    points = list(normalized_coordinates)
    if not points:
        # zip(*[]) produces nothing to unpack into the two axes.
        return None

    # Split the pairs into one sequence per axis for plotting.
    normalized_x, normalized_y = zip(*points)

    _, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(normalized_x, normalized_y, color='blue', label='Normalized Coordinates')
    ax.set_xlabel('Normalized X')
    ax.set_ylabel('Normalized Y')
    ax.set_title('Normalized Coordinates Plot')
    # A bare string passed positionally to legend() is treated as a sequence
    # of labels (one per character) and replaces the scatter label, so the
    # target is passed as the legend title instead.
    ax.legend(title=str(target))
    ax.grid(True)
    plt.show()

In [4]:
def extract_video(name, target, sequence_id):
    """Detect hand landmarks in one image file and append a row to ``frames``.

    The appended dict holds ``sequence_id``, ``target`` and ``file`` followed
    by, for each hand slot ("Left", "Right"), the max-scaled ``x``/``y`` of
    every landmark under the keys ``x_<slot>_hand_<i>`` / ``y_<slot>_hand_<i>``.
    When only one hand is detected the "Right" slot is filled with zeros.

    Args:
        name: Path of the image file to process.
        target: Label stored with the row.
        sequence_id: Identifier stored with the row.

    Returns:
        None. No row is appended when the image cannot be decoded
        ("Failed to load image." is printed), when no hand is detected, or
        when ``target`` is one of "x", "ñ", "q" and only one hand is
        detected (the file name is printed).
    """
    global frames

    hand_sides = ["Left", "Right"]
    # Number of zero-valued landmark pairs written for a hand slot that has
    # no detection.
    landmarks_per_hand = 21
    # Targets that are skipped unless two hands are detected.
    two_hand_targets = ("x", "ñ", "q")

    # Decode from the raw file bytes first: imdecode returns None for data it
    # cannot decode, so the failure is reported before the detector is run.
    image = cv2.imdecode(np.fromfile(name, np.uint8), cv2.IMREAD_UNCHANGED)
    if image is None:
        print("Failed to load image.")
        return

    mp_image = mp.Image.create_from_file(name)
    hand_landmarker_result = hands.detect(mp_image)

    hand_count = len(hand_landmarker_result.handedness)
    if hand_count == 0:
        return

    # Decide on the skip before doing any coordinate work.
    if hand_count == 1 and target in two_hand_targets:
        print(name)
        return

    row_data = {
        "sequence_id": sequence_id,
        "target": target,
        "file": name,
    }

    # NOTE: slots are assigned by detection order (first hand -> "Left",
    # second -> "Right"), not by the handedness the detector reports.
    for hand_side, landmarks in zip(hand_sides, hand_landmarker_result.hand_landmarks):
        detected_pixels = normalize_coordinates(
            [[landmark.x, landmark.y] for landmark in landmarks], target
        )
        for i, (x, y) in enumerate(detected_pixels):
            row_data[f'x_{hand_side}_hand_{i}'] = x
            row_data[f'y_{hand_side}_hand_{i}'] = y

    # Zero-fill every slot without a detection so all rows share one schema.
    for hand_side in hand_sides[hand_count:]:
        for i in range(landmarks_per_hand):
            row_data[f'x_{hand_side}_hand_{i}'] = 0
            row_data[f'y_{hand_side}_hand_{i}'] = 0

    frames.append(row_data)

In [5]:
# Running counter, incremented once for every PNG that is processed.
sequence_id = 0
# sequence_id values assigned while processing a validation directory.
validation_sequence_ids = list()

def extract_coordinates_from_dir(dir, is_val=False):
    """Convert the images in ``dir`` to PNG and extract landmarks from each PNG.

    Works in two passes while ``dir`` is the current working directory:

    1. Every regular file whose extension is not exactly ``.png`` is opened,
       rotated according to its EXIF orientation, saved as ``<stem>.png`` and
       the original file is removed.
    2. Every PNG is passed, in sorted name order, to ``extract_video`` with a
       fresh ``sequence_id``. The label is the text before the first space of
       the file name, or before the first dot when the name has no space.

    The working directory is changed back to ``initial_dir`` on exit, also
    when an error is raised.

    Args:
        dir: Directory holding the images.
        is_val: When true, every assigned ``sequence_id`` is also appended to
            ``validation_sequence_ids``.

    Returns:
        None.
    """
    global sequence_id, initial_dir, validation_sequence_ids
    os.chdir(dir)

    try:
        for file in glob.glob("*"):
            stem, extension = os.path.splitext(file)
            # Sub-directories are not images; already-converted files are kept.
            if extension == ".png" or not os.path.isfile(file):
                continue

            renamed_format = stem + ".png"

            # exif_transpose returns an independent in-memory image, so the
            # source handle can be closed before the result is written.
            with Image.open(file) as im:
                upright = ImageOps.exif_transpose(im)
            upright.save(renamed_format)

            # On a case-insensitive filesystem "a.PNG" and "a.png" are the
            # same file; removing the source there would delete the result.
            if not os.path.samefile(file, renamed_format):
                os.remove(file)

        # Case-insensitive match so a PNG that kept an upper-case extension
        # (see above) is still processed.
        files_to_extract = sorted(
            entry for entry in glob.glob("*") if entry.lower().endswith(".png")
        )

        for file in files_to_extract:
            sequence_id += 1

            if is_val:
                validation_sequence_ids.append(sequence_id)

            if " " in file:
                name = file.split(" ")[0]
            else:
                name = file.split(".")[0]

            extract_video(file, name, sequence_id)
    finally:
        # Always restore the working directory so later cells that rely on
        # relative paths keep working after a failure.
        os.chdir(initial_dir)

In [6]:
# Folder with the training images; every image in it is processed and its
# row appended to `frames`.
training_dir = f'{initial_dir}/data/asl_letters_lensegua_c'
extract_coordinates_from_dir(dir=training_dir)

q (1) 2.png
q (4) 2.png
q (6).png
q (7).png
x (9) 2.png
ñ (11).png
ñ (9).png


In [7]:
# Folder with the validation images; their sequence ids are additionally
# recorded in `validation_sequence_ids`.
validation_dir = f'{initial_dir}/data/letters_val'
extract_coordinates_from_dir(dir=validation_dir, is_val=True)

In [8]:
# One row per collected dict in `frames`.
df = pd.DataFrame(data=frames)

In [9]:
# Store the sequence ids as integers.
df = df.astype({'sequence_id': int})

In [10]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,sequence_id,target,file,x_Left_hand_0,y_Left_hand_0,x_Left_hand_1,y_Left_hand_1,x_Left_hand_2,y_Left_hand_2,x_Left_hand_3,...,x_Right_hand_16,y_Right_hand_16,x_Right_hand_17,y_Right_hand_17,x_Right_hand_18,y_Right_hand_18,x_Right_hand_19,y_Right_hand_19,x_Right_hand_20,y_Right_hand_20
0,1,a,a (1) 2.png,0.566217,0.982714,0.594175,0.883338,0.711333,0.778675,0.779545,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,a,a (1) 3.png,0.529522,0.926254,0.630963,0.837728,0.751088,0.736737,0.797417,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,a,a (1).png,0.927725,0.977053,0.956801,0.940599,0.967153,0.900984,0.96568,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,a,a (10) 2.png,0.859173,0.907511,0.907352,0.854298,0.958791,0.795681,0.969218,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,a,a (10) 3.png,0.97877,0.945584,0.903704,0.842624,0.848464,0.728467,0.830719,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Number of columns in the feature table.
df.shape[1]

87

In [12]:
# Write the train/validation index files only when validation ids were recorded.
if validation_sequence_ids:
    index_columns = ["sequence_id", "target", "file"]
    # True for rows whose sequence id was recorded as a validation id.
    in_validation = df['sequence_id'].isin(validation_sequence_ids)

    validation_path = f'{initial_dir}/data/validation_letters.csv'
    validation_df = df.loc[in_validation, index_columns]
    validation_df.to_csv(validation_path, index=False)

    print(validation_df)

    train_path = f'{initial_dir}/data/train_letters.csv'
    train_df = df.loc[~in_validation, index_columns]
    train_df.to_csv(train_path, index=False)

     sequence_id target   file
704          775      a  a.png
705          776      b  b.png
706          777      c  c.png
707          778      d  d.png
708          779      e  e.png
709          780      g  g.png
710          781      h  h.png
711          782      i  i.png
712          783      k  k.png
713          784      l  l.png
714          785      m  m.png
715          786      n  n.png
716          787      o  o.png
717          788      p  p.png
718          789      q  q.png
719          790      r  r.png
720          791      t  t.png
721          792      u  u.png
722          793      v  v.png
723          794      w  w.png
724          795      x  x.png
725          796      y  y.png
726          797      z  z.png
727          798      ñ  ñ.png


In [13]:
# Full feature table without the source file name column, written as CSV.
df_final = df.drop(columns=["file"])
data_path = f'{initial_dir}/data/data_letters.csv'
df_final.to_csv(data_path, index=False)