In [16]:
import sys
sys.path.append('..')

In [17]:
import os
import cv2
import math
import shutil
import numpy as np
import pandas as pd
import mediapipe as mp

from matplotlib import pyplot as plt
from utils.read import read_landmarks

from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [18]:
landmark_data = read_landmarks('hand_landmarks.json')
test_split = 0.2

In [19]:
print(list(landmark_data.keys())[0])

dataset/try3/archive/asl_alphabet_train/asl_alphabet_train/del/del2036.jpg


In [20]:
def extract_label_from_path(path):
    return path.split('/')[-2]

In [21]:
# Height and width that will be used by the model
DESIRED_HEIGHT = 480
DESIRED_WIDTH = 480
BG_COLOR = (0, 0, 0) # black
MASK_COLOR = (255, 255, 255) # white

In [22]:
# Performs resizing
def resize_and_show(image):
  h, w = image.shape[:2]
  if h < w:
    img = cv2.resize(image, (DESIRED_WIDTH, math.floor(h/(w/DESIRED_WIDTH))))
  else:
    img = cv2.resize(image, (math.floor(w/(h/DESIRED_HEIGHT)), DESIRED_HEIGHT))
  return img

In [23]:
# Create the options that will be used for ImageSegmenter
base_options = python.BaseOptions(model_asset_path='deeplabv3.tflite')
options = vision.ImageSegmenterOptions(base_options=base_options,
                                       output_category_mask=True)

def segment_image(image):
  # Create the image segmenter
  with vision.ImageSegmenter.create_from_options(options) as segmenter:
    # Create the MediaPipe image file that will be segmented
    image_mp = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)

    # Retrieve the masks for the segmented image
    segmentation_result = segmenter.segment(image_mp)
    category_mask = segmentation_result.category_mask

    # Generate solid color images for showing the output segmentation mask.
    image_data = image_mp.numpy_view()
    fg_image = np.zeros(image_data.shape, dtype=np.uint8)
    fg_image[:] = MASK_COLOR
    bg_image = np.zeros(image_data.shape, dtype=np.uint8)
    bg_image[:] = BG_COLOR

    condition = np.stack((category_mask.numpy_view(),) * 3, axis=-1) > 0.2
    output_image = np.where(condition, fg_image, bg_image)
    output_image = cv2.resize(output_image, (200, 200))
    return output_image

In [24]:
def split_dataset(landmark_data, test_split):
    included_labels = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K",
                       "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V",
                       "W", "X", "Y", "Z"]
    # Calculate class distribution and group by class
    label_distribution = {}
    label_data = {}
    for key, value in landmark_data.items():
        label = extract_label_from_path(key)
        if not label in included_labels:
            continue
        if not label in label_distribution:
            label_distribution[label] = 0
        label_distribution[label] += 1
    
        if not label in label_data:
            label_data[label] = []
        
        label_data[label].append([key, value])
    # Create an even distribution of classes
    max_per_class = min(label_distribution.values())
    print(max_per_class)
    print("min class label: ", min(label_distribution, key=label_distribution.get))
    for key in label_data.keys():
        label_data[key] = label_data[key][:max_per_class]
    # Split the data into train and test
    train_data = []
    test_data = []
    for key in label_data.keys():
        split_index = math.floor(len(label_data[key]) * test_split)
        test_data += label_data[key][:split_index]
        train_data += label_data[key][split_index:]
    return train_data, test_data

In [25]:
training_data, testing_data = split_dataset(landmark_data, test_split)

1315
min class label:  N


In [26]:
def to_csv_landmarks(dataset, output_csv):
    column_names_landmark = ['label']
    for i in range(21):
        column_names_landmark.append('keypoint{}_x'.format(i+1))
        column_names_landmark.append('keypoint{}_y'.format(i+1))
    
    df = pd.DataFrame(columns=column_names_landmark)
    for data in dataset:
        label = extract_label_from_path(data[0])
        keypoints = data[1]
        row = [label]
        for keypoint in keypoints:
            row.append(keypoint[0])
            row.append(keypoint[1])
        df.loc[len(df)] = row
    df.to_csv(output_csv, index=False)

In [27]:
def create_image_dataset(dataset, output_folder, binary=False):
    for data in dataset:
        label = extract_label_from_path(data[0])
        
        if binary:
            image = cv2.imread("../" + data[0])
            image = resize_and_show(image)
            image = segment_image(image)
        
        if not os.path.exists(os.path.join(output_folder, label)):
            os.makedirs(os.path.join(output_folder, label))
        
        if binary:
            cv2.imwrite(os.path.join(output_folder, label, data[0].split('/')[-1]), image)
        else:
            shutil.copyfile("../" + data[0], os.path.join(output_folder, label, data[0].split('/')[-1]))


In [None]:
if not os.path.exists('SignLanguageClassification'):
    os.makedirs('SignLanguageClassification')

if not os.path.exists('SignLanguageClassification/train'):
    os.makedirs('SignLanguageClassification/train')

if not os.path.exists('SignLanguageClassification/test'):
    os.makedirs('SignLanguageClassification/test')

if not os.path.exists('SignLanguageClassification/train_binary'):
    os.makedirs('SignLanguageClassification/train_binary')

if not os.path.exists('SignLanguageClassification/test_binary'):
    os.makedirs('SignLanguageClassification/test_binary')

In [28]:
to_csv_landmarks(training_data, 'SignLanguageClassification/train_landmarks.csv')
to_csv_landmarks(testing_data, 'SignLanguageClassification/test_landmarks.csv')

In [29]:
create_image_dataset(training_data, 'SignLanguageClassification/train')
create_image_dataset(testing_data, 'SignLanguageClassification/test')

In [30]:
create_image_dataset(training_data, 'SignLanguageClassification/train_binary', binary=True)
create_image_dataset(testing_data, 'SignLanguageClassification/test_binary', binary=True)

I0000 00:00:1700442295.962871  670824 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1700442295.988263  822404 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.54.03), renderer: NVIDIA RTX A6000/PCIe/SSE2
I0000 00:00:1700442296.062615  670824 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1700442296.073031  822415 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.54.03), renderer: NVIDIA RTX A6000/PCIe/SSE2
I0000 00:00:1700442296.148152  670824 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1700442296.160251  822426 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.54.03), renderer: NVIDIA RTX A6000/PCIe/SSE2
I0000 00:00:1700442296.229500  670824 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1700442296.239714  822437 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.54.03), renderer: NVIDIA RTX A600