In [64]:
import cv2
import mediapipe as mp
import numpy as np
import os
import pandas as pd
import PIL
import wget
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from os import listdir
from os.path import isfile, join


In [4]:
# Download the MediaPipe hand-landmarker model bundle, but only when it is not
# already on disk. wget.download() does not overwrite an existing file (it
# picks a new numbered filename instead), so re-running this cell unguarded
# would leave duplicate copies of the model next to the notebook.
MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
MODEL_PATH = "hand_landmarker.task"
if not os.path.isfile(MODEL_PATH):
    wget.download(MODEL_URL, out=MODEL_PATH)
# Last expression: show the local model path as the cell's result.
MODEL_PATH

'hand_landmarker.task'

In [2]:
# Visualisation helper for hand-landmark detections (Google's pre-written example function).
MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of ``rgb_image`` with every detected hand drawn on it.

  For each hand in ``detection_result`` the landmarks and their connections
  are drawn with MediaPipe's default hand styles, and the handedness category
  name is written MARGIN pixels above the top-left corner of the hand's
  bounding box.

  Args:
    rgb_image: Image array to annotate; it is copied, never modified.
    detection_result: Object exposing ``hand_landmarks`` and ``handedness``
      sequences, indexed per detected hand.

  Returns:
    The annotated copy of the image.
  """
  all_hands = detection_result.hand_landmarks
  all_handedness = detection_result.handedness
  canvas = np.copy(rgb_image)

  for hand_idx, points in enumerate(all_hands):
    hand_label = all_handedness[hand_idx]

    # Repackage the landmarks as a protobuf list for the drawing utilities.
    proto_points = [
      landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
      for point in points
    ]
    landmark_list = landmark_pb2.NormalizedLandmarkList()
    landmark_list.landmark.extend(proto_points)
    solutions.drawing_utils.draw_landmarks(
      canvas,
      landmark_list,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Landmark coordinates are scaled by the image size to get the pixel
    # position of the bounding box's top-left corner.
    height, width, _ = canvas.shape
    left_px = int(min(point.x for point in points) * width)
    top_px = int(min(point.y for point in points) * height)
    label_origin = (left_px, top_px - MARGIN)

    # Write the handedness (left or right hand) next to the hand.
    cv2.putText(canvas, f"{hand_label[0].category_name}",
                label_origin, cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return canvas

In [3]:
# function to read in all images in a folder
def read_images_from_folder(folder_path):
    """
    Reads all images from the specified folder and stores them in a dictionary.

    Files are selected by extension, compared case-insensitively so that
    names such as ``photo.JPG`` are picked up too. Filenames are processed in
    sorted order, so the dictionary's insertion order is the same on every run.
    A matching file that OpenCV cannot decode is skipped with a printed warning.

    Args:
        folder_path (str): The path to the folder containing the images.

    Returns:
        dict: A dictionary where keys are filenames and values are the images
            as returned by ``cv2.imread``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif')
    images = {}
    # os.listdir() returns names in arbitrary order; sorting keeps positional
    # access to the resulting keys/values reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(image_extensions):
            continue
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Warning: {filename} could not be read as an image.")
            continue
        images[filename] = img
    return images

In [70]:
# get list of files in images folder
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
folder_path='C:/Users/awright/Documents/git_repos/bsl-interpreter/images/Vowels_20240725/'
# listdir() returns entries in arbitrary order; sort so that positional access
# such as vowel_file_names[0] refers to the same file on every run.
vowel_file_names = sorted(
    join(folder_path, f) for f in listdir(folder_path) if isfile(join(folder_path, f))
)

In [4]:
# Load every readable image in the folder into a {filename: image} dictionary.
bsl_images = read_images_from_folder(folder_path=folder_path)

In [5]:
# Split the images dictionary into parallel lists: filenames and image arrays.
bsl_images_keys = [name for name in bsl_images]
bsl_images_vals = [bsl_images[name] for name in bsl_images_keys]
bsl_images_vals

[array([[[141, 143, 154],
         [139, 141, 152],
         [137, 137, 149],
         ...,
         [241, 226, 223],
         [241, 226, 223],
         [241, 226, 223]],
 
        [[143, 145, 156],
         [141, 143, 154],
         [139, 139, 151],
         ...,
         [241, 226, 223],
         [241, 226, 223],
         [241, 226, 223]],
 
        [[144, 144, 156],
         [142, 142, 154],
         [140, 140, 152],
         ...,
         [241, 226, 223],
         [241, 226, 223],
         [241, 226, 223]],
 
        ...,
 
        [[ 94,  86,  93],
         [ 93,  86,  91],
         [ 94,  84,  90],
         ...,
         [251, 237, 243],
         [252, 238, 244],
         [252, 238, 244]],
 
        [[ 98,  89,  99],
         [ 97,  89,  96],
         [ 97,  86,  94],
         ...,
         [251, 237, 243],
         [252, 238, 244],
         [252, 238, 244]],
 
        [[104,  95, 105],
         [102,  93, 103],
         [100,  89,  97],
         ...,
         [251, 237, 243],
  

In [6]:
# test import of images
# only run if necessary - notebooks don't like showing the images!!!
# can cause kernel to crash
cv2.namedWindow('frame', cv2.WINDOW_AUTOSIZE)
cv2.imshow('frame', bsl_images_vals[1])
# Block until a key is pressed, then close the window. A HighGUI window left
# open after the cell has finished stops responding, which is presumably what
# was making the kernel appear to crash.
pressed_key = cv2.waitKey(0)
cv2.destroyAllWindows()
pressed_key

-1

In [26]:
# Build the HandLandmarker from the local model bundle, looking for up to two hands.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(
    base_options=base_options,
    num_hands=2,
    min_hand_detection_confidence=0.01,  # very low threshold
    min_hand_presence_confidence=0.5,
)
detector = vision.HandLandmarker.create_from_options(options)

In [36]:
# Load the first file of the folder listing as a MediaPipe image.
first_image_path = vowel_file_names[0]
image = mp.Image.create_from_file(first_image_path)

In [37]:
# Run the hand landmarker on the loaded image.
detection_result = detector.detect(image)



In [38]:
# Take the MediaPipe image's pixel data as a NumPy view, then make an independent copy of it.
im2 = image.numpy_view()
copied_im2_array = np.array(im2, copy=True)


In [39]:
# Sanity check: dimensions and container type of the copied pixel data.
print(np.shape(copied_im2_array))
type(copied_im2_array)

(1080, 1920, 3)


numpy.ndarray

In [53]:
# Keep only the first three channels of each pixel.
# NOTE(review): despite the name, no channel reordering happens in this cell.
bgr_img = copied_im2_array[:, :, :3]
print(bgr_img.shape)

(1080, 1920, 3)


In [56]:
# Reverse the order of the channel (last) axis in one vectorised step instead
# of a Python-level loop over image rows. np.ascontiguousarray returns a
# densely packed array, matching what np.array(...) produced before.
# Assumes bgr_img is 3-D (height, width, channels), as the shape printed above shows.
bgr_img_2 = np.ascontiguousarray(bgr_img[:, :, ::-1])

In [57]:
# Visualise the detection: overlay landmarks and handedness labels on the channel-reversed image.
annotated_image = draw_landmarks_on_image(rgb_image=bgr_img_2, detection_result=detection_result)

In [58]:
# Show the annotated image until a key is pressed, then close the window so
# no unresponsive HighGUI window is left behind once the cell has finished.
cv2.imshow('frame', annotated_image)
pressed_key = cv2.waitKey(0)
cv2.destroyAllWindows()
pressed_key

-1

In [60]:
# Show only the pixels that the annotation step changed. cv2.absdiff gives the
# true per-pixel magnitude of the change; plain subtraction of unsigned 8-bit
# arrays wraps around wherever the result would be negative
# (assumes both arrays are uint8 images of identical shape - they derive from the same source).
x = cv2.absdiff(annotated_image, bgr_img_2)
cv2.imshow('frame', x)
# Close the window after the key press so it does not linger unresponsive.
pressed_key = cv2.waitKey(0)
cv2.destroyAllWindows()
pressed_key
# not reading the hands in the image as hands - need to fine tune model

-1

In [74]:
# write function to read image, detect hands, draw landmarks, and write image
def read_draw_write_hands(img_path, output_path):
    '''
    Reads an image from a filepath, detects and draws hand landmarks, and writes the
    annotated image to a specified folder.

    The output file is named "annotated_" + the input file's basename. The
    output folder is created if it does not exist yet. Detection uses the
    module-level ``detector`` object.

    Args:
        img_path (str): The path to the image.
        output_path (str): The path to the folder to write the annotated image to
            (with or without a trailing path separator).

    Returns:
        str: The path of the annotated image that was written.
    '''
    # `import PIL` on its own does not load the PIL.Image submodule, so import
    # it explicitly rather than relying on another package having done so.
    from PIL import Image

    image = mp.Image.create_from_file(img_path)
    detection_result = detector.detect(image)

    # Keep the first three channels only, so a file loaded with an alpha
    # channel can still be annotated. The channel order is left as loaded,
    # because the same array goes straight to Pillow for saving.
    # NOTE(review): assumes MediaPipe loads colour files as RGB(A) - confirm.
    image_array = np.ascontiguousarray(image.numpy_view()[:, :, :3])
    annotated_image = draw_landmarks_on_image(image_array, detection_result)

    if output_path:
        os.makedirs(output_path, exist_ok=True)
    annotated_path = os.path.join(output_path, "annotated_" + os.path.basename(img_path))
    Image.fromarray(annotated_image).save(annotated_path)
    return annotated_path
    
    
    
    

In [75]:
# Annotate every file from the folder listing and save the results to the Outputs folder.
annotated_output_dir = 'C:/Users/awright/Documents/git_repos/bsl-interpreter/images/Outputs/'
for file in vowel_file_names:
    read_draw_write_hands(file, annotated_output_dir)

In [62]:
# annotated_image was drawn on the channel-reversed array prepared for
# cv2.imshow, whereas Pillow interprets a 3-channel array as RGB. Reverse the
# channels back first so the picture is not shown with red and blue swapped.
# NOTE(review): assumes annotated_image still comes from bgr_img_2 - confirm.
test_pillow = PIL.Image.fromarray(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))

In [63]:
# Open the Pillow image in the system's default image viewer.
test_pillow.show()