In [None]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt

MARGIN = 10  # pixels kept between the label and the hand / image edge
FONT_SIZE = 1  # font scale handed to OpenCV
FONT_THICKNESS = 1  # stroke thickness handed to OpenCV
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of ``rgb_image`` with every detected hand drawn on it.

  For each hand the landmark skeleton is drawn with MediaPipe's default hand
  styles and the handedness category name is written above the hand.

  Args:
    rgb_image: Image array to annotate. It is copied; the input is never
      modified.
    detection_result: Object exposing ``hand_landmarks`` and ``handedness``
      sequences with one entry per detected hand. Each landmark must expose
      ``x``, ``y`` and ``z``; each handedness entry is indexed at ``[0]`` and
      must expose ``category_name``.

  Returns:
    The annotated copy. With no detected hands it is an untouched copy.
  """
  annotated_image = np.copy(rgb_image)
  # Landmark coordinates are scaled by the image size below; the size does not
  # change between hands, so it is read once.
  height, width = annotated_image.shape[:2]

  # Loop through the detected hands to visualize.
  for hand_landmarks, handedness in zip(detection_result.hand_landmarks,
                                        detection_result.handedness):
    # drawing_utils is given a protobuf landmark list, so repackage the
    # detection output before drawing.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
      for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Anchor the label at the top left corner of the hand's bounding box.
    label = f"{handedness[0].category_name}"
    (label_width, label_height), _ = cv2.getTextSize(
      label, cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, FONT_THICKNESS)
    text_x = int(min(landmark.x for landmark in hand_landmarks) * width)
    text_y = int(min(landmark.y for landmark in hand_landmarks) * height) - MARGIN

    # putText's origin is the text's bottom-left corner. A hand touching the
    # top (or side) of the frame would otherwise push the label partly or
    # fully outside the image, so clamp it back inside.
    text_x = max(0, min(text_x, width - label_width))
    text_y = max(text_y, label_height + MARGIN)

    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, label,
                (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

In [None]:
# Open the sample photo with PIL. The relative path is resolved against the
# kernel's working directory. `img` is reused by the comparison cell below.
img = Image.open('./image.jpg')
# Show the untouched photo, with axis ticks hidden, before any detection runs.
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# STEP 1: Import the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# STEP 2: Create a HandLandmarker object.
# The model bundle 'hand_landmarker.task' is looked up relative to the working
# directory; num_hands=2 asks the detector for at most two hands per image.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

# STEP 3: Load the input image.
image = mp.Image.create_from_file("image.jpg")

# STEP 4: Detect hand landmarks from the input image.
detection_result = detector.detect(image)

# STEP 5: Process the detection result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
# The annotated array is shown with matplotlib as-is; an OpenCV viewer would
# need a cv2.COLOR_RGB2BGR conversion first.
plt.imshow(annotated_image)
plt.axis('off')
plt.show()

In [None]:
# Side-by-side comparison: untouched photo on the left, landmark overlay on
# the right.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for panel, picture, caption in zip(ax,
                                   (img, annotated_image),
                                   ('Original Image', 'Annotated Image')):
    panel.imshow(picture)
    panel.axis('off')
    panel.set_title(caption)

plt.show()

In [None]:
# Detect hand landmarks on the ten reference number gestures (Taiwanese Sign
# Language, digits 0-9). Top row: original images; bottom row: annotated copies.
data_dir = "./chinese_number_gestures/" # 0 ~ 9

fig, ax = plt.subplots(2, 10, figsize=(20, 6))

fig.suptitle('Detection of Taiwanese Sign Language Numbers', fontsize=16)

for i in range(10):
    # PIL (for display) and MediaPipe (for detection) read the same file, so
    # the path is built once.
    image_path = data_dir + str(i) + ".png"

    img = Image.open(image_path)
    ax[0, i].imshow(img)
    ax[0, i].axis('off')
    ax[0, i].set_title(f'Original {i}')

    image = mp.Image.create_from_file(image_path)
    detection_result = detector.detect(image)

    # Report only how many hands were found. Printing the full result object
    # for every image floods the cell output with landmark coordinates.
    print(f"Image {i}: {len(detection_result.hand_landmarks)} hand(s) detected")

    annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
    ax[1, i].imshow(annotated_image)
    ax[1, i].axis('off')
    ax[1, i].set_title(f'Annotated {i}')

plt.show()

In [None]:
# Same detection grid as for the reference gestures, run on the real-case
# photos named 'wu_<digit>.png'. Top row: originals; bottom row: annotated.
data_dir = "./real_case/teacher_wu/" # 0 ~ 9

fig, ax = plt.subplots(2, 10, figsize=(20, 6))

fig.suptitle('Detection of Taiwanese Sign Language Numbers', fontsize=16)

for i in range(10):
    # PIL (for display) and MediaPipe (for detection) read the same file, so
    # the path is built once.
    image_path = data_dir + 'wu_' + str(i) + ".png"

    img = Image.open(image_path)
    ax[0, i].imshow(img)
    ax[0, i].axis('off')
    ax[0, i].set_title(f'Original {i}')

    image = mp.Image.create_from_file(image_path)
    detection_result = detector.detect(image)

    # Report only how many hands were found. Printing the full result object
    # for every image floods the cell output with landmark coordinates.
    print(f"Image {i}: {len(detection_result.hand_landmarks)} hand(s) detected")

    annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
    ax[1, i].imshow(annotated_image)
    ax[1, i].axis('off')
    ax[1, i].set_title(f'Annotated {i}')

plt.show()

In [None]:
# Digit 3: the reference gesture image versus the real-case photo.
example_number3_loc, real_case_number3_loc = (
    './chinese_number_gestures/3.png',
    './real_case/teacher_wu/wu_3.png',
)

# PIL handles for both files (kept as notebook-level names).
example_number3, real_case_number3 = (
    Image.open(example_number3_loc),
    Image.open(real_case_number3_loc),
)

# Reference image: load -> detect -> annotate.
image_example_number3 = mp.Image.create_from_file(example_number3_loc)
detection_result_example_number3 = detector.detect(image_example_number3)
annotated_image_example_number3 = draw_landmarks_on_image(
    image_example_number3.numpy_view(),
    detection_result_example_number3,
)

# Real-case image: load -> detect -> annotate.
image_real_case_number3 = mp.Image.create_from_file(real_case_number3_loc)
detection_result_real_case_number3 = detector.detect(image_real_case_number3)
annotated_image_real_case_number3 = draw_landmarks_on_image(
    image_real_case_number3.numpy_view(),
    detection_result_real_case_number3,
)

# Show the two annotated results next to each other.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for panel, picture, caption in zip(
        ax,
        (annotated_image_example_number3, annotated_image_real_case_number3),
        ('Example Image 3', 'Real Case Image 3')):
    panel.imshow(picture)
    panel.axis('off')
    panel.set_title(caption)

plt.show()

In [None]:
# HandLandmarkProcessor comes from the project-local `reg` module, whose
# source is not part of this notebook.
from reg import HandLandmarkProcessor

processor = HandLandmarkProcessor()

# Convert the two digit-3 detection results produced in the previous cell.
basic_result_ex_num3 = processor.convert_to_array(detection_result_example_number3)
basic_result_real_num3 = processor.convert_to_array(detection_result_real_case_number3)
# Element [0] of each result must expose `.shape`; presumably it is the
# landmark array of the first detected hand — confirm against `reg`.
# NOTE(review): GestureSimilarityMatrix further down checks convert_to_array's
# return value for None; if None can be returned here, the [0] lookups below
# raise TypeError.
print(f"Shape of basic_result_ex_num3: {basic_result_ex_num3[0].shape}")
print(f"Shape of basic_result_real_num3: {basic_result_real_num3[0].shape}")

In [None]:
import torch
import torch.nn.functional as F

# Cosine similarity between the reference and real-case digit-3 arrays is taken
# along dim=1, then averaged into a single score that is printed as a tensor.
# assumes element [0] of each result is an array with at least two dimensions
# and that both have matching shapes — TODO confirm against `reg`
print(F.cosine_similarity(torch.tensor(basic_result_ex_num3[0]), torch.tensor(basic_result_real_num3[0]), dim=1).mean())

In [None]:
# new plot (real case teacher wu 0 ~ 9 detection and classification)
# Top row: real-case photos. Bottom row: annotated copies, titled with the
# cosine similarity between each photo and the reference image of the same digit.

data_dir = "./real_case/teacher_wu/" # 0 ~ 9

fig, ax = plt.subplots(2, 10, figsize=(20, 8))

fig.suptitle('Detection of Taiwanese Sign Language Numbers', fontsize=16)

for i in range(10):
    # PIL (for display) and MediaPipe (for detection) read the same file, so
    # the path is built once.
    image_path = data_dir + 'wu_' + str(i) + ".png"

    img = Image.open(image_path)
    ax[0, i].imshow(img)
    ax[0, i].axis('off')
    ax[0, i].set_title(f'Original {i}')

    image = mp.Image.create_from_file(image_path)
    detection_result = detector.detect(image)

    # Reference image of the same digit, used as the comparison target.
    number_image = mp.Image.create_from_file(f"./chinese_number_gestures/{i}.png")
    number_detection_result = detector.detect(number_image)

    annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
    ax[1, i].imshow(annotated_image)
    ax[1, i].axis('off')

    basic_result = processor.convert_to_array(detection_result)
    basic_result_number = processor.convert_to_array(number_detection_result)

    # GestureSimilarityMatrix treats a None return from convert_to_array as
    # "no hand detected"; guard the same way here instead of failing on [0].
    if basic_result is None or basic_result_number is None:
        print(f"Warning: Failed to detect hand for gesture {i}")
        ax[1, i].set_title(f'Annotated {i} \n No hand detected')
        continue

    similarity = F.cosine_similarity(
        torch.tensor(basic_result[0]),
        torch.tensor(basic_result_number[0]),
        dim=1,
    ).mean()

    # NOTE(review): "Prediction" is just the loop index (the known label), and
    # "Confidence" is the similarity to that same digit's reference image.
    # Nothing is classified here; the similarity-matrix cell compares against
    # all digits.
    ax[1, i].set_title(f'Annotated {i} \n Confidence: {similarity:.2f} \n Prediction: {i}')


plt.show()

In [None]:
import seaborn as sns

class GestureSimilarityMatrix:
    """Compare real gesture images against reference images by cosine similarity.

    The detector supplies hand-landmark detections (it must expose
    ``detect(image)``) and the processor turns a detection into arrays (it must
    expose ``convert_to_array(result)``, returning ``None`` when nothing usable
    was detected).
    """

    def __init__(self, processor, detector):
        self.processor = processor
        self.detector = detector

    def _detect_landmarks(self, image_path):
        """Detect on one image file and return the processor's output (may be None)."""
        image = mp.Image.create_from_file(image_path)
        return self.processor.convert_to_array(self.detector.detect(image))

    def create_similarity_matrix(self, 
                               real_data_dir: str, 
                               example_data_dir: str, 
                               real_prefix: str = "wu_",
                               num_gestures: int = 10) -> np.ndarray:
        """
        Build the gesture similarity matrix.

        Real images are read from ``{real_data_dir}{real_prefix}{i}.png`` and
        reference images from ``{example_data_dir}{j}.png``; directory strings
        are concatenated as-is, so they need their trailing separator.

        Args:
            real_data_dir: Directory holding the real-case images.
            example_data_dir: Directory holding the reference (example) images.
            real_prefix: File-name prefix of the real-case images.
            num_gestures: Number of gestures; images are indexed 0..num_gestures-1.

        Returns:
            similarity_matrix: numpy array of shape (num_gestures, num_gestures).
            Entry [i, j] is the mean cosine similarity (along dim=1) between
            real image i and example image j. Entries involving an image with
            no usable detection stay 0.
        """
        similarity_matrix = np.zeros((num_gestures, num_gestures))

        # Detect every example image once up front. They do not depend on the
        # real image, so re-running detection inside the pairwise loop would
        # cost num_gestures times more detector calls and repeat each warning.
        example_tensors = []
        for j in range(num_gestures):
            example_landmarks = self._detect_landmarks(f"{example_data_dir}{j}.png")
            if example_landmarks is None:
                print(f"Warning: Failed to detect hand in example image {j}")
                example_tensors.append(None)
            else:
                example_tensors.append(torch.tensor(example_landmarks[0]))

        # Compare each real image against all usable examples.
        for i in range(num_gestures):
            real_landmarks = self._detect_landmarks(f"{real_data_dir}{real_prefix}{i}.png")

            if real_landmarks is None:
                print(f"Warning: Failed to detect hand in real image {i}")
                continue

            real_tensor = torch.tensor(real_landmarks[0])
            for j, example_tensor in enumerate(example_tensors):
                if example_tensor is None:
                    continue

                similarity_matrix[i, j] = F.cosine_similarity(
                    real_tensor,
                    example_tensor,
                    dim=1
                ).mean().item()

        return similarity_matrix
    
    def plot_similarity_matrix(self, 
                             similarity_matrix: np.ndarray,
                             real_data_dir: str,
                             example_data_dir: str,
                             real_prefix: str = "wu_",
                             figsize: tuple = (15, 10)):
        """Plot the similarity heatmap with thumbnails of the source images.

        Args:
            similarity_matrix: Matrix as returned by ``create_similarity_matrix``;
                its first dimension gives the number of gestures.
            real_data_dir: Directory holding the real-case images.
            example_data_dir: Directory holding the reference (example) images.
            real_prefix: File-name prefix of the real-case images.
            figsize: Size of the matplotlib figure.

        After showing the figure, prints the best-matching example for each
        real gesture.
        """
        num_gestures = similarity_matrix.shape[0]
        
        # Main figure: 2x2 grid with a short top row and a narrow right column.
        fig = plt.figure(figsize=figsize)
        gs = plt.GridSpec(2, 2, height_ratios=[1, 2], width_ratios=[3, 1])
        
        # Top-left cell: titled placeholder for the example thumbnails.
        ax_examples = fig.add_subplot(gs[0, 0])
        ax_examples.set_title("Example Gestures")
        ax_examples.axis('off')
        
        # Example thumbnails laid out left to right in figure coordinates.
        for i in range(num_gestures):
            img = Image.open(f"{example_data_dir}{i}.png")
            ax_img = fig.add_axes([0.1 + i*0.08, 0.75, 0.07, 0.15])  # [left, bottom, width, height]
            ax_img.imshow(img)
            ax_img.axis('off')
            ax_img.set_title(f'{i}')
        
        # Bottom-right cell: titled placeholder for the real-case thumbnails.
        ax_real = fig.add_subplot(gs[1, 1])
        ax_real.set_title("Real Gestures")
        ax_real.axis('off')
        
        # Real-case thumbnails stacked top to bottom in figure coordinates.
        for i in range(num_gestures):
            img = Image.open(f"{real_data_dir}{real_prefix}{i}.png")
            ax_img = fig.add_axes([0.85, 0.7 - i*0.06, 0.07, 0.05])  # [left, bottom, width, height]
            ax_img.imshow(img)
            ax_img.axis('off')
            ax_img.set_title(f'{i}')
        
        # Bottom-left cell: annotated similarity heatmap.
        ax_heatmap = fig.add_subplot(gs[1, 0])
        sns.heatmap(similarity_matrix, 
                   annot=True, 
                   fmt='.2f', 
                   cmap='YlOrRd',
                   xticklabels=range(num_gestures),
                   yticklabels=range(num_gestures),
                   ax=ax_heatmap)
        
        ax_heatmap.set_title('Similarity Matrix')
        ax_heatmap.set_xlabel('Example Gestures')
        ax_heatmap.set_ylabel('Real Gestures')
        
        plt.tight_layout()
        plt.show()
        
        # Report the best-matching example for every real gesture.
        print("\nBest matches for each gesture:")
        for i in range(num_gestures):
            # An all-zero row means nothing was scored for this gesture (its
            # entries were left at their initial value); argmax would then
            # misleadingly report example 0.
            if not np.any(similarity_matrix[i]):
                print(f"Real gesture {i} has no non-zero similarity scores "
                      f"(hand likely not detected)")
                continue
            best_match = np.argmax(similarity_matrix[i])
            print(f"Real gesture {i} best matches with example {best_match} "
                  f"(similarity: {similarity_matrix[i, best_match]:.3f})")

In [None]:
# Set up the comparer: a landmark processor plus a detector limited to one hand.
# NOTE(review): this rebinds the notebook-level `processor` and `detector`
# names; the earlier `detector` was created with num_hands=2, so re-running
# earlier cells after this one uses the single-hand detector.
processor = HandLandmarkProcessor()
single_hand_options = mp.tasks.vision.HandLandmarkerOptions(
    base_options=mp.tasks.BaseOptions(model_asset_path='hand_landmarker.task'),
    num_hands=1)
detector = mp.tasks.vision.HandLandmarker.create_from_options(single_hand_options)

# Build the similarity-matrix helper.
matrix_creator = GestureSimilarityMatrix(processor, detector)

# Both calls below read the same image locations, so they share one set of
# keyword arguments.
gesture_sources = dict(
    real_data_dir="./real_case/teacher_wu/",
    example_data_dir="./chinese_number_gestures/",
    real_prefix="wu_",
)

# Compute the similarity matrix.
similarity_matrix = matrix_creator.create_similarity_matrix(**gesture_sources)

# Plot the result.
matrix_creator.plot_similarity_matrix(similarity_matrix, **gesture_sources)