In [1]:
import os
import time
import ast
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp

### Parameters and Threshold values

In [18]:
min_detection_confidence = [0.5, 0.6, 0.7]
min_tracking_confidence = [0.5, 0.6, 0.7, 0.8, 0.9]
model_complexity = [1]
static_image_mode = True
max_num_hands = 1

### Comparing aspects

#### 1) MNAE - Mean absolute error normalized by palm size

In [19]:
def calculate_MNAE(mp_df, gt_df):
    error_sum = 0
    for i in range(len(mp_df)):
        wrist = ast.literal_eval(gt_df.loc[i,'0'])
        m_mcp = ast.literal_eval(gt_df.loc[i,'9'])
        palm_size = np.sqrt((wrist[0]-m_mcp[0])**2 + (wrist[1]-m_mcp[1])**2 + (wrist[2] - m_mcp[2])**2)
        for j in range(0,21):
            mp_coord = mp_df.loc[i,str(j)]
            gt_coord = ast.literal_eval(gt_df.loc[i,str(j)])
            error = np.sqrt(((mp_coord[0] - gt_coord[0])**2) + ((mp_coord[1] - gt_coord[1])**2) + ((mp_coord[2] - gt_coord[2])**2)) / palm_size
            error_sum += error
    mnae = error_sum / (len(mp_df) * 21)
    return mnae

#### 2) Proportion of detected hands

#### 3) Processing time per image

### Hyperparameter Tunning

In [20]:
image_folder = "evaluation/"

In [21]:
gt_df = pd.read_csv('gt_df.csv')

In [22]:
columns = ['min_detection_confidence', 'min_tracking_confidence',
            'model_complexity', 'MNAE', 'detected proportion','average process time']
result_df = pd.DataFrame(columns=columns)

In [23]:
for mdc in min_detection_confidence:
    for mtc in min_tracking_confidence:
        for mc in model_complexity:
            gt_df = pd.read_csv('gt_df.csv')
            total_images = len(gt_df)
            start_time = time.perf_counter()

            # Mediapipe Hands model
            mp_hands = mp.solutions.hands
            hands = mp_hands.Hands(min_detection_confidence=mdc,
                                   min_tracking_confidence=mtc, 
                                   model_complexity=mc,
                                   static_image_mode=True,
                                   max_num_hands=1)
            
            results_list = []
            # Processing each image in the folder
            for img_name in os.listdir(image_folder):
                img_path = os.path.join(image_folder, img_name)
                img = cv2.imread(img_path)
            
                if img is None:
                    #print(f"Skipping {img_name} (could not be read)")
                    continue
            
                # Converting BGR to RGB (Mediapipe requires RGB input)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
                # Processing image with Mediapipe
                result = hands.process(img_rgb)
            
                if result.multi_hand_landmarks:
                    landmarks = result.multi_hand_landmarks[0]  # Get the first detected hand
            
                    # Store landmarks as a list of [x, y, z] values
                    landmark_data = {"image": img_name} 
                    landmark_data.update({str(i): [lm.x, lm.y, lm.z] for i, lm in enumerate(landmarks.landmark)})
            
                    results_list.append(landmark_data)
            
            end_time = time.perf_counter()
            process_time = end_time - start_time
            
            # Convert results to DataFrame
            mp_df = pd.DataFrame(results_list)
            
            detected_list = list()
            for i in mp_df.image:
                detected_list.append(int(i[5:8]))
            
            gt_df = gt_df.iloc[detected_list]
            gt_df = gt_df.reset_index()
            
            # Mean absolute error normalised by palm size
            mnae = calculate_MNAE(mp_df, gt_df)
            
            # Detected image proportion
            detected_proportion = len(mp_df) / total_images
            
            # Average process time per image in seconds
            avg_process_time = process_time / len(mp_df)
            
            ## Adding to the result_df
            combination = f"min_detection_confidence = {mdc}\nmin_tracking_confidence = {mtc}\nmodel_complexity = {mc}"
            
            new_row = pd.DataFrame({'min_detection_confidence':[mdc], 
                                    'min_tracking_confidence':[mtc],
                                    'model_complexity':[mc],
                                    'MNAE': [mnae], 'detected proportion':[detected_proportion],'average process time':[avg_process_time]})
            result_df = pd.concat([result_df,new_row],ignore_index=True)

  result_df = pd.concat([result_df,new_row],ignore_index=True)


In [24]:
result_df

Unnamed: 0,min_detection_confidence,min_tracking_confidence,model_complexity,MNAE,detected proportion,average process time
0,0.5,0.5,1,11.358416,0.896667,0.088308
1,0.5,0.6,1,11.376137,0.873333,0.083362
2,0.5,0.7,1,11.388947,0.863333,0.096312
3,0.5,0.8,1,11.424911,0.84,0.096697
4,0.5,0.9,1,11.473472,0.803333,0.094092
5,0.6,0.5,1,11.370068,0.883333,0.082313
6,0.6,0.6,1,11.379457,0.87,0.079858
7,0.6,0.7,1,11.399602,0.853333,0.081245
8,0.6,0.8,1,11.433682,0.833333,0.084081
9,0.6,0.9,1,11.482249,0.793333,0.087725
