In [1]:
import os
import time
import ast
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp

### Parameters and Threshold values

In [2]:
min_detection_confidence = [0.5, 0.6, 0.7]
min_tracking_confidence = [0.5, 0.6, 0.7, 0.8, 0.9]
model_complexity = [1]
static_image_mode = True
max_num_hands = 1

### Comparing aspects

#### 1) MAE - Mean absolute error

In [3]:
def calculate_MAE(mp_df, gt_df):
    error_sum = 0
    for i in range(len(mp_df)):
        for j in range(0,21):
            mp_coord = mp_df.loc[i,j]
            gt_coord = ast.literal_eval(gt_df.loc[i,str(j)])
            error = np.sqrt(((mp_coord[0] - gt_coord[0])**2) + ((mp_coord[1] - gt_coord[1])**2)) 
            error_sum += error
    mae = error_sum / (len(mp_df) * 21)
    return mae

### Hyperparameter Tunning

In [4]:
image_folder = "rgb/"

In [5]:
gt_df = pd.read_csv('gt_df.csv')

In [6]:
columns = ['min_detection_confidence', 'min_tracking_confidence',
            'model_complexity', 'MAE', 'detected proportion','average process time']
result_df = pd.DataFrame(columns=columns)

In [7]:
df_count = 1

for mdc in min_detection_confidence:
    for mtc in min_tracking_confidence:
        for mc in model_complexity:
            gt_df = pd.read_csv('gt_df.csv')
            total_images = len(gt_df)
            start_time = time.perf_counter()

            # Mediapipe Hands model
            mp_hands = mp.solutions.hands
            hands = mp_hands.Hands(min_detection_confidence=mdc,
                                   min_tracking_confidence=mtc, 
                                   model_complexity=mc,
                                   static_image_mode=True,
                                   max_num_hands=1)
            
            # List to store results
            results_list = []
            
            # Start timer
            start_time = time.perf_counter()
            
            
            # Processing each image in the folder
            for img_name in os.listdir(image_folder):
                img_path = os.path.join(image_folder, img_name)
                img = cv2.imread(img_path)
            
                if img is None:
                    continue
            
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
                # Processing image with Mediapipe
                result = hands.process(img_rgb)
            
                landmark_data = {"image": img_name}
            
                if result.multi_hand_landmarks:
                    landmarks = result.multi_hand_landmarks[0]  # Get the first detected hand
                    h, w, _ = img.shape  # Image dimensions (height, width)
            
                    for i, lm in enumerate(landmarks.landmark):
                        # Convert normalized coordinates to pixel coordinates
                        x = lm.x * w
                        y = lm.y * h
            
                        landmark_data[i] = [x, y]
                else:
                    continue
            
                results_list.append(landmark_data)
            
            end_time = time.perf_counter()
            process_time = end_time - start_time
            
            mp_df = pd.DataFrame(results_list)
            mp_df.to_csv('mp_results/mp_df' + str(df_count) + '.csv', index=False)
            df_count += 1

            # detected_list = list()
            # for i in mp_df.image:
            #     detected_list.append(int(i[5:8]))
            
            # gt_df = gt_df.iloc[detected_list]
            # gt_df = gt_df.reset_index()
            
            # Mean absolute error normalised by palm size
            mae = calculate_MAE(mp_df, gt_df)
            
            # Detected image proportion
            detected_proportion = len(mp_df) / total_images
            
            # Average process time per image in seconds
            avg_process_time = process_time / len(mp_df)
            
            ## Adding to the result_df
            combination = f"min_detection_confidence = {mdc}\nmin_tracking_confidence = {mtc}\nmodel_complexity = {mc}"
            
            new_row = pd.DataFrame({'min_detection_confidence':[mdc], 
                                    'min_tracking_confidence':[mtc],
                                    'model_complexity':[mc],
                                    'MAE': [mae], 'detected proportion':[detected_proportion],'average process time':[avg_process_time]})
            result_df = pd.concat([result_df,new_row],ignore_index=True)

  result_df = pd.concat([result_df,new_row],ignore_index=True)


In [9]:
detection_proportions = pd.read_csv('comb_df.csv')
detection_proportions

Unnamed: 0,Combination,Proportions
0,Combination 1,0.9
1,Combination 2,0.884444
2,Combination 3,0.875556
3,Combination 4,0.855556
4,Combination 5,0.823333
5,Combination 6,0.885556
6,Combination 7,0.873333
7,Combination 8,0.865556
8,Combination 9,0.847778
9,Combination 10,0.813333


In [13]:
result_df['detected proportion'] = detection_proportions['Proportions']

In [16]:
result_df

Unnamed: 0,min_detection_confidence,min_tracking_confidence,model_complexity,MAE,detected proportion,average process time
0,0.5,0.5,1,7.725107,0.9,0.081894
1,0.5,0.6,1,7.725107,0.884444,0.087699
2,0.5,0.7,1,7.725107,0.875556,0.082946
3,0.5,0.8,1,7.725107,0.855556,0.085632
4,0.5,0.9,1,7.725107,0.823333,0.088376
5,0.6,0.5,1,7.726208,0.885556,0.083678
6,0.6,0.6,1,7.726208,0.873333,0.081004
7,0.6,0.7,1,7.726208,0.865556,0.080951
8,0.6,0.8,1,7.726208,0.847778,0.089279
9,0.6,0.9,1,7.726208,0.813333,0.082792
