<a href="https://colab.research.google.com/github/hez4777/gesture_recognizer/blob/main/gesture_test_old.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there
# can be accessed through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

!pip install mediapipe scikit-learn seaborn

import os
import glob
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image
from typing import List, Dict, Tuple, Optional
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

Mounted at /content/drive
Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.6/35.6 MB 8.6 MB/s eta 0:00:00
Downloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.21 sounddevice-0.5.1


In [None]:
def list_gesture_folders(dataset_path: str) -> List[str]:
    """List all gesture subfolders in the dataset directory.

    Args:
        dataset_path: Directory whose immediate subdirectories each hold the
            images of one gesture class.

    Returns:
        The subdirectory names (not full paths), sorted alphabetically.
        ``os.listdir`` returns entries in arbitrary order, so sorting keeps
        the class order identical across runs and machines.

    Raises:
        OSError: If ``dataset_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    return sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )

def sample_images_from_folders(dataset_path: str, folders: List[str],
                              sample_ratio: float = 0.2) -> Dict[str, List[str]]:
    """Sample a percentage of images from each gesture folder.

    Args:
        dataset_path: Root directory containing the gesture folders.
        folders: Names of the subfolders of ``dataset_path`` to sample from.
        sample_ratio: Fraction of each folder's images to draw. At least one
            image is drawn from every non-empty folder, and never more than
            the folder contains.

    Returns:
        Mapping from folder name to the list of sampled image paths
        (``*.jpg``, ``*.jpeg`` and ``*.png`` files only). Folders without any
        matching image map to an empty list.
    """
    sampled_images = {}

    for folder in folders:
        folder_path = os.path.join(dataset_path, folder)
        # Sort so that seeding ``random`` yields a reproducible sample; glob
        # itself returns paths in arbitrary order.
        image_paths = sorted(
            path
            for pattern in ("*.jpg", "*.jpeg", "*.png")
            for path in glob.glob(os.path.join(folder_path, pattern))
        )

        if image_paths:
            # Clamp to [1, len(image_paths)]: random.sample raises ValueError
            # when asked for more items than the population holds.
            num_samples = min(len(image_paths),
                              max(1, int(len(image_paths) * sample_ratio)))
            sampled = random.sample(image_paths, num_samples)
        else:
            # Empty folder: nothing to sample, but keep the class present.
            sampled = []
        sampled_images[folder] = sampled

        print(f"Sampled {len(sampled)} images from {folder} (total: {len(image_paths)})")

    return sampled_images

def load_gesture_recognizer(model_path: str) -> vision.GestureRecognizer:
    """Create a gesture recognizer from the model file at ``model_path``.

    The recognizer is configured for still images (``RunningMode.IMAGE``)
    and reports at most one hand per image.
    """
    recognizer_options = vision.GestureRecognizerOptions(
        base_options=python.BaseOptions(model_asset_path=model_path),
        running_mode=vision.RunningMode.IMAGE,
        num_hands=1,
    )
    recognizer = vision.GestureRecognizer.create_from_options(recognizer_options)
    return recognizer

def process_image(image_path: str, recognizer: vision.GestureRecognizer) -> str:
    """
    Process a single image and return the recognized gesture.

    Args:
        image_path: Path of the image file to classify.
        recognizer: Recognizer used to run inference on the image.

    Returns:
        The category name of the first gesture candidate for the first
        detected hand. Returns the string "None" (not the ``None`` object)
        when no gesture is reported or when processing the image fails, so
        the result can always be used directly as a class label.
    """
    try:
        image = mp.Image.create_from_file(image_path)
        gestures = recognizer.recognize(image).gestures

        # gestures[0] holds the candidates for the first hand; both levels
        # may be empty when nothing was recognized.
        if gestures and gestures[0]:
            return gestures[0][0].category_name
        # No gesture detected
        return "None"
    except Exception as e:
        # Best-effort: a single unreadable image must not abort a whole
        # evaluation run, so report it and count it as "None".
        print(f"Error processing {image_path}: {e}")
        return "None"

def evaluate_gesture_recognizer(dataset_path: str, model_path: str,
                              sample_ratio: float = 0.2) -> Tuple[np.ndarray, List[str], pd.DataFrame]:
    """
    Evaluate the gesture recognizer on sampled images from each gesture folder.

    Each subfolder name of ``dataset_path`` is used as the true label for the
    images sampled from it.

    Args:
        dataset_path: Root directory with one subfolder per gesture class.
        model_path: Path to the gesture recognizer model file.
        sample_ratio: Fraction of each folder's images to evaluate.

    Returns:
        A tuple ``(conf_matrix, class_names, results_df)``:
        the confusion matrix (rows = true, columns = predicted), the class
        names in matrix order, and a DataFrame with one row per image
        (``image_path``, ``true_label``, ``predicted_label``).
    """
    gesture_folders = list_gesture_folders(dataset_path)
    print(f"Found {len(gesture_folders)} gesture classes: {gesture_folders}")

    sampled_images = sample_images_from_folders(dataset_path, gesture_folders, sample_ratio)

    recognizer = load_gesture_recognizer(model_path)

    true_labels = []
    predicted_labels = []
    results_data = []

    try:
        for gesture_class, image_paths in sampled_images.items():
            for image_path in image_paths:
                true_label = gesture_class

                predicted_label = process_image(image_path, recognizer)

                true_labels.append(true_label)
                predicted_labels.append(predicted_label)
                results_data.append({
                    "image_path": image_path,
                    "true_label": true_label,
                    "predicted_label": predicted_label
                })

                print(f"Image: {os.path.basename(image_path)}, True: {true_label}, Predicted: {predicted_label}")
    finally:
        # Release the recognizer's resources even if evaluation fails midway.
        recognizer.close()

    # confusion matrix
    # Folder classes come first; any predicted label that is not a folder name
    # (e.g. "None", or a model class missing from the dataset) is appended.
    # confusion_matrix ignores samples whose label is absent from `labels`,
    # so leaving these out would silently drop those predictions.
    all_classes = gesture_folders.copy()
    unexpected_labels = sorted(set(predicted_labels) - set(all_classes))
    all_classes.extend(unexpected_labels)

    conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=all_classes)

    results_df = pd.DataFrame(results_data)

    return conf_matrix, all_classes, results_df

def plot_confusion_matrix(conf_matrix: np.ndarray, class_names: List[str], save_path: Optional[str] = None,
                      normalize: bool = False, precision: bool = False):
    """
    Draw the confusion matrix as a heatmap, optionally saving it to disk.

    Args:
        conf_matrix: Confusion matrix (rows = true, columns = predicted)
        class_names: Tick labels for both axes, in matrix order
        save_path: If given, the figure is written here; when normalizing,
            "_precision_norm" or "_recall_norm" is inserted before the extension
        normalize: If True, scale counts so each column or row sums to 1
        precision: With normalize, True scales per predicted class (column),
            False scales per true class (row); ignored without normalize
    """
    plt.figure(figsize=(10, 8))

    # astype returns a new float array, so the caller's matrix is untouched.
    cm_display = conf_matrix.astype(float)

    if not normalize:
        title = 'Gesture Recognition Confusion Matrix'
        fmt = 'g'  # compact format that suits whole-number counts
        name_suffix = None
    else:
        fmt = '.3f'
        if precision:
            # Per-column totals: each column then shows the precision split.
            totals = cm_display.sum(axis=0, keepdims=True)
            title = 'Normalized Precision Confusion Matrix'
            name_suffix = "_precision_norm"
        else:
            # Per-row totals: each row then shows the recall split.
            totals = cm_display.sum(axis=1, keepdims=True)
            title = 'Normalized Recall Confusion Matrix'
            name_suffix = "_recall_norm"
        # Swap zero totals for a tiny value so empty rows/columns divide to 0.
        totals = np.where(totals == 0, 1e-10, totals)
        cm_display = cm_display / totals

    sns.heatmap(cm_display, annot=True, fmt=fmt, cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(title)

    if save_path:
        if name_suffix is not None:
            stem, extension = os.path.splitext(save_path)
            save_path = f"{stem}{name_suffix}{extension}"

        plt.savefig(save_path, bbox_inches='tight')
        print(f"Confusion matrix saved to {save_path}")

    plt.show()

In [None]:
# Run configuration: all locations are on the mounted Google Drive.
DATASET_PATH = '/content/drive/MyDrive/gesture/gesture_data/dataset_combined'  # dataset root directory
MODEL_PATH = '/content/drive/MyDrive/gesture/model/gesture_recognizer_ver2.task'  # recognizer model file
OUTPUT_DIR = '/content/drive/MyDrive/gesture/gesture_results'  # output directory for results
SAMPLE_RATIO = 0.2  # sampling ratio (0.2 = 20%)

# Create the output directory up front; no error if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)
