# データ作成手順（MediaPipe Hands を使って Rock-Paper-Scissors フォルダから 3×21 次元の座標＋ラベルを作る）

想定フォルダ構成:
- Rock-Paper-Scissors/
    - rock/
    - paper/
    - scissors/
    - ...（各サブフォルダに画像が入っている）

やることの要点:
1. 画像を読み込み、MediaPipe Hands で手検出・ランドマーク取得。
2. 21点それぞれの (x, y, z) を取得して配列化。
     - 典型は shape=(21, 3)（各ランドマークに対して x,y,z）。
     - 要件の「3×21」に合わせる場合は転置して shape=(3, 21) にする。
3. クラスラベル（rock/paper/scissors）を数値にマップして保持。
4. 全データをまとめて保存（.npz/.npy/.csv 等）。

サンプルコード（実行セルに貼って使ってください）:

```python
# 必要パッケージ: mediapipe, opencv-python, numpy
import os
import cv2
import numpy as np
import mediapipe as mp

mp_hands = mp.solutions.hands

data_X = []  # 各要素は shape=(3,21) や flatten 63-d
data_y = []

label_map = {'rock':0, 'paper':1, 'scissors':2}
root = "Rock-Paper-Scissors"

with mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5) as hands:
        for label_name, label_id in label_map.items():
                folder = os.path.join(root, label_name)
                if not os.path.isdir(folder):
                        continue
                for fname in os.listdir(folder):
                        path = os.path.join(folder, fname)
                        img = cv2.imread(path)
                        if img is None:
                                continue
                        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                        results = hands.process(img_rgb)
                        if results.multi_hand_landmarks:
                                lm = results.multi_hand_landmarks[0].landmark  # 21 landmarks
                                arr = np.array([[p.x, p.y, p.z] for p in lm])  # shape (21,3)
                                arr = arr.T  # -> shape (3,21) to satisfy "3×21"
                                data_X.append(arr)
                                data_y.append(label_id)
                        else:
                                # ランドマーク検出できない画像はスキップ（必要なら代替処理）
                                continue

# numpy 配列に変換
X = np.stack(data_X)  # shape (N, 3, 21)
y = np.array(data_y)  # shape (N,)

# 保存例
np.savez_compressed("rps_hands_3x21.npz", X=X, y=y)
# もしくはフラット化して CSV にする場合:
# X_flat = X.reshape(X.shape[0], -1)  # shape (N, 63)
# np.savetxt("rps_hands_63d.csv", np.c_[y.reshape(-1,1), X_flat], delimiter=",")
```

注意点:
- MediaPipe の x,y は正規化座標（画像幅・高さで 0..1）。絶対ピクセル座標が必要なら x*width, y*height に変換してください。z は相対深度。
- 複数手が写っている画像や検出失敗の扱い（スキップ、補完など）を方針に合わせて実装してください。
- データ増強や正規化は学習時に行うのが一般的です。

In [None]:
import os
import shutil

src_root = "Rock-Paper-Scissors"
dst_root = "Rock-Paper-Scissors-merged"

classes = ["rock", "paper", "scissors"]
splits = ["train", "test", "validation"]

# 出力先フォルダ作成
for cls in classes:
    os.makedirs(os.path.join(dst_root, cls), exist_ok=True)

# 各split内のクラスフォルダから画像をコピー
for split in splits:
    split_path = os.path.join(src_root, split)
    if not os.path.isdir(split_path):
        continue
    for cls in classes:
        cls_path = os.path.join(split_path, cls)
        if not os.path.isdir(cls_path):
            continue
        for fname in os.listdir(cls_path):
            src_file = os.path.join(cls_path, fname)
            # ファイル名の重複を避けるためにsplit名をプレフィックスに追加
            dst_file = os.path.join(dst_root, cls, f"{split}_{fname}")
            shutil.copy2(src_file, dst_file)

print("完了: Rock-Paper-Scissors-merged に統合しました")
for cls in classes:
    count = len(os.listdir(os.path.join(dst_root, cls)))
    print(f"  {cls}: {count} 枚")

完了: Rock-Paper-Scissors-merged に統合しました
  rock: 964 枚
  paper: 964 枚
  scissors: 964 枚


In [2]:
import cv2
import numpy as np
import mediapipe as mp

mp_hands = mp.solutions.hands

data_X = []
data_y = []

label_map = {"rock": 0, "paper": 1, "scissors": 2}

with mp_hands.Hands(
    static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5
) as hands:
    for label_name, label_id in label_map.items():
        folder = os.path.join(dst_root, label_name)
        if not os.path.isdir(folder):
            continue
        for fname in os.listdir(folder):
            path = os.path.join(folder, fname)
            img = cv2.imread(path)
            if img is None:
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = hands.process(img_rgb)
            if results.multi_hand_landmarks:
                lm = results.multi_hand_landmarks[0].landmark
                arr = np.array([[p.x, p.y, p.z] for p in lm])  # shape (21, 3)
                arr = arr.T  # shape (3, 21)
                data_X.append(arr)
                data_y.append(label_id)

# numpy配列に変換
X = np.stack(data_X)  # shape (N, 3, 21)
y = np.array(data_y)  # shape (N,)

# 保存
np.savez_compressed("rps_hands_3x21.npz", X=X, y=y)

print(f"データセット作成完了: {X.shape[0]} サンプル")
print(f"X shape: {X.shape}, y shape: {y.shape}")
for label_name, label_id in label_map.items():
    print(f"  {label_name}: {np.sum(y == label_id)} サンプル")

KeyboardInterrupt: 

In [3]:
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


c:\Users\Owner\ProgramProject\RockPaperScissors\.venv\Scripts\python.exe: No module named pip


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np

# データの準備（flattenして63次元に）
data = np.load("rps_hands_3x21.npz")
X = data["X"]  # shape (N, 3, 21)
y = data["y"]  # shape (N,)

X_flat = X.reshape(X.shape[0], -1)  # shape (N, 63)

# 訓練・テスト分割
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y, test_size=0.2, random_state=42, stratify=y
)

# 標準化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# k-NNモデルの学習
k = 5
knn = KNeighborsClassifier(n_neighbors=k, metric="euclidean")
knn.fit(X_train_scaled, y_train)

# テストデータで精度確認
accuracy = knn.score(X_test_scaled, y_test)
print(f"k-NN 精度: {accuracy:.4f}")


# 不正な手の検出用：k近傍への平均距離を計算
def predict_with_rejection(model, scaler, X_new, threshold):
    """
    予測と同時に、距離が閾値を超えたら「不正な手(ラベル3)」として返す
    """
    X_scaled = scaler.transform(X_new.reshape(1, -1) if X_new.ndim == 1 else X_new)
    distances, indices = model.kneighbors(X_scaled)
    mean_distances = distances.mean(axis=1)

    predictions = model.predict(X_scaled)
    # 閾値を超えた場合は不正な手（ラベル3）
    predictions = np.where(mean_distances > threshold, 3, predictions)

    return predictions, mean_distances


# 訓練データでの距離分布から閾値を決定
train_distances, _ = knn.kneighbors(X_train_scaled)
train_mean_distances = train_distances.mean(axis=1)

# 閾値：訓練データの距離の95パーセンタイル + マージン
threshold = np.percentile(train_mean_distances, 95) * 1.5
print(f"不正な手判定の距離閾値: {threshold:.4f}")
print(
    f"訓練データ距離の統計: mean={train_mean_distances.mean():.4f}, std={train_mean_distances.std():.4f}"
)

# モデルと閾値を保存用に辞書化
rps_classifier = {
    "model": knn,
    "scaler": scaler,
    "threshold": threshold,
    "label_map": {0: "rock", 1: "paper", 2: "scissors", 3: "invalid"},
}

print("\n分類器の準備完了")
print(f"ラベルマップ: {rps_classifier['label_map']}")

k-NN 精度: 1.0000
不正な手判定の距離閾値: 2.7722
訓練データ距離の統計: mean=0.7764, std=0.5607

分類器の準備完了
ラベルマップ: {0: 'rock', 1: 'paper', 2: 'scissors', 3: 'invalid'}


In [3]:
import joblib

# モデル、スケーラー、閾値、ラベルマップをまとめて保存
joblib.dump(rps_classifier, "rps_classifier.pkl")

print("分類器を 'rps_classifier.pkl' に保存しました")

# 読み込み確認（再起動後はこのコードで復元可能）
# loaded_classifier = joblib.load("rps_classifier.pkl")
# knn = loaded_classifier["model"]
# scaler = loaded_classifier["scaler"]
# threshold = loaded_classifier["threshold"]
# label_map = loaded_classifier["label_map"]

分類器を 'rps_classifier.pkl' に保存しました


In [4]:
import cv2
import numpy as np
import mediapipe as mp

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# 分類器の取得
model = rps_classifier["model"]
scaler_rps = rps_classifier["scaler"]
threshold_rps = rps_classifier["threshold"]
label_map_rps = rps_classifier["label_map"]

cap = cv2.VideoCapture(0)

with mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        label_text = "No hand detected"

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # ランドマーク描画
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS
                )

                # 21点の座標を取得
                lm = hand_landmarks.landmark
                arr = np.array([[p.x, p.y, p.z] for p in lm])  # shape (21, 3)
                arr = arr.T  # shape (3, 21)
                X_new = arr.flatten()  # shape (63,)

                # スケーリングして予測
                X_scaled = scaler_rps.transform(X_new.reshape(1, -1))
                distances, _ = model.kneighbors(X_scaled)
                mean_distance = distances.mean()

                if mean_distance > threshold_rps:
                    pred_label = 3  # invalid
                else:
                    pred_label = model.predict(X_scaled)[0]

                label_text = f"{label_map_rps[pred_label]} (dist: {mean_distance:.2f})"

        # 結果を表示
        cv2.putText(
            frame, label_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 3
        )
        cv2.imshow("Rock Paper Scissors", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

cap.release()
cv2.destroyAllWindows()