In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install mediapipe
!pip install argparse
!pip install pathlib
!pip install tqdm
!pip install cv2
!pip install AdvancedFeatureExtractor

Collecting argparse
  Using cached argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)
Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


^C


In [16]:
# scripts/build_feature_dataset.py
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import argparse
from pathlib import Path
from tqdm import tqdm
import math

# --- Helper Functions ---

def calculate_3point_angle(a, b, c):
    """Tính góc tạo bởi 3 điểm a, b, c (b là đỉnh). Tọa độ pixel."""
    try:
        ba = np.array(a) - np.array(b)
        bc = np.array(c) - np.array(b)
        cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
        angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
        return np.degrees(angle)
    except (ValueError, ZeroDivisionError):
        return None

def calculate_inclination_angle(p1, p2):
    """Tính góc nghiêng của đường thẳng p1-p2 so với trục ngang. Tọa độ pixel."""
    try:
        delta_y = p2[1] - p1[1]
        delta_x = p2[0] - p1[0]
        angle = np.degrees(math.atan2(delta_y, delta_x))
        return angle
    except (ValueError, ZeroDivisionError):
        return None

def get_landmark_coords(landmarks, landmark_enum, shape):
    """Lấy tọa độ pixel từ landmark enum."""
    try:
        lm = landmarks.landmark[landmark_enum]
        if lm.visibility < 0.5:
            return None
        return (int(lm.x * shape[1]), int(lm.y * shape[0]))
    except (IndexError, KeyError):
        return None

# --- Main Class ---

class FeatureDatasetBuilder:
    def __init__(self, output_csv_path):
        self.output_csv_path = Path(output_csv_path)
        self.mp_pose = mp.solutions.pose
        self.pose = self.mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.6, model_complexity=2)

    def process_videos_in_directory(self, root_dir):
        video_paths = list(Path(root_dir).rglob('*.mp4'))
        all_features = []

        for video_path in tqdm(video_paths, desc="Building Feature Dataset"):
            frame = self._extract_frame(video_path)
            if frame is None:
                continue

            results = self.pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            if results.pose_landmarks:
                geometric_features = self._extract_geometric_features(results.pose_landmarks, frame.shape)
                if geometric_features:
                    labels = self._get_labels_from_path(video_path, root_dir)
                    record = {**labels, **geometric_features, 'source_file': str(video_path)}
                    all_features.append(record)

        df = pd.DataFrame(all_features)
        df.to_csv(self.output_csv_path, index=False)
        print(f"\nFeature dataset built successfully. Data saved to {self.output_csv_path}")
        return df

    def _extract_frame(self, video_path):
        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened(): return None
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        mid_frame_idx = max(0, frame_count // 2)
        cap.set(cv2.CAP_PROP_POS_FRAMES, mid_frame_idx)
        ret, frame = cap.read()
        cap.release()
        return frame if ret else None

    def _get_labels_from_path(self, video_path, root_dir):
        relative_path = video_path.relative_to(root_dir)
        parts = relative_path.parts
        return {
            'view': parts[0],
            'quality': parts[1],
            'label': 1 if 'Good' in parts[1] else 0,
            'category': parts[2]
        }

    def _extract_geometric_features(self, landmarks, shape):
        lm_enum = self.mp_pose.PoseLandmark
        coords = {name: get_landmark_coords(landmarks, lm_enum[name], shape) for name in lm_enum._member_map_}

        # --- Kiểm tra sự tồn tại của các keypoints tối quan trọng ---
        required_landmarks = [
            "LEFT_SHOULDER", "RIGHT_SHOULDER", "LEFT_HIP", "RIGHT_HIP",
            "LEFT_KNEE", "RIGHT_KNEE", "LEFT_ANKLE", "RIGHT_ANKLE",
            "LEFT_WRIST", "LEFT_ELBOW", "RIGHT_WRIST", "RIGHT_ELBOW"
        ]
        if any(coords[name] is None for name in required_landmarks):
            return {}

        features = {}

        # --- Góc khớp (từ process_swing.py) ---
        features['left_arm_angle'] = calculate_3point_angle(coords["LEFT_WRIST"], coords["LEFT_ELBOW"], coords["LEFT_SHOULDER"])
        features['right_arm_angle'] = calculate_3point_angle(coords["RIGHT_WRIST"], coords["RIGHT_ELBOW"], coords["RIGHT_SHOULDER"])
        features['left_knee_angle'] = calculate_3point_angle(coords["LEFT_HIP"], coords["LEFT_KNEE"], coords["LEFT_ANKLE"])
        features['right_knee_angle'] = calculate_3point_angle(coords["RIGHT_HIP"], coords["RIGHT_KNEE"], coords["RIGHT_ANKLE"])
        features['left_hip_angle'] = calculate_3point_angle(coords["LEFT_SHOULDER"], coords["LEFT_HIP"], coords["LEFT_KNEE"])
        features['right_hip_angle'] = calculate_3point_angle(coords["RIGHT_SHOULDER"], coords["RIGHT_HIP"], coords["RIGHT_KNEE"])

        # --- Góc nghiêng (từ MediaPipe_class.py) ---
        features['shoulders_inclination'] = calculate_inclination_angle(coords["LEFT_SHOULDER"], coords["RIGHT_SHOULDER"])
        features['hips_inclination'] = calculate_inclination_angle(coords["LEFT_HIP"], coords["RIGHT_HIP"])

        # --- Góc xoay thân (từ process_swing.py) ---
        features['pelvis_angle'] = calculate_3point_angle(coords["LEFT_ANKLE"], coords["LEFT_HIP"], coords["RIGHT_SHOULDER"])

        # --- Đặc trưng về vị trí (từ process_swing.py) ---
        midpoint_ankles_x = (coords["LEFT_ANKLE"][0] + coords["RIGHT_ANKLE"][0]) / 2
        wrist_midpoint_offset_x = coords["LEFT_WRIST"][0] - midpoint_ankles_x

        shoulder_width = np.linalg.norm(np.array(coords["LEFT_SHOULDER"]) - np.array(coords["RIGHT_SHOULDER"]))
        if shoulder_width > 0:
            features['wrist_midpoint_offset_x_ratio'] = wrist_midpoint_offset_x / shoulder_width

        return {k: v for k, v in features.items() if v is not None}

In [17]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Extract hybrid pose features from golf swing videos.")
    parser.add_argument("--root_dir", required=True, help="Root directory of the dataset.")
    parser.add_argument("--output_csv", default="results/golf_swing_features_hybrid.csv", help="Path to save the output CSV.")

    import sys
    if 'google.colab' in sys.modules:
        root_path = "/content/drive/MyDrive/image/CUSTOM_DATASET"
        output_path = "/content/drive/MyDrive/image/golf_swing_features_hybrid.csv"

        print(f"Running on Colab.")
        print(f"Dataset root directory: {root_path}")
        print(f"Output CSV path: {output_path}")

        args = parser.parse_args([
            '--root_dir', root_path,
            '--output_csv', output_path
        ])
    else:
        args = parser.parse_args()

    Path(args.output_csv).parent.mkdir(parents=True, exist_ok=True)

    extractor = HybridFeatureExtractor(args.output_csv)
    extractor.process_videos_in_directory(args.root_dir)

Running on Colab.
Dataset root directory: /content/drive/MyDrive/image/CUSTOM_DATASET
Output CSV path: /content/drive/MyDrive/image/golf_swing_features_hybrid.csv


Extracting Hybrid Features: 100%|██████████| 361/361 [23:30<00:00,  3.91s/it]


Hybrid feature extraction complete. Data saved to /content/drive/MyDrive/image/golf_swing_features_hybrid.csv





In [18]:
# notebooks/Vong1_Classification_PoC.ipynb

# Cell 1: Cài đặt và Imports
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# Cell 2: Tải và Khám phá Dữ liệu Đặc trưng
# (Giả sử script extract_features.py đã chạy và tạo ra file csv)
df = pd.read_csv('/content/drive/MyDrive/image/golf_swing_features_hybrid.csv')

print("Shape of the dataset:", df.shape)
print("\nLabel distribution:")
print(df['quality'].value_counts())
df.head()

# Cell 3: Phân tích Dữ liệu Khám phá (EDA) - Rất quan trọng để gây ấn tượng!
import plotly.express as px
df_features = pd.read_csv('/content/drive/MyDrive/image/golf_swing_features_hybrid.csv')

# So sánh góc gập gối trái
fig = px.box(df_features, x='quality', y='left_knee_angle', color='quality',
             title='So sánh góc Gối Trái giữa Swing Tốt và Xấu',
             labels={'left_knee_angle': 'Góc Gối Trái (độ)', 'quality': 'Chất lượng Swing'})
fig.show()
# -> Phân tích: "Từ biểu đồ, ta thấy các cú swing 'Tốt' có góc gập gối trái trung bình khoảng 160 độ, trong khi các cú 'Xấu' có phân phối rộng hơn và nhiều giá trị ngoại lai hơn, cho thấy sự thiếu ổn định."
# -> Nhận xét: Có thể thấy các cú swing "Tốt" có xu hướng gom cụm vị trí cổ tay chặt chẽ hơn.

# Cell 4: Chuẩn bị Dữ liệu cho Huấn luyện
# Bỏ các cột không phải là feature
features_df = df.drop(columns=['file_path', 'view', 'quality', 'label', 'category'])
# Bỏ các keypoints có visibility thấp (tùy chọn nhưng nên làm)
# visibility_cols = [col for col in features_df.columns if col.endswith('_v')]
# ... (logic loại bỏ)

target = df['label']

# Xử lý NaN (nếu MediaPipe không tìm thấy người)
features_df.fillna(features_df.mean(), inplace=True)

# Chia tập train/test
X_train, X_test, y_train, y_test = train_test_split(
    features_df, target, test_size=0.2, random_state=42, stratify=target
)

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Training set size:", X_train_scaled.shape)
print("Test set size:", X_test_scaled.shape)

# Cell 5: Huấn luyện và Đánh giá Mô hình
# Sử dụng Logistic Regression làm baseline
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train_scaled, y_train)

# Đánh giá trên tập test
y_pred = model.predict(X_test_scaled)

# Metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['Bad Swing', 'Good Swing'])
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(report)

# Trực quan hóa Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Bad Swing', 'Good Swing'], yticklabels=['Bad Swing', 'Good Swing'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Cell 6: Lưu Mô hình và Scaler để sử dụng sau
joblib.dump(model, '../models/swing_classifier_v1.joblib')
joblib.dump(scaler, '../models/scaler_v1.joblib')
print("Model and scaler saved successfully.")


import matplotlib.pyplot as plt

# Lấy feature importance từ mô hình Random Forest
importances = rf_model.feature_importances_
feature_names = X_train.columns
feature_importance_df = pd.DataFrame({'feature': feature_names, 'importance': importances})
feature_importance_df = feature_importance_df.sort_values('importance', ascending=False)

# Trực quan hóa
plt.figure(figsize=(12, 8))
sns.barplot(x='importance', y='feature', data=feature_importance_df)
plt.title('Mức độ quan trọng của các Đặc trưng')
plt.show()

# -> Phân tích: "Kết quả cho thấy 'left_arm_angle' và 'hip_width_ratio' là hai đặc trưng quan trọng nhất để phân biệt swing tốt/xấu, điều này hoàn toàn phù hợp với lý thuyết huấn luyện golf."

Shape of the dataset: (693, 16)

Label distribution:
quality
Bad Swings     579
Good Swings    114
Name: count, dtype: int64


TypeError: Could not convert ['Bad Putting Posture HunchedBad Putting Posture HunchedBad Putting Posture StraightBad Putting Posture StraightaddresstopcontactaddresstopcontactaddresstopcontactBad Near To BalladdresstopcontactBad Near To BallBad Near To BalladdresstopcontactBad Straight BodyBad Straight BodyaddresstopcontactBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyBad Straight BodyaddresstopcontactaddresstopcontactaddresstopcontactBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureBad Knee PostureaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactGood Body Posture and Ball DistanceaddresstopcontactaddresstopcontactGood Body Posture and Ball DistanceGood Knee PostureGood Knee PostureGood Knee PostureGood Knee PostureGood Knee PostureGood Knee PostureGood Knee PostureGood Knee PostureGood Knee PostureBad Iron Wide StanceaddresstopcontactaddresstopcontactBad Elbow Posture BackswingBad Elbow Posture BackswingBad Elbow Posture BackswingaddresstopcontactaddresstopcontactaddresstopcontactBad Putting Wide StanceaddresstopcontactBad Putting Narrow StanceaddresstopcontactaddresstopcontactBad Elbow Posture FrontswingaddresstopcontactBad Iron Narrow StanceaddresstopcontactaddresstopcontactBad Iron Narrow StanceBad Iron Narrow StanceaddresstopcontactaddresstopcontactBad Chip Wide StanceaddresstopcontactBad Chin PositionBad Chin PositionBad Chin PositionBad Chin PositionBad Chin PositionBad Chin PositionaddresstopcontactaddresstopcontactaddresstopcontactGood Side IronGood Side IronGood Side IronGood Side IronaddresstopcontactaddresstopcontactaddresstopcontactaddresstopcontactGood Side IronaddresstopcontactaddresstopcontactaddresstopcontactGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingGood Elbow Posture BackswingaddresstopcontactaddresstopcontactGood Elbow Posture FrontswingGood Elbow Posture FrontswingGood Elbow Posture FrontswingGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionGood Chin PositionaddresstopcontactGood Ball PositionaddresstopcontactGood Ball Position'] to numeric