# Model Phase - Phân loại phase của golf swing sử dụng BiLSTM
Mô hình sử dụng BiLSTM để dự đoán 4 phase (setup, backswing, downswing, follow-through) từ video, với Majority Filter Smoothing để tăng độ chính xác.

In [None]:
import cv2
import numpy as np
import json
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# Dataset root directory. Videos are read from its 'videos' subdirectory and
# annotation JSON files from its 'annotations' subdirectory.
DATA_PATH = r'C:\Users\USER\Desktop\golf_swing\golf_dataset'
VIDEO_PATH = os.path.join(DATA_PATH, 'videos')
ANNOTATION_PATH = os.path.join(DATA_PATH, 'annotations')

# Return the total number of frames reported for a video
def get_total_frames(video_path):
    """Return the frame count OpenCV reports for ``video_path``.

    Args:
        video_path: Path to the video file.

    Returns:
        ``CAP_PROP_FRAME_COUNT`` as a non-negative int, or 0 when the file
        cannot be opened (an error message is printed in that case).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f'Error: Cannot open video file {video_path}')
            return 0
        # NOTE(review): the count comes from container metadata and may be
        # unreliable for some files; clamp so callers never see a negative
        # value.
        return max(0, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
    finally:
        # Always free the capture handle, even if reading the property fails.
        cap.release()

# Extract frames from a video and attach a phase label to each one
def extract_frames_and_labels(video_file, annotation_file):
    """Read a video and pair every annotated frame with its phase label.

    Frames that fall outside every annotated phase range are reported with a
    warning and skipped, so the returned arrays always have the same length
    and ``frames[i]`` corresponds to ``labels[i]``.

    Args:
        video_file: Video file name, joined with ``VIDEO_PATH``.
        annotation_file: Path to a JSON file mapping a phase name ('setup',
            'backswing', 'downswing', 'follow-through') to an object with
            ``start_frame`` and ``end_frame`` (0-based, inclusive).

    Returns:
        ``(frames, labels)`` as NumPy arrays: frames resized to 128x128 and
        integer labels 0-3. Two empty arrays are returned when the video
        cannot be opened or the annotation file cannot be read.
    """
    frames = []
    labels = []
    video_path = os.path.join(VIDEO_PATH, video_file)
    cap = cv2.VideoCapture(video_path)

    try:
        if not cap.isOpened():
            print(f'Error: Cannot open video file {video_path}')
            return np.array([]), np.array([])

        try:
            # JSON is UTF-8 by specification; do not depend on the platform
            # default encoding.
            with open(annotation_file, 'r', encoding='utf-8') as f:
                annotations = json.load(f)
        except FileNotFoundError:
            print(f'Error: Annotation file {annotation_file} not found')
            return np.array([]), np.array([])
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f'Error: Invalid JSON format in {annotation_file}')
            return np.array([]), np.array([])

        phase_to_label = {'setup': 0, 'backswing': 1, 'downswing': 2, 'follow-through': 3}

        # Validate the annotation once instead of once per frame; malformed
        # or unknown phases are reported a single time and ignored.
        phase_ranges = []
        for phase, info in annotations.items():
            try:
                phase_ranges.append(
                    (info['start_frame'], info['end_frame'], phase_to_label[phase])
                )
            except (KeyError, TypeError):
                print(f'Error: Invalid phase data in {annotation_file} for phase {phase}')

        frame_count = 0
        total_frames = get_total_frames(video_path)

        while cap.isOpened() and frame_count < total_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # First matching range wins, in annotation order.
            label = next(
                (lbl for start, end, lbl in phase_ranges if start <= frame_count <= end),
                None,
            )
            if label is None:
                print(f'Warning: Frame {frame_count} in {video_file} has no phase label')
            else:
                # Keep the frame only together with its label so the two
                # lists stay aligned index by index.
                frames.append(cv2.resize(frame, (128, 128)))
                labels.append(label)
            frame_count += 1
    finally:
        # Release the capture on every exit path, including early returns.
        cap.release()

    labeled_frames = len(labels)
    print(f'Info: {labeled_frames}/{total_frames} frames labeled in {video_file}')
    if labeled_frames == 0:
        print(f'Error: No frames labeled in {video_file}')
    return np.array(frames), np.array(labels)

# Apply majority-filter smoothing to a label sequence
def apply_majority_filter(labels, window_size=5):
    """Replace each label by the most frequent label in a centred window.

    The window spans ``window_size // 2`` positions on each side of the
    current index and is truncated at the sequence boundaries. Ties resolve
    to the smallest label value, as given by ``np.bincount(...).argmax()``.

    Args:
        labels: Sequence of non-negative integer labels.
        window_size: Nominal width of the voting window.

    Returns:
        NumPy array holding the smoothed label for every input position.
    """
    half_window = window_size // 2
    count = len(labels)
    votes = [
        np.bincount(
            labels[max(0, pos - half_window):min(count, pos + half_window + 1)]
        ).argmax()
        for pos in range(count)
    ]
    return np.array(votes)

# Load the whole dataset
def load_dataset():
    """Load frames and labels for every annotated ``.mp4`` in ``VIDEO_PATH``.

    For a video ``<name>.mp4`` the annotation is expected at
    ``ANNOTATION_PATH/<name>_error.json``. Videos without an annotation file,
    without usable data, or whose frame and label counts differ are skipped
    with a message.

    Returns:
        ``(frames, labels)``: the per-video arrays concatenated in sorted
        file-name order, or two empty arrays when nothing could be loaded.
    """
    all_frames = []
    all_labels = []

    if not os.path.exists(VIDEO_PATH):
        print(f'Error: Directory {VIDEO_PATH} does not exist')
        return np.array([]), np.array([])

    # os.listdir order is arbitrary; sort so the sample order is reproducible.
    video_files = sorted(
        f for f in os.listdir(VIDEO_PATH) if f.lower().endswith('.mp4')
    )
    if not video_files:
        print(f'Error: No .mp4 files found in {VIDEO_PATH}')
        return np.array([]), np.array([])

    for video_file in video_files:
        stem = os.path.splitext(video_file)[0]
        annotation_file = os.path.join(ANNOTATION_PATH, f'{stem}_error.json')
        if not os.path.exists(annotation_file):
            print(f'Error: No annotation file for {video_file}')
            continue

        # Pass the bare file name: extract_frames_and_labels joins it with
        # VIDEO_PATH itself, so passing an already-joined path would only
        # work when VIDEO_PATH happens to be absolute.
        frames, labels = extract_frames_and_labels(video_file, annotation_file)
        if len(frames) == 0 or len(labels) == 0:
            print(f'Warning: No valid frames or labels for {video_file}')
            continue
        if len(frames) != len(labels):
            # Misaligned arrays would silently pair frames with wrong labels
            # after concatenation.
            print(f'Warning: Skipping {video_file}: {len(frames)} frames but {len(labels)} labels')
            continue
        all_frames.append(frames)
        all_labels.append(labels)

    if not all_frames:
        print('Error: No valid data loaded')
        return np.array([]), np.array([])

    return np.concatenate(all_frames), np.concatenate(all_labels)

def main():
    """Train the BiLSTM phase classifier, evaluate it and save the model.

    Steps: load the dataset, scale pixels to [0, 1], split 80/20 into train
    and test sets, train on augmented batches, then report test accuracy
    against the ground-truth labels both before and after majority-filter
    smoothing. The trained model is written to ``phase_classifier.h5``.

    NOTE(review): ``timesteps`` is 1, so every sequence fed to the LSTM is a
    single flattened frame; temporal context comes only from the smoothing.
    """
    # Prepare the data
    frames, labels = load_dataset()
    if len(frames) == 0:
        print('Exiting due to no valid data')
        return
    if len(frames) != len(labels):
        print(f'Error: {len(frames)} frames but {len(labels)} labels; cannot pair samples')
        return

    timesteps = 1  # Each frame is one timestep
    height, width, channels = 128, 128, 3
    feature_dim = height * width * channels
    batch_size = 32

    # Normalise pixel values; float32 avoids the 8x memory blow-up of the
    # float64 array that a plain division of uint8 frames would create.
    frames = frames.astype(np.float32) / 255.0
    labels = to_categorical(labels, num_classes=4)  # One-hot encoding for the 4 phases

    # Reshape for the BiLSTM pipeline (samples, timesteps, height, width, channels)
    frames_reshaped = frames.reshape((frames.shape[0], timesteps, height, width, channels))

    # Train/test split on indices. The test indices are re-sorted so the test
    # frames keep their original temporal order, which the majority filter
    # below relies on; the training order stays shuffled.
    train_idx, test_idx = train_test_split(
        np.arange(len(frames_reshaped)), test_size=0.2, random_state=42
    )
    test_idx = np.sort(test_idx)
    X_train, y_train = frames_reshaped[train_idx], labels[train_idx]
    X_test, y_test = frames_reshaped[test_idx], labels[test_idx]

    # Data augmentation (applied to the image data, training only)
    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True
    )

    # Generator that augments 5D batches and flattens them for the LSTM
    def generate_augmented_data(X, y, batch_size):
        """Yield (augmented flattened frames, labels) batches forever."""
        while True:
            for i in range(0, len(X), batch_size):
                batch_X = X[i:i + batch_size]
                batch_y = y[i:i + batch_size]
                batch_X_flat = batch_X.reshape(-1, height, width, channels)
                # One pass over exactly this batch, order preserved so the
                # images stay aligned with batch_y.
                aug_iterator = datagen.flow(
                    batch_X_flat, batch_size=len(batch_X_flat), shuffle=False
                )
                batch_X_aug = next(aug_iterator)
                yield batch_X_aug.reshape(batch_X.shape[0], timesteps, feature_dim), batch_y

    train_generator = generate_augmented_data(X_train, y_train, batch_size=batch_size)

    # Validation and evaluation use the untouched (non-augmented) test frames.
    X_test_flat = X_test.reshape(X_test.shape[0], timesteps, feature_dim)

    # Build the BiLSTM model
    model = Sequential([
        Bidirectional(LSTM(64, return_sequences=False), input_shape=(timesteps, feature_dim)),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(4, activation='softmax')  # 4 phases
    ])

    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

    # Train with the generator. Ceil division covers the final partial batch
    # and never yields zero steps for small datasets.
    steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
    model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=15,
              validation_data=(X_test_flat, y_test))

    # Raw model performance against the ground-truth labels
    test_loss, test_accuracy = model.evaluate(X_test_flat, y_test)
    print(f'Test loss: {test_loss:.4f}')
    print(f'Test accuracy before smoothing: {test_accuracy:.4f}')

    # Apply majority-filter smoothing to the predictions and score the
    # smoothed labels against the ground truth (not against the model's own
    # output, which would only measure how much smoothing changed it).
    y_true = np.argmax(y_test, axis=1)
    y_test_pred = model.predict(X_test_flat)
    y_test_pred_classes = np.argmax(y_test_pred, axis=1)
    y_test_smoothed = apply_majority_filter(y_test_pred_classes)
    smoothed_accuracy = float(np.mean(y_test_smoothed == y_true))
    print(f'Test accuracy after smoothing: {smoothed_accuracy:.4f}')

    # Save the model
    model.save('phase_classifier.h5')

# Run the training pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()

## Lưu ý
- Đảm bảo thư mục dữ liệu tồn tại, có video (.mp4) trong thư mục `videos` và file JSON chú thích tương ứng `<tên_video>_error.json` trong thư mục `annotations`.
- Kiểm tra lỗi nếu mô hình không tải hoặc dữ liệu không hợp lệ.