In [23]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Bidirectional, GRU, Dense, Dropout, Attention
from tensorflow.keras.models import Model


In [24]:
def load_data(file_path):
    df = pd.read_csv(file_path)

    X, y = [], []
    
    for name, group in df.groupby('video_name'):
        # Lấy các keypoints, bỏ cột video_name và label
        features = group.drop(columns=['video_name', 'label']).values  
        
        # Chọn 1 label duy nhất cho cả video (cách 1: lấy label đầu tiên)
        label = group['label'].iloc[0]  

        # Cách 2: chọn label phổ biến nhất
        # label = Counter(group['label']).most_common(1)[0][0] 
        
        X.append(features)
        y.append(label)

    # Padding để các video có cùng số frame
    X = pad_sequences(X, padding='post', dtype='float32')
    y = np.array(y)  # Convert label thành numpy array

    return X, y

In [25]:
X_train, y_train = load_data("dataset/train.csv")
X_test, y_test = load_data("dataset/test.csv")

print("X_train shape:", X_train.shape)  # (số video, số frame max, số keypoints*4)
print("y_train shape:", y_train.shape)  # (số video,)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


X_train shape: (114, 302, 36)
y_train shape: (114,)
X_test shape: (30, 259, 36)
y_test shape: (30,)


In [26]:
def build_model(input_shape, num_classes=6):
    input_layer = Input(shape=input_shape)

    # Bi-GRU Layer
    x = Bidirectional(GRU(64, return_sequences=True))(input_layer)
    x = Bidirectional(GRU(32, return_sequences=True))(x)

    # Attention Layer
    attention = Attention()([x, x])

    # Fully Connected Layer
    x = Dense(32, activation='relu')(attention)
    x = Dropout(0.5)(x)

    # Output Layer (6 nhãn)
    output_layer = Dense(num_classes, activation='softmax')(x)  # Multi-class classification

    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [27]:
model = build_model(input_shape=(X_train.shape[1], X_train.shape[2]), num_classes=6)
model.summary()


In [28]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_len = 302  # Chọn số frame tối đa dựa trên dataset

# Padding để các video có cùng số frame
X_train = pad_sequences(X_train, maxlen=max_len, padding='post', dtype='float32')
X_test = pad_sequences(X_test, maxlen=max_len, padding='post', dtype='float32')

print("X_train shape:", X_train.shape)  # (số video, số frame max, số feature)
print("X_test shape:", X_test.shape)    # (số video, số frame max, số feature)


X_train shape: (114, 302, 36)
X_test shape: (30, 302, 36)


In [29]:
from tensorflow.keras.utils import to_categorical
import numpy as np

num_classes = 6  # 6 loại squat

# Chuyển đổi nhãn thành one-hot encoding
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

print("y_train shape:", y_train.shape)  # (số video, 6)
print("y_test shape:", y_test.shape)    # (số video, 6)


y_train shape: (114, 6)
y_test shape: (30, 6)


In [30]:
model.fit(X_train, y_train, epochs=50, batch_size=8, validation_split=0.2)


Epoch 1/50


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(None, 6), output.shape=(None, 302, 6)

In [None]:
predictions = model.predict(X_test)

# In kết quả dự đoán
for i in range(len(predictions)):
    print(f"Video {i+1}: Predicted Label: {np.argmax(predictions[i])}")
