In [13]:
import numpy as np
import os
import matplotlib 
import matplotlib.pyplot as plt
import time
import cv2
import mediapipe as mp

In [14]:
mp_holistic = mp.solutions.holistic #holistic model
mp_drawing = mp.solutions.drawing_utils #draw

In [None]:
def extract_keypoints(results):
    #pose = np.array([[res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
    #face = np.array([[res.x,res.y,res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
    lh = np.array([[res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([lh,rh])

In [None]:
def mediapipe_detection(image,model):
    image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) # chuyển màu bgr sang rbg
    image.flags.writeable = False # ngăn chặn chỉnh sửa ảnh để bảo toàn dữ liệu khi xử lí và giúp xử lí nhanh hơn
    results = model.process(image) # dùng model để tạo dự đoán về hình ảnh
    image.flags.writeable = True #mở lại khả năng chỉnh sửa ảnh
    image = cv2.cvtColor (image,cv2.COLOR_RGB2BGR) #chuyển màu bgr sang rbg
    return image,results

In [None]:
print("ok")

In [None]:
def draw_landmarks(image,results):
    #vẽ mặt
    #mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
    #                          mp_drawing.DrawingSpec(color=(184, 115, 117),thickness=1,circle_radius=1),
    #                          mp_drawing.DrawingSpec(color=(184, 115, 117),thickness=1,circle_radius=1)
    #                         )
    #mp_drawing.draw_landmarks(image,results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,
    #                             mp_drawing.DrawingSpec(color=(184, 115, 117),thickness=1,circle_radius=1)
    #                         )
    #vẽ tay trái
    mp_drawing.draw_landmarks(image,results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS)
    #vẽ tay phải
    mp_drawing.draw_landmarks(image,results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS)

In [80]:
DATASET_DIR = r"C:\\Users\\dat\\Downloads\\datasettest"
# Nhãn hành động
actions = ['love', 'eat', 'hello']  # Thay đổi nhãn của bạn ở đây
# Khởi tạo danh sách lưu dữ liệu và nhãn
data = []
labels = []
nth=0
# Duyệt qua từng nhãn hành động
for action in actions:
    print(action)
    stt=nth
    nth+=1
    action_dir = os.path.join(DATASET_DIR, action)
    # Kiểm tra xem thư mục hành động có tồn tại không
    if not os.path.exists(action_dir):
        print(f"Directory not found: {action_dir}")
        continue
    # Duyệt qua các file trong thư mục hành động
    for file in os.listdir(action_dir):
        print(file)
        video_path = os.path.join(action_dir, file)
        # Kiểm tra xem tệp có phải là video không
        if not video_path.endswith(".mp4"):
            print(f"Skipping non-video file: {video_path}")
            continue
        # Đọc video
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Video not found or cannot be opened: {video_path}")
            continue
        # Duyệt qua từng khung hình trong video
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            i=0
            window=[]
            while i<60:
                i+=1
                ret, frame = cap.read()
                if not ret:
                    break
                # Thu nhỏ khung hình trước khi xử lý
                frame = cv2.resize(frame, (640, 480))
                # Chuyển đổi khung hình sang định dạng RGB (mediapipe yêu cầu RGB)
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Xử lý khung hình với mediapipe
                image, results = mediapipe_detection(frame, holistic)
                # Trích xuất keypoints
                keypoints = extract_keypoints(results)
                # Kiểm tra nếu keypoints có kích thước đúng (126,)
                if keypoints.shape == (126,):
                    window.append(keypoints)
                else:
                    print(f"Skipping frame in {file} due to invalid keypoints shape: {keypoints.shape}")
            data.append(window)        
            labels.append(stt)
        cap.release()       

love
1.mp4
2.mp4
3.mp4
4.mp4
5.mp4
eat
1.mp4
2.mp4
3.mp4
4.mp4
5.mp4
hello
1.mp4
2.mp4
3.mp4
4.mp4
5.mp4


In [81]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [82]:
label_map = {labels:num for num,labels in enumerate(actions)}

In [83]:
X = np.array(data)

In [84]:
actions=np.array(actions)

In [85]:
Y = to_categorical(np.array(labels)).astype(int)

In [86]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.05)

In [87]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard

In [88]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)

In [89]:
np.array(data).shape

(15, 60, 126)

In [90]:
np.array(labels).shape

(15,)

In [91]:
model = Sequential()
model.add(LSTM(64,return_sequences=True,activation='relu',input_shape=(60,126)))
model.add(LSTM(128,return_sequences=True,activation='relu'))
model.add(LSTM(64,return_sequences=False,activation='relu'))
model.add(Dense(64,activation='relu'))
model.add(Dense(64,activation='relu'))
model.add(Dense(actions.shape[0],activation='softmax'))

In [92]:
model.compile(optimizer='Adam',loss = 'categorical_crossentropy',metrics=['categorical_accuracy'])

In [None]:
model.fit(X_train,Y_train,epochs=2000,callbacks=[tb_callback])

Epoch 1/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step - categorical_accuracy: 0.4286 - loss: 1.0959
Epoch 2/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - categorical_accuracy: 0.4286 - loss: 1.0138
Epoch 3/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - categorical_accuracy: 0.7143 - loss: 1.3846
Epoch 4/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - categorical_accuracy: 0.6429 - loss: 1.0814
Epoch 5/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - categorical_accuracy: 0.6429 - loss: 1.0842
Epoch 6/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - categorical_accuracy: 0.5000 - loss: 1.0840
Epoch 7/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - categorical_accuracy: 0.5000 - loss: 1.0805
Epoch 8/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step 