In [3]:
#Capturing frame and landmarks from data
import cv2
import mediapipe as mp
import csv

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_drawing = mp.solutions.drawing_utils

# Open your training video
cap = cv2.VideoCapture('walk3.avi')

# Create a CSV file to store landmarks
with open('pose_landmarks.csv', mode='a', newline='') as f:
    csv_writer = csv.writer(f)
    
    # Define CSV header (add labels later)
    headers = ['frame', 'label']
    for i in range(33):  # MediaPipe Pose has 33 landmarks
        headers += [f'x_{i}', f'y_{i}', f'z_{i}', f'visibility_{i}']
    csv_writer.writerow(headers)

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # Process the frame to get pose landmarks
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        if results.pose_landmarks:
            # Prepare row data for CSV
            row = [frame_count, 'label_here']  # Use a placeholder label initially
            
            for landmark in results.pose_landmarks.landmark:
                row.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
            
            # Write the row to CSV
            csv_writer.writerow(row)
        
        frame_count += 1
    print(frame_count)
    cap.release()
pose.close()

538


In [5]:
#Normalizing the data
import pandas as pd

# Load the CSV with pose landmarks and labels
df = pd.read_csv('pose_landmarks.csv')

# Normalize landmarks by the position of the left hip (landmark 23)
for i in range(33):
    df[f'x_{i}'] -= df['x_23']  # Normalizing by left hip x-coordinate
    df[f'y_{i}'] -= df['y_23']  # Normalizing by left hip y-coordinate

# Save the normalized data
df.to_csv('pose_landmarks_normalized.csv',index=False)

In [6]:
#Split into Training and Testing Sets
from sklearn.model_selection import train_test_split

# Load the normalized data
df = pd.read_csv('pose_landmarks_normalized.csv')

# Separate features (landmarks) and labels
X = df.drop(columns=['frame', 'label'])
y = df['label']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
#Training a basic classifier using TensorFlow/Keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np

# Load your labeled landmark data
# X: the input features (pose landmarks), y: the pose labels
# Assuming data is already in numpy format

X = np.array(X, dtype='float32')  # Ensure the data is in numpy format and type float32
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Define a simple neural network model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(set(y)), activation='softmax')  # Output layer
])
'''# Define a neural network model with the correct input shape 
model = tf.keras.models.Sequential([ 
    tf.keras.layers.Dense(64, activation='relu', input_shape=(99,)), 
    tf.keras.layers.Dense(32, activation='relu'), 
    tf.keras.layers.Dense(len(set(y)), activation='softmax') # Output layer 
])'''

# Compile and train the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

model_path = r'C:\Users\dipik\AppData\Local\Programs\Microsoft VS Code\my_model.keras' 
model.save(model_path) 
print(f"Model saved to {model_path}")

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:',test_acc)

Epoch 1/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8544 - loss: 0.5575 - val_accuracy: 0.9388 - val_loss: 0.1717
Epoch 2/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9476 - loss: 0.1314 - val_accuracy: 0.9640 - val_loss: 0.1402
Epoch 3/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9654 - loss: 0.1094 - val_accuracy: 0.9604 - val_loss: 0.1127
Epoch 4/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9714 - loss: 0.0885 - val_accuracy: 0.9748 - val_loss: 0.1010
Epoch 5/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9749 - loss: 0.0657 - val_accuracy: 0.9712 - val_loss: 0.0848
Epoch 6/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9793 - loss: 0.0629 - val_accuracy: 0.9892 - val_loss: 0.0824
Epoch 7/20
[1m35/35[0m [32m━━━━━━━━━━

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model(r'C:\Users\dipik\AppData\Local\Programs\Microsoft VS Code\my_model.keras')  # Make sure to provide the correct path to your model

# Define label map (update with your specific labels)
label_map = {0: "smile", 1: "clap", 2: "walk"}  # Adjust based on your model's output labels

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Open the video capture
video_path = r'C:\Users\dipik\AppData\Local\Programs\Microsoft VS Code\walk4.avi'
cap = cv2.VideoCapture(video_path)

# Use Pose with the proper confidence parameters
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video or no frame to capture.")
            break
        
        # Convert BGR image to RGB for MediaPipe
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Process the image and detect pose landmarks
        results = pose.process(image)

        # Draw landmarks on the original BGR image
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        if results.pose_landmarks:
            # Draw landmarks
            mp_drawing.draw_landmarks(
                image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
            )
            
            # Prepare landmarks for the model
            landmarks = []
            for lm in results.pose_landmarks.landmark:
                landmarks.extend([lm.x, lm.y, lm.z, lm.visibility])  # Append each landmark as x, y, z, visibility

            # Convert landmarks to numpy array and reshape for the model
            landmarks = np.array(landmarks).reshape(1, -1)  # Shape (1, N) where N is the total landmarks data
            
            # Predict the pose
            prediction = model.predict(landmarks)
            predicted_label = np.argmax(prediction)  # Get the index of the highest probability
            label_text = label_map[predicted_label]  # Map the index to a human-readable label
            
            # Display the label on the video
            cv2.putText(image, f"Pose: {label_text}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 
                        1, (255, 255, 255), 2, cv2.LINE_AA)

        # Display the image with the pose label
        cv2.imshow("Pose Detection", image)

        # Exit with 'q' key
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13