In [1]:
# Cell 2: Libraries
import mediapipe as mp
import cv2
import numpy as np
import time
import pandas as pd
import csv
import os
from matplotlib import pyplot as plt

In [3]:
# Cell 9: Delete not important data
data = pd.read_csv('combined_coordinates.csv')

# Identify columns to remove: include specific ranges and all visibility points
columns_to_remove_1 = [f"pose_{c}{i}" for c in ['x', 'y', 'z', 'v'] for i in range(0, 11)]
columns_to_remove_2 = [f"pose_{c}{i}" for c in ['x', 'y', 'z', 'v'] for i in range(23, 33)]
visibility_columns_pose = [col for col in data.columns if 'pose_v' in col]
visibility_columns_left_hand = [col for col in data.columns if 'left_hand_v' in col]
visibility_columns_right_hand = [col for col in data.columns if 'right_hand_v' in col]

# Combine all columns to remove
columns_to_remove = columns_to_remove_1 + columns_to_remove_2 + visibility_columns_pose + visibility_columns_left_hand + visibility_columns_right_hand
columns_to_remove = [col for col in columns_to_remove if col in data.columns]

# Drop the selected columns from the dataframe
data_filtered = data.drop(columns=columns_to_remove)

# Round the values and save the filtered data
data_filtered.to_csv('filtered_coordinates.csv', index=False)

# Display the first few rows of the filtered dataset
data_filtered.head()

Unnamed: 0,class,accuracy,sequence,pose_x11,pose_y11,pose_z11,pose_x12,pose_y12,pose_z12,pose_x13,...,left_hand_z17,left_hand_x18,left_hand_y18,left_hand_z18,left_hand_x19,left_hand_y19,left_hand_z19,left_hand_x20,left_hand_y20,left_hand_z20
0,pen,1,0,0.594059,0.309059,-0.196826,0.3112,0.339124,-0.057219,0.753011,...,-0.034374,0.348,0.782419,-0.050452,0.351776,0.811713,-0.058446,0.355583,0.840286,-0.062885
1,pen,1,0,0.593844,0.309345,-0.196947,0.310788,0.339243,-0.057271,0.752674,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,pen,1,0,0.593748,0.309507,-0.19388,0.310502,0.339239,-0.066608,0.752274,...,-0.041063,0.35045,0.785777,-0.057279,0.356252,0.815158,-0.064492,0.362689,0.842664,-0.068444
3,pen,1,0,0.593448,0.309622,-0.16705,0.309933,0.339824,-0.037587,0.751536,...,-0.051957,0.354071,0.783193,-0.066484,0.361161,0.810169,-0.070669,0.369577,0.833266,-0.072963
4,pen,1,0,0.593303,0.309749,-0.187792,0.309748,0.340211,-0.036565,0.750996,...,-0.052415,0.352663,0.778692,-0.069039,0.359794,0.805952,-0.075388,0.368541,0.831612,-0.079067


In [15]:
import pandas as pd

def transform_and_merge_columns(file_path, output_file_path):
    # Load the CSV file
    data = pd.read_csv(file_path)
    
    data['accuracy'] = data['accuracy'].map({0: 'W', 1: 'R'})
    data['label'] = data.iloc[:, 0].astype(str) + '_' + data['accuracy']
    
    label_column = data.pop('label') 
    data.insert(0, 'label', label_column)  
    
    data.drop(columns=['class', 'accuracy'], inplace=True)
    data.to_csv(output_file_path, index=False)

input_file_path = 'filtered_coordinates.csv'  
output_file_path = 'modified_file.csv'  
transform_and_merge_columns(input_file_path, output_file_path)


In [None]:
# Fill columns with 0s so you have the same

In [3]:
# Read the contents of the text file
with open('unique_labels.txt', 'r') as file:
    actions = [line.strip() for line in file.readlines()]

# Print the list of actions
actions

['paint_R', 'paint_W', 'pen_R', 'pen_W', 'scissors_R', 'scissors_W']

In [4]:
from sklearn.preprocessing import LabelEncoder

# Your existing code to encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(actions)

# Now you can use features_padded and labels_encoded for training or prediction


In [8]:
# Cell 3: Check that the camera and mediapipe are working
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open video capture")
else:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose, \
        mp.solutions.hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                print("Error: Unable to read frame from video capture")
                break

            # Flip image to simulate mirror view
            image = cv2.flip(image, 1)

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            pose_results = pose.process(image)
            hand_results = hands.process(image)

            # RGB 2 BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw points
            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    pose_results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
                )

            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp.solutions.hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2),
                    )

            cv2.imshow("Raw Webcam Feed", image)

            if cv2.waitKey(10) & 0xFF == ord("q"):
                break

    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1715346486.807142  600678 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
I0000 00:00:1715346486.813250  600678 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
W0000 00:00:1715346486.823502  604132 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1715346486.828640  604130 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1715346486.907862  604120 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1715346486.917056  604118 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [11]:
import mediapipe as mp
import cv2
import numpy as np

# Number of frames to collect data for
num_frames_to_collect = 68

# Initialize an array to store the collected data
collected_data = []

# Setup MediaPipe instances
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open video capture")
else:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose, \
         mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, max_num_hands=2) as hands:
        frame_count = 0
        while cap.isOpened() and frame_count < num_frames_to_collect:
            ret, image = cap.read()
            if not ret:
                print("Error: Unable to read frame from video capture")
                break

            # Flip image to simulate mirror view
            image = cv2.flip(image, 1)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            pose_results = pose.process(image)
            hand_results = hands.process(image)

            # Flatten pose landmarks, skipping certain indices
            if pose_results.pose_landmarks:
                pose_landmarks = [
                    round(value, 3) for idx, landmark in enumerate(pose_results.pose_landmarks.landmark)
                    if not (0 <= idx <= 11 or 23 <= idx <= 32)  # Adjust to 0-based indexing and include index 32
                    for value in (landmark.x, landmark.y, landmark.z)
                ]
            else:
                pose_landmarks = [0] * (10 * 3)  # Adjust count to reflect the remaining landmarks

            # Initialize hand landmarks placeholders
            right_hand_landmarks = [0] * (21 * 3)
            left_hand_landmarks = [0] * (21 * 3)

            # Detect and sort hand landmarks
            if hand_results.multi_hand_landmarks:
                hand_labels = [hand.classification[0].label for hand in hand_results.multi_handedness]
                for hand_landmarks, label in zip(hand_results.multi_hand_landmarks, hand_labels):
                    flat_hand = [round(value, 3) for landmark in hand_landmarks.landmark
                                 for value in (landmark.x, landmark.y, landmark.z)]
                    if label == 'Right':
                        right_hand_landmarks = flat_hand
                    else:
                        left_hand_landmarks = flat_hand

            # Combine all landmarks
            all_landmarks = pose_landmarks + right_hand_landmarks + left_hand_landmarks

            # Print the number of elements in all_landmarks for each frame
            print("Number of elements in all_landmarks:", len(all_landmarks))

            collected_data.extend(all_landmarks)  # Append to flat list

            # Drawing
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    pose_results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
                )

            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2),
                    )

            cv2.imshow("Raw Webcam Feed with Landmarks", image)

            # Increment frame count
            frame_count += 1
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()

# Convert to numpy array
collected_data_array = np.array(collected_data)
print("Data shape:", collected_data_array.shape)

I0000 00:00:1715346688.281489  600678 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
I0000 00:00:1715346688.286270  600678 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
W0000 00:00:1715346688.291605  606890 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1715346688.295968  606890 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1715346688.376604  606871 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1715346688.384445  606871 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Number of elements in all_landmarks: 156
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elements in all_landmarks: 159
Number of elemen

In [7]:
print("Size of collected_data_array:", collected_data_array.size)


Size of collected_data_array: 10809


In [12]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Load the .h5 model
model = tf.keras.models.load_model('actions.h5')

# Inspect the model's summary to see the input shape
print("Model Summary:")
print(model.summary())

# Assuming you have prepared your input data (features_padded) and encoded labels (labels_encoded)
# If not, prepare your data based on the input shape as mentioned before

# Make predictions
predictions = model.predict(collected_data_array)

# Print predictions
print("Predictions:", predictions)

# If you want to decode the predictions back to original labels
label_decoder = LabelEncoder()
label_decoder.fit(actions)
decoded_predictions = label_decoder.inverse_transform(np.argmax(predictions, axis=1))
print("Decoded Predictions:", decoded_predictions)




Model Summary:


None


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(32,), dtype=float32). Expected shape (None, 68, 162), but input has incompatible shape (32,)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32,), dtype=float32)
  • training=False
  • mask=None

In [None]:
# Cell 10: Create folders in directory
data = pd.read_csv('filtered_coordinates.csv')

base_dir = 'DataBase'
if not os.path.exists(base_dir):
    os.makedirs(base_dir)
unique_classes = data['class'].unique()

for class_value in unique_classes:
    class_dir = os.path.join(base_dir, str(class_value))
    if not os.path.exists(class_dir):
        os.makedirs(class_dir)
    
    class_data = data[data['class'] == class_value]

    for accuracy_type, subfolder_name in zip([1, 0], ['R', 'W']):
        accuracy_dir = os.path.join(class_dir, subfolder_name)
        if not os.path.exists(accuracy_dir):
            os.makedirs(accuracy_dir)
        
        accuracy_data = class_data[class_data['accuracy'] == accuracy_type]
        
        unique_sequences = accuracy_data['sequence'].unique()
        
        for sequence in unique_sequences:
            sequence_dir = os.path.join(accuracy_dir, str(sequence))
            if not os.path.exists(sequence_dir):
                os.makedirs(sequence_dir)

In [None]:
# Cell 11: Save individual np arrays
data = pd.read_csv('filtered_coordinates.csv')
base_dir = 'DataBase'

if not os.path.exists(base_dir):
    os.makedirs(base_dir)

for class_value in data['class'].unique():
    class_dir = os.path.join(base_dir, str(class_value))
    class_data = data[data['class'] == class_value]

    for accuracy_type, subfolder_name in zip([1, 0], ['R', 'W']):
        accuracy_dir = os.path.join(class_dir, subfolder_name)
        accuracy_data = class_data[class_data['accuracy'] == accuracy_type]

        for sequence in accuracy_data['sequence'].unique():
            sequence_dir = os.path.join(accuracy_dir, str(sequence))
            sequence_data = accuracy_data[accuracy_data['sequence'] == sequence]

            sequence_data = sequence_data.reset_index(drop=True)

            for index, row in sequence_data.iterrows():
                frame_path = os.path.join(sequence_dir, f'{index}.npy')
                np.save(frame_path, row.values[3:])

print("cosas")

In [None]:
# Cell 12: Test if array is in the correct way
np.load('DataBase/pen/W/0/0.npy', allow_pickle=True)