In [None]:
# Cell 1: Install libraries
!pip install mediapipe opencv-python pandas scikit-learn numpy matplotlib

In [3]:
# Cell 2: Libraries
import mediapipe as mp
import cv2
import numpy as np
import time
import pandas as pd
import csv
import os
from matplotlib import pyplot as plt

In [None]:
# Cell 3: Check that the camera and mediapipe are working
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open video capture")
else:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose, \
        mp.solutions.hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                print("Error: Unable to read frame from video capture")
                break

            # Flip image to simulate mirror view
            image = cv2.flip(image, 1)

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            pose_results = pose.process(image)
            hand_results = hands.process(image)

            # RGB 2 BGR
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw points
            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    pose_results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
                )

            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp.solutions.hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2),
                    )

            cv2.imshow("Raw Webcam Feed", image)

            if cv2.waitKey(10) & 0xFF == ord("q"):
                break

    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Cell 4: Record video for data capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open video capture")
else:
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30 
    print(f"Video Resolution: {width}x{height} at {fps} FPS")

    # Define the codec
    fourcc = cv2.VideoWriter_fourcc(*'XVID') 
    out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))

    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)
    
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("Error: Unable to read frame from video capture")
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_rgb.flags.writeable = False
        results = hands.process(frame_rgb)  
        frame_rgb.flags.writeable = True
        frame = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

        # Write every frame
        out.write(frame)

        cv2.imshow('MediaPipe Hands', frame)  
        if cv2.waitKey(5) & 0xFF == ord('q'): 
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [5]:
# Cell 5: Create CSV to store data
headers = ['class', 'accuracy', 'sequence'] 
headers.extend([f'pose_{coord}{i}' for i in range(33) for coord in ('x', 'y', 'z', 'v')])
headers.extend([f'{hand}_{coord}{i}' for hand in ('right_hand', 'left_hand') for i in range(21) for coord in ('x', 'y', 'z', 'v')])

with open('coordinates_1.csv', mode='w', newline='') as file:
    csv_writer = csv.writer(file)
    csv_writer.writerow(headers)

In [6]:
# Cell 6: Capture data for different 'movements'
cap = cv2.VideoCapture('_.mp4')
if not cap.isOpened():
    print("Error: Unable to open video capture")
else:
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('processed_output.avi', fourcc, fps, (width, height))

    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    mp_drawing = mp.solutions.drawing_utils
    pose_drawing_spec = mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4)
    hand_drawing_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)

    record = False
    accuracy = None
    sequences = {'r': -1, 'w': -1}  # Separate counters for 'r' and 'w'
    recording_state = None

    with open('coordinates_1.csv', mode='a', newline='') as file:
        csv_writer = csv.writer(file)

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Error: Unable to read frame from video capture")
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_rgb.flags.writeable = False
            pose_results = pose.process(frame_rgb)
            hand_results = hands.process(frame_rgb)
            frame_rgb.flags.writeable = True
            frame = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    pose_results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=pose_drawing_spec,
                    connection_drawing_spec=pose_drawing_spec)

            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        landmark_drawing_spec=hand_drawing_spec,
                        connection_drawing_spec=hand_drawing_spec)

            # Select the sequence number based on the key pressed
            current_sequence = sequences[recording_state] if recording_state else -1
            row = ['', accuracy, current_sequence]

            if pose_results.pose_landmarks:
                for lm in pose_results.pose_landmarks.landmark:
                    visibility_binary = 1 if lm.visibility > 0.3 else 0
                    row.extend([lm.x, lm.y, lm.z, visibility_binary])
            else:
                row.extend([0] * 33 * 4)

            for hand in ('right_hand', 'left_hand'):
                found = False
                if hand_results.multi_hand_landmarks:
                    for hand_landmarks, handedness in zip(hand_results.multi_hand_landmarks, hand_results.multi_handedness):
                        if handedness.classification[0].label == ('Right' if hand == 'right_hand' else 'Left'):
                            for lm in hand_landmarks.landmark:
                                visibility_binary = 1 if lm.visibility > 0.2 else 0
                                row.extend([lm.x, lm.y, lm.z, visibility_binary])
                            found = True
                            break
                if not found:
                    row.extend([0] * 21 * 4)

            if record:
                csv_writer.writerow(row)

            out.write(frame)
            cv2.imshow('MediaPipe Pose', frame)
            key = cv2.waitKey(5) & 0xFF

            if key == ord('r') or key == ord('w'):
                new_state = chr(key)
                if new_state != recording_state:
                    sequences[new_state] += 1  # Update the specific sequence counter
                    recording_state = new_state
                record = True
                accuracy = 1 if key == ord('r') else 0
            elif key == ord('s'):
                record = False
                recording_state = None
            elif key == ord('q'):
                break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

df = pd.read_csv('coordinates_1.csv')
df.loc[df['accuracy'].notna(), 'class'] = 'movement_1'
df.to_csv('coordinates_1.csv', index=False)


I0000 00:00:1715176851.237102  241696 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
I0000 00:00:1715176851.241943  241696 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
  df.loc[df['accuracy'].notna(), 'class'] = 'movement_1'


In [8]:
# Cell 7: Capture data for different 'movements' and output to a new CSV without headers
cap = cv2.VideoCapture('_.mp4')
if not cap.isOpened():
    print("Error: Unable to open video capture")
else:
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('processed_output.avi', fourcc, fps, (width, height))

    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    mp_drawing = mp.solutions.drawing_utils
    pose_drawing_spec = mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4)
    hand_drawing_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)

    record = False
    accuracy = None
    sequences = {'r': -1, 'w': -1}  # Separate counters for 'r' and 'w'
    recording_state = None

    with open('coordinates_2.csv', mode='a', newline='') as file:
        csv_writer = csv.writer(file, lineterminator='\n')

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Error: Unable to read frame from video capture")
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_rgb.flags.writeable = False
            pose_results = pose.process(frame_rgb)
            hand_results = hands.process(frame_rgb)
            frame_rgb.flags.writeable = True
            frame = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

            if pose_results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    pose_results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=pose_drawing_spec,
                    connection_drawing_spec=pose_drawing_spec)

            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        landmark_drawing_spec=hand_drawing_spec,
                        connection_drawing_spec=hand_drawing_spec)

            current_sequence = sequences[recording_state] if recording_state else -1
            row = ['', accuracy, current_sequence]

            if pose_results.pose_landmarks:
                for lm in pose_results.pose_landmarks.landmark:
                    visibility_binary = 1 if lm.visibility > 0.3 else 0
                    row.extend([lm.x, lm.y, lm.z, visibility_binary])
            else:
                row.extend([0] * 33 * 4)

            for hand in ('right_hand', 'left_hand'):
                found = False
                if hand_results.multi_hand_landmarks:
                    for hand_landmarks, handedness in zip(hand_results.multi_hand_landmarks, hand_results.multi_handedness):
                        if handedness.classification[0].label == ('Right' if hand == 'right_hand' else 'Left'):
                            for lm in hand_landmarks.landmark:
                                visibility_binary = 1 if lm.visibility > 0.2 else 0
                                row.extend([lm.x, lm.y, lm.z, visibility_binary])
                            found = True
                            break
                if not found:
                    row.extend([0] * 21 * 4)

            if record:
                csv_writer.writerow(row)

            out.write(frame)
            cv2.imshow('MediaPipe Pose', frame)
            key = cv2.waitKey(5) & 0xFF

            if key == ord('r') or key == ord('w'):
                new_state = chr(key)
                if new_state != recording_state:
                    sequences[new_state] += 1  # Update the specific sequence counter
                    recording_state = new_state
                record = True
                accuracy = 1 if key == ord('r') else 0
            elif key == ord('s'):
                record = False
                recording_state = None
            elif key == ord('q'):
                break

    cap.release()
    out.release()
    cv2.destroyAllWindows()

df = pd.read_csv('coordinates_2.csv', header=None)
df.loc[df[1].notna(), 0] = 'movement_2'
df.to_csv('coordinates_2.csv', header=False, index=False)


I0000 00:00:1715177869.980019  241696 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
I0000 00:00:1715177869.984994  241696 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
  df.loc[df[1].notna(), 0] = 'movement_2'


In [9]:
# Cell 8: Join all the CSVs of the different movements
df1 = pd.read_csv('coordinates_1.csv', header=None)
df2 = pd.read_csv('coordinates_2.csv', header=None)

combined_df = pd.concat([df1, df2], axis=0, ignore_index=True)
combined_path = 'combined_coordinates.csv'
combined_df.to_csv(combined_path, header=False, index=False)

In [19]:
# Cell 9: Delete not important data
data = pd.read_csv('combined_coordinates.csv')

columns_to_remove_1 = [f"pose_{c}{i}" for c in ['x', 'y', 'z', 'v'] for i in range(0, 11)]
columns_to_remove_2 = [f"pose_{c}{i}" for c in ['x', 'y', 'z', 'v'] for i in range(23, 33)]
columns_to_remove = [col for col in (columns_to_remove_1 + columns_to_remove_2) if col in data.columns]

data_filtered = data.drop(columns=columns_to_remove)
data_filtered = data_filtered.round(3)
data_filtered.to_csv('filtered_coordinates.csv', index=False)
data_filtered.head()

Unnamed: 0,class,accuracy,sequence,pose_x11,pose_y11,pose_z11,pose_v11,pose_x12,pose_y12,pose_z12,...,left_hand_z18,left_hand_v18,left_hand_x19,left_hand_y19,left_hand_z19,left_hand_v19,left_hand_x20,left_hand_y20,left_hand_z20,left_hand_v20
0,movement_1,0,0,0.354,0.46,-0.585,1,0.157,0.573,-0.57,...,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0
1,movement_1,0,0,0.338,0.455,-0.615,1,0.159,0.572,-0.576,...,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0
2,movement_1,0,0,0.342,0.455,-0.589,1,0.173,0.54,-0.551,...,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0
3,movement_1,0,0,0.332,0.427,-0.376,1,0.188,0.504,-0.302,...,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0
4,movement_1,0,0,0.34,0.422,-0.457,1,0.178,0.512,-0.437,...,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0


In [24]:
# Cell 10: Create folders in directory
data = pd.read_csv('filtered_coordinates.csv')

base_dir = 'DataBase'
if not os.path.exists(base_dir):
    os.makedirs(base_dir)
unique_classes = data['class'].unique()

for class_value in unique_classes:
    class_dir = os.path.join(base_dir, str(class_value))
    if not os.path.exists(class_dir):
        os.makedirs(class_dir)
    
    class_data = data[data['class'] == class_value]

    for accuracy_type, subfolder_name in zip([1, 0], ['R', 'W']):
        accuracy_dir = os.path.join(class_dir, subfolder_name)
        if not os.path.exists(accuracy_dir):
            os.makedirs(accuracy_dir)
        
        accuracy_data = class_data[class_data['accuracy'] == accuracy_type]
        
        unique_sequences = accuracy_data['sequence'].unique()
        
        # Create subfolders for each sequence
        for sequence in unique_sequences:
            sequence_dir = os.path.join(accuracy_dir, str(sequence))
            if not os.path.exists(sequence_dir):
                os.makedirs(sequence_dir)

Folders created successfully with sequence subfolders inside 'R' and 'W'!


In [None]:
# Cell 10: Reduce decimal points
data = pd.read_csv('filtered_coordinates.csv')
data = data.round(3)
data.head()