In [1]:
# Install the Google Cloud Storage client library and the gcsfs filesystem interface for Python. The --quiet flag suppresses output during installation.
!pip install google-cloud-storage gcsfs --quiet

In [2]:
# Authentication with google cloud
from google.colab import auth
auth.authenticate_user()



In [3]:
from google.cloud import storage

# GCS bucket and file details
BUCKET_NAME = "exercise-recognition-dataset"
ZIP_FILE_PATH = "exercise_dataset.zip"
LOCAL_ZIP_PATH = "/content/exercise_dataset.zip"

# Initialize GCS client
client = storage.Client()

# Get bucket reference
bucket = client.bucket(BUCKET_NAME)

# Get blob reference
blob = bucket.blob(ZIP_FILE_PATH)

# Download blob to local file
blob.download_to_filename(LOCAL_ZIP_PATH)

print("Downloaded successfully.")

Downloaded successfully.


In [4]:
# Unzipping the files
import zipfile

# Open the downloaded zip file in read mode ('r')
with zipfile.ZipFile(LOCAL_ZIP_PATH, 'r') as zip_ref:
    # Extract all the contents of the zip file to the specified directory
    zip_ref.extractall("/content/exercise_dataset")

# Print a confirmation message when the extraction is finished
print("Extraction complete.")

Extraction complete.


In [5]:
# Installing required libraries and dependencies
!pip install tensorflow opencv-python mediapipe scikit-learn matplotlib

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.2-py3-none-any.whl.metadata (1.6 kB)
Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m52.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.2-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.21 sounddevice-0.5.2


In [10]:
import os
import cv2

# Define input and output directories
INPUT_DIR = '/content/exercise_dataset/Exercises'
OUTPUT_DIR = '/content/frames_dataset'
FRAME_RATE = 1  # Desired frames per second

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Function to extract frames from a video
def extract_frames(video_path, save_path, frame_rate=1):
    cap = cv2.VideoCapture(video_path)  # Open the video file
    fps = cap.get(cv2.CAP_PROP_FPS)      # Get original video FPS
    frame_interval = int(fps // frame_rate)  # Calculate frame interval for desired rate

    frame_id = 0
    saved_frame = 0

    while cap.isOpened():
        ret, frame = cap.read()  # Read the next frame
        if not ret:
            break  # End of video
        if frame_id % frame_interval == 0:
            frame_file = os.path.join(save_path, f'frame_{saved_frame:05d}.jpg')
            cv2.imwrite(frame_file, frame)  # Save the frame as JPG
            saved_frame += 1
        frame_id += 1

    cap.release()  # Release video object


# Loop through class folders and video files
for label in os.listdir(INPUT_DIR):
    class_path = os.path.join(INPUT_DIR, label)
    if os.path.isdir(class_path):
        for video in os.listdir(class_path):
            if video.endswith(('.mp4', '.avi', '.mov', '.mkv')):
                video_path = os.path.join(class_path, video)
                video_name = os.path.splitext(video)[0]
                save_dir = os.path.join(OUTPUT_DIR, label, video_name)
                os.makedirs(save_dir, exist_ok=True)
                extract_frames(video_path, save_dir, FRAME_RATE)

print("Frame extraction complete!")

Frame extraction complete!


In [11]:
"""Because the target classes `Kettlebell_swings` and `RDL_dumbbell` have subfolders within them rather than recomputing
the frame by merging the subfolder, we will rather focus on those target classes only. We are ignoring the weight
parameter it maybe important from future aspects point of view during the data collection."""

import os

# Define target classes to process.  The script will only process videos within these class directories.
TARGET_CLASSES = ['Kettlebell_swings', 'RDL_dumbbell']

# Walk only inside the target class folders
for target in TARGET_CLASSES:
    class_path = os.path.join(INPUT_DIR, target)  # Path to the current target class directory.
    output_class_dir = os.path.join(OUTPUT_DIR, target) # Path to the output directory for the current class.
    os.makedirs(output_class_dir, exist_ok=True)    # Create the output directory if it doesn't exist.

    for root, _, files in os.walk(class_path): # Walk through the directory tree rooted at class_path.
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')): # Check if the file is a video file.
                video_path = os.path.join(root, file)         # Full path to the video file.
                video_name = os.path.splitext(file)[0]     # Video filename without extension.
                save_dir = os.path.join(output_class_dir, video_name) # Directory to save frames for this video.
                os.makedirs(save_dir, exist_ok=True)        # Create the frame save directory.
                extract_frames(video_path, save_dir, FRAME_RATE) # Extract frames from the video.

print("Subfolders merged and frame extraction complete.")

Subfolders merged and frame extraction complete.


In [12]:
import mediapipe as mp
import pandas as pd
import os
import cv2

# Directories
FRAME_DIR = '/content/frames_dataset'
CSV_OUTPUT_DIR = '/content/pose_csv_dataset'

# Initialize Mediapipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)

def extract_pose_from_frame(img_path):
    """Extract pose landmarks from an image."""
    img = cv2.imread(img_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(img_rgb)

    if results.pose_landmarks:
        row = []
        for lm in results.pose_landmarks.landmark:
            row.extend([lm.x, lm.y, lm.z, lm.visibility])
        return row
    return None

# Make output directory
os.makedirs(CSV_OUTPUT_DIR, exist_ok=True)

# Process each class folder
for label in os.listdir(FRAME_DIR):
    class_path = os.path.join(FRAME_DIR, label)
    if os.path.isdir(class_path):
        for video in os.listdir(class_path):
            video_path = os.path.join(class_path, video)
            if os.path.isdir(video_path):
                pose_data = []
                frame_files = sorted(os.listdir(video_path))
                print(f"Processing {video}...")

                for frame_file in frame_files:
                    frame_path = os.path.join(video_path, frame_file)
                    row = extract_pose_from_frame(frame_path)
                    if row:
                        pose_data.append([frame_file] + row)

                if pose_data:
                    columns = ['frame']
                    for i in range(33):
                        columns.extend([f'x{i}', f'y{i}', f'z{i}', f'v{i}'])

                    df = pd.DataFrame(pose_data, columns=columns)
                    out_dir = os.path.join(CSV_OUTPUT_DIR, label)
                    os.makedirs(out_dir, exist_ok=True)
                    out_csv = os.path.join(out_dir, f"{video}.csv")
                    df.to_csv(out_csv, index=False)

print("Pose keypoint extraction complete! CSVs saved in /content/pose_csv_dataset")

Processing kettlebell_swings_2kg_8_102...
Processing kettlebell_swings_4kg_3_18...
Processing kettlebell_swings_2kg_4_114...
Processing kettlebell_swings_6kg_2_102...
Processing kettlebell_swings_8kg_8_102...
Processing kettlebell_swings_6kg_7_114...
Processing kettlebell_swings_6kg_1_114...
Processing kettlebell_swings_12kg_2_102...
Processing kettlebell_swings_2kg_2_102...
Processing kettlebell_swings_2kg_5_102...
Processing kettlebell_swings_4kg_2_102...
Processing kettlebell_swings_6kg_5_102...
Processing kettlebell_swings_8kg_9_18...
Processing kettlebell_swings_12kg_4_114...
Processing kettlebell_swings_8kg_2_102...
Processing kettlebell_swings_8kg_7_114...
Processing kettlebell_swings_2kg_1_114...
Processing kettlebell_swings_6kg_8_102...
Processing kettlebell_swings_4kg_4_114...
Processing kettlebell_swings_6kg_3_18...
Processing kettlebell_swings_6kg_9_18...
Processing kettlebell_swings_6kg_4_114...
Processing kettlebell_swings_4kg_6_18...
Processing kettlebell_swings_4kg_7_11

In [26]:
import os
import pandas as pd

data = []

CSV_DIR = '/content/pose_csv_dataset'  # Directory containing the CSV files

# Loop through each label (class) directory in the CSV directory
for label in os.listdir(CSV_DIR):
    class_dir = os.path.join(CSV_DIR, label) # Path to the class directory
    if os.path.isdir(class_dir):          # Check if it's a directory
        # Loop through each file in the class directory
        for file in os.listdir(class_dir):
            if file.endswith('.csv'):    # Check if the file is a CSV file
                csv_path = os.path.join(class_dir, file) # Full path to the CSV file
                df = pd.read_csv(csv_path)            # Read the CSV file into a Pandas DataFrame
                df['label'] = label                 # Add a 'label' column with the class name
                df['video'] = file                   # Add a 'video' column with the filename
                data.append(df)                    # Append the DataFrame to the data list

# Concatenate all into a single DataFrame
full_df = pd.concat(data, ignore_index=True) # Concatenate all DataFrames in the list
print("Combined shape:", full_df.shape)       # Print the shape (rows, columns) of the combined DataFrame

Combined shape: (14138, 135)


In [27]:
full_df.head()  # View of the first 5 rows of the dataset

Unnamed: 0,frame,x0,y0,z0,v0,x1,y1,z1,v1,x2,...,x31,y31,z31,v31,x32,y32,z32,v32,label,video
0,frame_00009.jpg,0.551812,0.574754,-0.038027,0.998848,0.545737,0.564036,-0.038498,0.998614,0.549707,...,0.906977,0.586598,0.061681,0.340017,0.897279,0.616264,0.172381,0.171422,Kettlebell_swings,kettlebell_swings_8kg_2_102.csv
1,frame_00000.jpg,0.432927,0.46681,-0.205097,0.983814,0.437848,0.455041,-0.219827,0.984246,0.438948,...,0.419005,0.919513,0.210107,0.935987,0.43867,0.872235,0.398581,0.741659,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv
2,frame_00001.jpg,0.400312,0.493836,-0.132653,0.956673,0.404501,0.481483,-0.14879,0.962436,0.405526,...,0.417578,0.923985,0.18002,0.84339,0.44012,0.836672,0.431205,0.497793,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv
3,frame_00002.jpg,0.392901,0.511545,-0.131369,0.99653,0.395103,0.500852,-0.147117,0.997596,0.395558,...,0.413057,0.920415,0.102007,0.973134,0.405331,0.754452,0.482915,0.577804,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv
4,frame_00003.jpg,0.426719,0.472809,-0.170183,0.989553,0.431108,0.462486,-0.183992,0.989612,0.432652,...,0.417033,0.916374,0.181423,0.826518,0.419367,0.900731,0.32642,0.517659,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv


In [28]:
full_df.to_csv('full_pose_dataset.csv', index=False) # Save the combined DataFrame to a CSV file