In [1]:
from src.utils.pose_estimator import PoseEstimator

pose_estimation = PoseEstimator()


In [2]:
from src.utils.video_processor import VideoProcessor

landmarks = VideoProcessor.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')



In [4]:
import cv2
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from src.utils.pose_estimator import PoseEstimator

class VideoProcessor2:
    
    @staticmethod
    def process_frame(frame, pose_estimator):
        """
        Process a single frame to extract pose landmarks.

        :param frame: A single video frame.
        :param pose_estimator: PoseEstimator instance for detecting landmarks.
        :return: List of pose landmarks for the frame.
        """
        pose_landmarks = pose_estimator.estimate_pose(frame)
        if pose_landmarks:
            return [[lm.x, lm.y, lm.z, lm.visibility] for lm in pose_landmarks.landmark]
        return None

    @staticmethod
    def process_video(video_path: Path, num_threads=4) -> list[list[list[float]]]:
        """
        Process a single video file using multiple threads to extract pose landmarks.

        :param video_path: Path to the video file (as a Path object).
        :param num_threads: Number of threads to use for parallel processing.
        :return: A list of pose landmarks for each frame in the video.
        """
        pose_estimator = PoseEstimator()  # Create a new PoseEstimator instance
        cap = cv2.VideoCapture(str(video_path))
        all_landmarks = []

        if not cap.isOpened():
            raise ValueError(f"Unable to open video file: {video_path}")

        # Store frames for processing
        frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
        
        cap.release()

        # Use threading for parallel frame processing
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = {executor.submit(VideoProcessor2.process_frame, frame, pose_estimator): frame for frame in frames}
            for future in as_completed(futures):
                result = future.result()
                if result is not None:
                    all_landmarks.append(result)

        return all_landmarks



landmarks2 = VideoProcessor2.process_video(
    r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')


In [1]:
from pathlib import Path
from src.utils.pose_estimator import PoseEstimator

class VideoProcessor3:
    @staticmethod
    def process_video(video_path: Path) -> list[list[list[float]]]:
        """
        Process a single video file, extract pose landmarks, and return them.

        :param video_path: Path to the video file (as a Path object).
        :return: A list of pose landmarks for each frame in the video.
        """
        pose_estimator = PoseEstimator()  # Create a new PoseEstimator instance for each thread
        cap = cv2.VideoCapture(str(video_path))
        all_landmarks = []

        if not cap.isOpened():
            raise ValueError(f"Unable to open video file: {video_path}")

        def process_frame(frame):
            try:
                pose_landmarks = pose_estimator.estimate_pose(frame)
                if pose_landmarks:
                    return [
                        [lm.x, lm.y, lm.z, lm.visibility]
                        for lm in pose_landmarks.landmark
                    ]
            except ValueError as e:
                print(f"Skipping frame due to error: {e}")
            return None

        with ThreadPoolExecutor() as executor:
            futures = []
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                futures.append(executor.submit(process_frame, frame))

            for future in as_completed(futures):
                result = future.result()
                if result:
                    all_landmarks.append(result)

        cap.release()
        return all_landmarks
    
landmarks3 = VideoProcessor3.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')

VideoProcessor.classify_video(landmarks3)



NameError: name 'VideoProcessor' is not defined

In [1]:
from threading import Thread
from queue import Queue

class FileVideoStream:
    def __init__(self, path, queue_size=128):
        # Initialize the video file stream and queue to store frames
        self.stream = cv2.VideoCapture(path)
        self.stopped = False
        self.queue = Queue(maxsize=queue_size)
    
    def start(self):
        # Start the thread to read frames from the video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self
    
    def update(self):
        # Loop until the stream is stopped
        while True:
            if self.stopped:
                return
            
            if not self.queue.full():
                # Read the next frame from the file
                (grabbed, frame) = self.stream.read()
                
                # If the frame was not grabbed, stop the thread
                if not grabbed:
                    self.stop()
                    return
                
                self.queue.put(frame)
    
    def read(self):
        # Return the next frame from the queue
        return self.queue.get()
    
    def more(self):
        # Return True if there are still frames in the queue
        return self.queue.qsize() > 0
    
    def stop(self):
        # Indicate that the thread should be stopped
        self.stopped = True


In [4]:
import torch
import numpy as np
from pathlib import Path
from imutils.video import FPS
import time

# Load your trained model
model = torch.load('lstm_v1.pth', map_location='cpu')
model.eval()

# Define your label mapping
label_mapping = {0: 'Squat', 1: 'Deadlift', 2: 'Bench Press', 3: 'Push-Up', 4: 'Lat Pulldown'}

def preprocess_frame_for_model(frame):
    # Your preprocessing code here (e.g., resizing, normalizing landmarks)
    # Ensure the input is compatible with your model
    return processed_frame

# Video path
video_path = "path_to_your_video.mp4"

# Initialize video stream
fvs = FileVideoStream(video_path).start()
time.sleep(1.0)  # Allow buffer to fill

fps = FPS().start()

while fvs.more():
    # Read the next frame from the video stream
    frame = fvs.read()

    # Preprocess the frame (extract landmarks or features compatible with your model)
    processed_frame = preprocess_frame_for_model(frame)

    # Convert to torch tensor
    input_tensor = torch.tensor(processed_frame).float().unsqueeze(0)

    # Make prediction
    with torch.no_grad():
        output = model(input_tensor)
        _, predicted_label = torch.max(output, 1)
        predicted_class = label_mapping[predicted_label.item()]

    # Display the frame with the prediction
    cv2.putText(frame, f"Predicted: {predicted_class}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("Frame", frame)

    # Break on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    fps.update()

# Stop the FPS counter and clean up
fps.stop()
print(f"[INFO] Elapsed time: {fps.elapsed():.2f}")
print(f"[INFO] Approx. FPS: {fps.fps():.2f}")

fvs.stop()
cv2.destroyAllWindows()


  model = torch.load('exercise_lstm_model_filtered.pth', map_location='cpu')


AttributeError: 'collections.OrderedDict' object has no attribute 'eval'

In [2]:
import cv2

class ThreadedVideoStream:
    def __init__(self, path, queue_size=128):
        # Initialize the video file stream and queue to store frames
        self.stream = cv2.VideoCapture(str(path))
        self.stopped = False
        self.queue = Queue(maxsize=queue_size)

    def start(self):
        # Start the thread to read frames from the video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # Loop until the stream is stopped
        while True:
            if self.stopped:
                return

            if not self.queue.full():
                # Read the next frame from the file
                grabbed, frame = self.stream.read()

                # If the frame was not grabbed, stop the thread
                if not grabbed:
                    self.stop()
                    return

                self.queue.put(frame)

    def read(self):
        # Return the next frame from the queue
        return self.queue.get()

    def more(self):
        # Return True if there are still frames in the queue
        return self.queue.qsize() > 0

    def stop(self):
        # Indicate that the thread should be stopped
        self.stopped = True
        self.stream.release()


In [3]:
from pathlib import Path
from src.utils.pose_estimator import PoseEstimator

class VideoProcessor:

    @staticmethod
    def process_video(video_path: Path) -> list[list[list[float]]]:
        """
        Process a single video file, extract pose landmarks, and return them.
        :param video_path: Path to the video file (as a Path object).
        :return: A list of pose landmarks for each frame in the video.
        """
        pose_estimator = PoseEstimator()  # Create a PoseEstimator instance
        all_landmarks = []
        
        # Initialize threaded video stream
        video_stream = ThreadedVideoStream(video_path).start()

        while video_stream.more():
            frame = video_stream.read()

            try:
                pose_landmarks = pose_estimator.estimate_pose(frame)

                if pose_landmarks:
                    # Extract landmarks for the current frame
                    all_landmarks.append(
                        [
                            [lm.x, lm.y, lm.z, lm.visibility]
                            for lm in pose_landmarks.landmark
                        ]
                    )

            except ValueError as e:
                print(f"Skipping frame due to error: {e}")
                continue

        # Stop the video stream
        video_stream.stop()
        return all_landmarks


In [None]:
landmarks = VideoProcessor.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')

In [1]:
# import the necessary packages
from imutils.video import FPS
import argparse
import imutils
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", required=True,
                help="path to input video file")
args = vars(ap.parse_args())

# open a pointer to the video stream and start the FPS timer
stream = cv2.VideoCapture(args["video"])
fps = FPS().start()

# loop over frames from the video file stream
while True:
    # grab the frame from the threaded video file stream
    (grabbed, frame) = stream.read()

    # if the frame was not grabbed, then we have reached the end of the stream
    if not grabbed:
        break

    # resize the frame and convert it to grayscale (while still retaining 3 channels)
    frame = imutils.resize(frame, width=450)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = np.dstack([frame, frame, frame])

    # display a piece of text to the frame
    cv2.putText(frame, "Slow Method", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # show the frame and update the FPS counter
    cv2.imshow("Frame", frame)
    cv2.waitKey(1)
    fps.update()

# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# do a bit of cleanup
stream.release()
cv2.destroyAllWindows()


usage: ipykernel_launcher.py [-h] -v VIDEO
ipykernel_launcher.py: error: the following arguments are required: -v/--video


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)




In [1]:
from src.models.lstm import ExerciseLSTM
import torch
from src.utils.video_processor import VideoProcessor
from src.utils.pose_estimator import PoseEstimator

input_size = 132   # Should be 132
hidden_size = 128                       # As used during training
num_layers = 2                          # As used during training
num_classes = 5                         # Set to 5 to match the trained model

# Initialize the model
model = ExerciseLSTM(input_size, hidden_size, num_layers, num_classes)

# Load the trained model weights
model.load_state_dict(torch.load('lstm_v1.pth', map_location='cpu'))
model.eval()
video_proccesor = VideoProcessor(PoseEstimator(), model)


  model.load_state_dict(torch.load('exercise_lstm_model_filtered.pth', map_location='cpu'))


In [13]:
import cv2
from pathlib import Path
class VideoProcessor3:
    @staticmethod
    def process_video(video_path: Path) -> list[list[list[float]]]:
        """
        Process a single video file, extract pose landmarks, and return them.

        :param video_path: Path to the video file (as a Path object).
        :return: A list of pose landmarks for each frame in the video.
        """
        pose_estimator = PoseEstimator()  # Create a new PoseEstimator instance for each thread
        cap = cv2.VideoCapture(str(video_path))
        all_landmarks = []

        if not cap.isOpened():
            raise ValueError(f"Unable to open video file: {video_path}")

        def process_frame(frame):
            try:
                pose_landmarks = pose_estimator.estimate_pose(frame)
                if pose_landmarks:
                    return [
                        [lm.x, lm.y, lm.z, lm.visibility]
                        for lm in pose_landmarks.landmark
                    ]
            except ValueError as e:
                print(f"Skipping frame due to error: {e}")
            return None

        with ThreadPoolExecutor() as executor:
            futures = []
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                futures.append(executor.submit(process_frame, frame))

            for future in as_completed(futures):
                result = future.result()
                if result:
                    all_landmarks.append(result)

        cap.release()
        return all_landmarks
    
landmarks3 = VideoProcessor3.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')




In [3]:
video_proccesor.classify_video(landmarks3)

ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead

In [7]:
from copy import deepcopy
import numpy as np
import torch
import json

from src.models.lstm import ExerciseLSTM
from src.utils.pose_estimator import PoseEstimator
from src.utils.video_processor import VideoProcessor

# ---------------------- Load Data ----------------------

# Load the flattened sequences and labels
X = np.load(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\landmarks_data.npy', allow_pickle=True)  # Shape: (num_samples, seq_length, input_size)
# y = np.load('labels_data.npy')  # Shape: (num_samples,)

# Load the label mapping (ensure it's the one used during training)
with open(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\label_mapping.json', 'r') as f:
    label_mapping = json.load(f)

# Create an inverse label mapping (from indices to class names)
inverse_label_mapping = {int(v): k for k, v in label_mapping.items()}

# ---------------------- Select a Random Sample ----------------------

# Select a random index
# idx = random.randint(0, len(X) - 1)
# sample_sequence = X[idx]  # Shape: (sequence_length, input_size)
# true_label = y[idx]  # This is the numerical label (should be between 0 and 4)

# ---------------------- Preprocess the Sample ----------------------p
poseestimator = PoseEstimator()
videoprocessor = VideoProcessor(poseestimator)
sample_sequence = videoprocessor.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')
# Ensure the sample is a NumPy array of type float32
sample_sequence_for_testing = deepcopy(sample_sequence)


TypeError: VideoProcessor() takes no arguments

In [3]:
new_list = np.array([np.array(sample).flatten() for sample in sample_sequence], dtype=np.float32)
new_list.shape

(235, 132)

In [4]:
X[0].shape

(300, 132)

In [5]:
sample_sequence.shape

(235, 132)

In [8]:
sample_sequence = np.array(sample_sequence, dtype=np.float32)

# Add a batch dimension to the sample
sample_sequence = np.expand_dims(sample_sequence, axis=0)
sample_sequence = torch.tensor(sample_sequence, dtype=torch.float32)


# Model parameters (must match those used during training)
input_size = 132   # Should be 132
hidden_size = 128                       # As used during training
num_layers = 2                          # As used during training
num_classes = 5                         # Set to 5 to match the trained model



NameError: name 'sample_sequence' is not defined

In [6]:
with torch.no_grad():
    outputs = model(sample_sequence)
    _, predicted_label_idx = torch.max(outputs.data, 1)
    predicted_label_idx = predicted_label_idx.item()
    

ValueError: LSTM: Expected input to be 2D or 3D, got 5D instead

In [1]:
from copy import deepcopy
import numpy as np
import torch
import json

from src.models.lstm import ExerciseLSTM
from src.utils.pose_estimator import PoseEstimator
from src.utils.video_processor import VideoProcessor

# ---------------------- Load Data ----------------------

# Load the flattened sequences and labels
X = np.load(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\landmarks_data.npy', allow_pickle=True)  # Shape: (num_samples, seq_length, input_size)
# y = np.load('labels_data.npy')  # Shape: (num_samples,)

# Load the label mapping (ensure it's the one used during training)
with open(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\label_mapping.json', 'r') as f:
    label_mapping = json.load(f)

# Create an inverse label mapping (from indices to class names)
inverse_label_mapping = {int(v): k for k, v in label_mapping.items()}

# ---------------------- Select a Random Sample ----------------------

# Select a random index
# idx = random.randint(0, len(X) - 1)
# sample_sequence = X[idx]  # Shape: (sequence_length, input_size)
# true_label = y[idx]  # This is the numerical label (should be between 0 and 4)

# ---------------------- Preprocess the Sample ----------------------p
poseestimator = PoseEstimator()
video_proccesor = VideoProcessor()
sample_sequence = video_proccesor.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')
# Ensure the sample is a NumPy array of type float32
sample_sequence_for_testing = deepcopy(sample_sequence)

input_size = 132   # Should be 132
hidden_size = 128                       # As used during training
num_layers = 2                          # As used during training
num_classes = 5  
model = ExerciseLSTM(input_size, hidden_size, num_layers, num_classes)
model.load_state_dict(torch.load(r'/src/utils/lstm_v1.pth', map_location='cpu'))
model.eval()

video_proccesor.classify_video(model, sample_sequence)

  model.load_state_dict(torch.load(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\utils\exercise_lstm_model_filtered.pth', map_location='cpu'))


AttributeError: 'VideoProcessor' object has no attribute 'classify_video'

In [2]:
# Initialize the model

input_size = 132   # Should be 132
hidden_size = 128                       # As used during training
num_layers = 2                          # As used during training
num_classes = 5  
model = ExerciseLSTM(input_size, hidden_size, num_layers, num_classes)
model.load_state_dict(torch.load(r'/src/utils/lstm_v1.pth', map_location='cpu'))
model.eval()

  model.load_state_dict(torch.load(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\utils\exercise_lstm_model_filtered.pth', map_location='cpu'))


ExerciseLSTM(
  (lstm): LSTM(132, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=5, bias=True)
)

In [25]:
import torch
import numpy as np
def classify_sequence(model: torch.nn.Module, sequence: np.ndarray) -> int:
    """
    Predict the exercise class for the given sequence using the model.
    """
    sequence = np.expand_dims(sequence, axis=0)
    sequence = torch.tensor(sequence, dtype=torch.float32)

    with torch.no_grad():
        outputs = model(sequence)
        return torch.nn.functional.softmax(outputs, dim=1).numpy().flatten()

In [1]:
from video_processor import VideoProcessor
video_proccesor = VideoProcessor()
# sample_sequence = video_proccesor.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\squat\squat_1.mp4')
# sample_sequence = video_proccesor.process_video(r'C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\barbell biceps curl\video1.mp4')
sample_sequence = video_proccesor.process_video(r"C:\Users\barrt\PycharmProjects\Gymalyze\src\data\videos\test\test\bench press\bench press_2.mp4")

SyntaxError: invalid syntax (video_processor.py, line 203)

In [31]:
classify_sequence(model, sample_sequence)

[0.93101114 0.03233624 0.02919892 0.00307928 0.00437451]


0

In [4]:
video_proccesor.classify_sequence(model, sample_sequence)

array([0.93101114, 0.03233624, 0.02919892, 0.00307928, 0.00437451],
      dtype=float32)