In [None]:
% pip install torch torchvision torchaudio


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
# LIBRARIES

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# CONSTANTS

NUM_KEYPOINTS = 33  # MediaPipe gives 33 landmarks
FEATURES_PER_KEYPOINT = 2  # Only using (x, y) coordinates for now
SEQUENCE_LENGTH = 16  # Number of frames analyzed at a time
NUM_CLASSES = 2  # "eccentric", "concentric"
INPUT_SIZE = NUM_KEYPOINTS * FEATURES_PER_KEYPOINT  # 66

# Seed PyTorch's global RNG so that weight initialization and the random
# dummy tensors created later are reproducible across Restart & Run All.
# The trailing ';' keeps the returned Generator object out of the cell output.
SEED = 42
torch.manual_seed(SEED);


In [11]:
class RepStageClassifier(nn.Module):
    """LSTM-based classifier for the current stage of an exercise rep.

    Takes a short sequence of pose keypoints (e.g., 16 frames) and produces
    one score per stage (eccentric / concentric).

    Args:
        input_size: Features per frame (e.g., 66 for 33 keypoints × 2D).
        hidden_size: Size of the LSTM hidden state (can be tuned).
        num_classes: Number of stage classes to score.
    """

    def __init__(self, input_size, hidden_size=128, num_classes=NUM_CLASSES):
        super().__init__()

        # Recurrent encoder over the frame sequence; batch_first=True means
        # inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )

        # Classification head mapping the LSTM features to class scores.
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Score every sequence in the batch.

        Args:
            x: Tensor of shape (batch_size, sequence_length, input_size).

        Returns:
            Tensor of shape (batch_size, num_classes) with the class scores.
        """
        # The LSTM also returns its (hidden_state, cell_state) pair, unused here.
        per_frame_features, _ = self.lstm(x)

        # Only the output at the last time step (the final frame) feeds the head.
        last_frame_features = per_frame_features[:, -1, :]
        return self.fc(last_frame_features)

# Build the classifier sized for the per-frame keypoint feature count
model = RepStageClassifier(input_size=INPUT_SIZE)


In [13]:
# Loss: cross-entropy over the class scores, suited to classification
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [15]:
# Random stand-in batch laid out as (batch, frames, features per frame)
batch_size = 4
dummy_input = torch.randn((batch_size, SEQUENCE_LENGTH, INPUT_SIZE))


In [16]:
# One random class index per sample (0 = eccentric, 1 = concentric)
dummy_labels = torch.randint(low=0, high=NUM_CLASSES, size=(batch_size,))
print("Dummy labels:", dummy_labels)


Dummy labels: tensor([1, 0, 0, 0])


In [17]:
# FULL DUMMY TRAINING (a single optimization step on the dummy batch)

# Clear any gradients left over from a previous step
optimizer.zero_grad()

# Forward: class scores for every sequence in the batch
outputs = model(dummy_input)

# Compare the scores against the target stages
loss = criterion(outputs, dummy_labels)

# Backward: compute gradients, then let the optimizer apply them
loss.backward()
optimizer.step()

# Report the loss that was computed before this weight update
print("Training loss:", loss.item())

# Highest-scoring class per sample, shown next to the true labels
predicted = outputs.argmax(dim=1)
print("Predicted stages:", predicted)
print("True stages:     ", dummy_labels)


Training loss: 0.6813094019889832
Predicted stages: tensor([0, 0, 0, 0])
True stages:      tensor([1, 0, 0, 0])
