In [1]:
import mediapipe as mp
import cv2
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import time
import math
import os

In [2]:
# Define the Dataset Class
class KeypointDataset(Dataset):
    """Dataset backed by a CSV file holding feature columns plus a ``label`` column.

    Attributes:
        data: The DataFrame read from ``csv_file``.
        X: float32 array of every column except ``label`` (one row per sample).
        y: int64 array of the ``label`` column.
    """

    def __init__(self, csv_file):
        frame = pd.read_csv(csv_file)
        feature_frame = frame.drop(columns=['label'])
        self.data = frame
        self.X = feature_frame.to_numpy().astype(np.float32)
        self.y = frame['label'].to_numpy().astype(np.int64)

    def __len__(self):
        # One sample per row of the feature matrix.
        return self.X.shape[0]

    def __getitem__(self, idx):
        # torch.tensor copies, so the returned tensors do not alias the arrays.
        features = torch.tensor(self.X[idx])
        target = torch.tensor(self.y[idx])
        return features, target

In [48]:
# Hyperparameter tuning: settings read by the data loaders, the optimizer,
# and the training loop in the cells below.

num_epochs = 4_000      # number of passes over the training loader
learning_rate = 1e-4    # learning rate handed to the optimizer
batch_size = 64         # samples per DataLoader batch

In [49]:
# Define the Neural Network (fully connected)
class KeypointClassifier(nn.Module):
    """Fully connected classifier: two ReLU hidden layers with dropout, then a linear head.

    Args:
        input_size: Number of features per sample (default 99).
        hidden_size: Width of both hidden layers (default 128).
        num_classes: Number of output scores per sample (default 2).
    """

    def __init__(self, input_size=99, hidden_size=128, num_classes=2):
        super().__init__()
        # Layers are created in fc1 -> fc2 -> fc3 order, so parameter names and
        # seeded initialisation are the same as before.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Return raw class scores (no softmax applied) for a batch of feature vectors."""
        # Each hidden stage is Linear -> ReLU -> Dropout.
        for hidden_layer in (self.fc1, self.fc2):
            x = self.dropout(F.relu(hidden_layer(x)))
        return self.fc3(x)

In [None]:
# Load Dataset and Split
# A fixed seed makes the 80/20 train/test partition identical on every run, so
# the reported test accuracy is comparable between runs.
SPLIT_SEED = 42

dataset = KeypointDataset("keypoints_dataset.csv")
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [51]:
# Train the Model
# Fresh classifier with default sizes, cross-entropy loss, and Adam over all of
# its parameters. No LR scheduler is used, so the learning rate stays constant.
model = KeypointClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
for epoch in range(num_epochs):
    model.train()  # training mode, so the dropout layers are active
    # Running SUM of the per-batch losses for this epoch (not divided by the
    # number of batches), which is what gets printed below.
    total_loss = 0
    for X_batch, y_batch in train_loader:
        loss = criterion(model(X_batch), y_batch)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

Epoch 1/4000, Loss: 1.3883
Epoch 2/4000, Loss: 1.3809
Epoch 3/4000, Loss: 1.3906
Epoch 4/4000, Loss: 1.3883
Epoch 5/4000, Loss: 1.3867
Epoch 6/4000, Loss: 1.3940
Epoch 7/4000, Loss: 1.3832
Epoch 8/4000, Loss: 1.3793
Epoch 9/4000, Loss: 1.3916
Epoch 10/4000, Loss: 1.3797
Epoch 11/4000, Loss: 1.3843
Epoch 12/4000, Loss: 1.3804
Epoch 13/4000, Loss: 1.3785
Epoch 14/4000, Loss: 1.3759
Epoch 15/4000, Loss: 1.3753
Epoch 16/4000, Loss: 1.3970
Epoch 17/4000, Loss: 1.3699
Epoch 18/4000, Loss: 1.3699
Epoch 19/4000, Loss: 1.3643
Epoch 20/4000, Loss: 1.3665
Epoch 21/4000, Loss: 1.3912
Epoch 22/4000, Loss: 1.3720
Epoch 23/4000, Loss: 1.3821
Epoch 24/4000, Loss: 1.3855
Epoch 25/4000, Loss: 1.3687
Epoch 26/4000, Loss: 1.3777
Epoch 27/4000, Loss: 1.3748
Epoch 28/4000, Loss: 1.3833
Epoch 29/4000, Loss: 1.3611
Epoch 30/4000, Loss: 1.3662
Epoch 31/4000, Loss: 1.3692
Epoch 32/4000, Loss: 1.3746
Epoch 33/4000, Loss: 1.3755
Epoch 34/4000, Loss: 1.3704
Epoch 35/4000, Loss: 1.3770
Epoch 36/4000, Loss: 1.3747
E

In [52]:
# Evaluate the Model
# Accuracy over the held-out loader: correctly classified samples / all samples.
model.eval()  # inference mode, so dropout is disabled
correct = 0
total = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        # Index of the highest score along dim 1 is the predicted class.
        predicted = torch.max(model(X_batch), dim=1).indices
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

accuracy = correct / total
print(f"Test Accuracy: {accuracy:.2f}")

Test Accuracy: 0.75


In [54]:
def extract_keypoints_from_frame(frame, pose=None):
    """Return the flattened (x, y, z) pose landmarks detected in a BGR frame.

    Args:
        frame: Image in BGR channel order (it is converted to RGB before
            being handed to the pose estimator).
        pose: Optional, already-constructed MediaPipe ``Pose`` object (or any
            object exposing a compatible ``process(rgb_image)`` method). Pass
            one when calling this function repeatedly, e.g. once per video
            frame: building a new ``Pose`` for every call is expensive. When
            omitted, a temporary ``Pose`` is created and closed for this call,
            which is the previous behaviour.

    Returns:
        1-D NumPy array ``[x0, y0, z0, x1, y1, z1, ...]`` with three values
        per detected landmark, or 99 zeros when no pose is found.
    """
    if pose is None:
        # Backward-compatible path: own a short-lived estimator for this call.
        with mp.solutions.pose.Pose(static_image_mode=False) as temp_pose:
            return extract_keypoints_from_frame(frame, pose=temp_pose)

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.process(rgb_frame)
    if not results.pose_landmarks:
        # Same length as the classifier's default input_size (99).
        return np.zeros(99)
    keypoints = [(lm.x, lm.y, lm.z) for lm in results.pose_landmarks.landmark]
    return np.array(keypoints).flatten()

class poseDetector():
    """Thin wrapper around MediaPipe Pose that can draw on OpenCV BGR images.

    Typical call order per frame is ``findPose`` -> ``findPosition`` ->
    (optionally) ``findAngle``; each step reads state stored by the previous one.

    Attributes:
        pose: The underlying ``mp.solutions.pose.Pose`` instance.
        results: Output of the most recent ``findPose`` call, or ``None`` if
            ``findPose`` has not been called yet.
        lmList: ``[index, x_px, y_px]`` entries from the most recent
            ``findPosition`` call (empty until then).
    """

    def __init__(self, static_image_mode=False, model_complexity=1,
                 smooth_landmarks=True, detectionCon=0.5, trackCon=0.5):
        self.static_image_mode = static_image_mode
        self.model_complexity = model_complexity
        self.smooth_landmarks = smooth_landmarks
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(static_image_mode=self.static_image_mode,
                                     model_complexity=self.model_complexity,
                                     smooth_landmarks=self.smooth_landmarks,
                                     min_detection_confidence=self.detectionCon,
                                     min_tracking_confidence=self.trackCon)
        # Defined up front so findPosition()/findAngle() do not raise
        # AttributeError when called before findPose()/findPosition().
        self.results = None
        self.lmList = []

    def findPose(self, img, draw=True):
        """Run pose estimation on a BGR image and store the result in ``self.results``.

        When ``draw`` is true and a pose was found, the landmarks and their
        connections are drawn onto ``img`` in place. Returns ``img``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)
        if draw and self.results.pose_landmarks:
            self.mpDraw.draw_landmarks(img, self.results.pose_landmarks,
                                       self.mpPose.POSE_CONNECTIONS)
        return img

    def findPosition(self, img, draw=True):
        """Convert the stored landmarks to pixel coordinates of ``img``.

        Returns (and stores in ``self.lmList``) a list of
        ``[index, x_px, y_px]`` entries; the list is empty when no pose is
        available. When ``draw`` is true a filled circle is drawn on ``img``
        at every landmark.
        """
        self.lmList = []
        landmarks = getattr(self.results, "pose_landmarks", None)
        if landmarks:
            # Only height and width are needed; slicing also accepts 2-D
            # (single-channel) images, and the lookup is done once, not per landmark.
            h, w = img.shape[:2]
            for idx, lm in enumerate(landmarks.landmark):
                # lm.x / lm.y are scaled by the image size to get pixel positions.
                cx, cy = int(lm.x * w), int(lm.y * h)
                self.lmList.append([idx, cx, cy])
                if draw:
                    cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
        return self.lmList

    def findAngle(self, img, p1, p2, p3, draw=True):
        """Return the angle at landmark ``p2`` between the rays to ``p1`` and ``p3``.

        The angle is measured in degrees from the ``p2->p1`` ray to the
        ``p2->p3`` ray and normalised to ``[0, 360)``. Uses the pixel
        coordinates stored by the last ``findPosition`` call.

        Raises:
            IndexError: If ``self.lmList`` has no entry for one of the indices
                (for example when no pose was detected).
        """
        # Get the landmarks
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        x3, y3 = self.lmList[p3][1:]
        # Calculate the Angle
        angle = math.degrees(math.atan2(y3 - y2, x3 - x2) -
                             math.atan2(y1 - y2, x1 - x2))
        if angle < 0:
            angle += 360
        # Draw
        if draw:
            cv2.line(img, (x1, y1), (x2, y2), (255, 255, 255), 3)
            cv2.line(img, (x3, y3), (x2, y2), (255, 255, 255), 3)
            # Each of the three points gets a filled dot plus an outer ring.
            for px, py in ((x1, y1), (x2, y2), (x3, y3)):
                cv2.circle(img, (px, py), 10, (0, 0, 255), cv2.FILLED)
                cv2.circle(img, (px, py), 15, (0, 0, 255), 2)
            cv2.putText(img, str(int(angle)), (x2 - 50, y2 + 50),
                        cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2)
        return angle
    
def main():
    """Classify posture on live webcam frames until 'q' is pressed or the stream ends.

    Uses the module-level ``model`` (which must already be trained) to label
    each frame "Correct" (class 1) or "Incorrect", and overlays the label, the
    pose skeleton, and the frame rate on the displayed image.
    """
    cap = cv2.VideoCapture(0, cv2.CAP_AVFOUNDATION)
    print("Real-time posture detection started. Press 'q' to exit.")
    if not cap.isOpened():
        print("Could not open camera 0; nothing to display.")

    try:
        detector = poseDetector()
        # Do not depend on an earlier cell having switched dropout off.
        model.eval()
        pTime = time.time()

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # One pose inference per frame: the detector result is used both for
            # drawing and as the classifier input, instead of building a second
            # Pose estimator for every frame.
            frame = detector.findPose(frame)
            lmList = detector.findPosition(frame, draw=False)

            landmarks = detector.results.pose_landmarks
            if landmarks:
                keypoints = np.array(
                    [(lm.x, lm.y, lm.z) for lm in landmarks.landmark]).flatten()
            else:
                # No pose found: feed an all-zero vector, as before.
                keypoints = np.zeros(99)
            keypoints_tensor = torch.tensor(keypoints, dtype=torch.float32).unsqueeze(0)

            with torch.no_grad():
                output = model(keypoints_tensor)
                _, predicted = torch.max(output, 1)
            posture = "Correct" if predicted[0].item() == 1 else "Incorrect"

            if len(lmList) != 0:
                # Example: Drawing a circle around keypoint #14
                cv2.circle(frame, (lmList[14][1], lmList[14][2]), 15, (0, 0, 255), cv2.FILLED)

            # Calculate FPS; guard against a zero interval on coarse clocks.
            cTime = time.time()
            elapsed = cTime - pTime
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime

            # Add posture label and FPS to the frame
            cv2.putText(frame, f"Posture: {posture}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0) if posture == "Correct" else (0, 0, 255), 2)
            cv2.putText(frame, f"FPS: {int(fps)}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Display the frame
            cv2.imshow("Posture Detection", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if the loop above raised.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Real-time posture detection started. Press 'q' to exit.


I0000 00:00:1733902077.588021  771071 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
I0000 00:00:1733902077.623660  771071 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2 Pro
W0000 00:00:1733902077.675544  786229 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733902077.691778  786243 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733902077.692480  786231 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733902077.709988  786243 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for fe