In [None]:
import cv2
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn

# Set device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the ST-GCN model class (unchanged)
class STGCN(nn.Module):
    """Convolutional classifier over skeleton sequences.

    The network takes a 4-D tensor; the caller in this file feeds it as
    (batch, 3, frames, 18). It applies four conv -> batch-norm -> ReLU stages
    that alternate a 1x1 convolution with a (3, 1) convolution (padding
    (1, 0), so dim 2 keeps its length), adaptively average-pools to
    1 x num_joints, flattens, and maps to `num_classes` logits.

    No adjacency matrix is involved: the convolutions only span channels and
    dim 2, and the final linear layer is the only layer that combines values
    across the last dimension.

    Args:
        in_channels: Number of channels per joint in the input.
        num_joints: Width the pooled feature map is resized to; also sizes
            the final linear layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_channels=3, num_joints=18, num_classes=2):
        super().__init__()
        self.num_joints = num_joints

        narrow, wide = 64, 128
        pointwise = (1, 1)
        along_frames = (3, 1)
        keep_length = (1, 0)

        # Attribute names and registration order are kept as-is so that saved
        # state_dict checkpoints continue to load.
        self.conv1 = nn.Conv2d(in_channels, narrow, kernel_size=pointwise)
        self.conv2 = nn.Conv2d(narrow, narrow, kernel_size=along_frames, padding=keep_length)
        self.conv3 = nn.Conv2d(narrow, wide, kernel_size=pointwise)
        self.conv4 = nn.Conv2d(wide, wide, kernel_size=along_frames, padding=keep_length)
        self.bn1 = nn.BatchNorm2d(narrow)
        self.bn2 = nn.BatchNorm2d(narrow)
        self.bn3 = nn.BatchNorm2d(wide)
        self.bn4 = nn.BatchNorm2d(wide)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.AdaptiveAvgPool2d((1, num_joints))
        self.fc = nn.Linear(wide * num_joints, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input tensor `x`."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in stages:
            x = self.relu(norm(conv(x)))

        pooled = self.pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

# Load the pre-trained weights into a fresh STGCN and switch it to eval mode.
model_path = "Models/stgcn_fall_detection.pth"
try:
    model = STGCN(in_channels=3, num_joints=18, num_classes=2).to(device)
    # NOTE(security): torch.load unpickles the file, so model_path must only
    # ever point at a checkpoint from a trusted source (consider
    # weights_only=True where the installed torch version supports it).
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    print(f"Model loaded successfully from {model_path}")
except Exception as e:
    print(f"Error loading model: {e}")
    # exit() is the interactive-shell helper installed by `site` and reports
    # success (status 0). Raise SystemExit instead so nothing below runs
    # without a model and the failure is visible in the exit status.
    raise SystemExit(1) from e

# Initialize MediaPipe Pose and the drawing helper used for the overlay
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=1,
    enable_segmentation=False,
    min_detection_confidence=0.5,
)

# The 18 joints fed to the model, as (joint name, MediaPipe pose landmark
# index) pairs in model row order (per the original note, the same order as
# used during training). "Neck" has no MediaPipe landmark of its own, hence
# None; the main loop derives it from the two shoulders.
_JOINT_TABLE = (
    ("Nose", 0),
    ("Neck", None),
    ("Right Shoulder", 12),
    ("Right Elbow", 14),
    ("Right Wrist", 16),
    ("Left Shoulder", 11),
    ("Left Elbow", 13),
    ("Left Wrist", 15),
    ("Right Hip", 24),
    ("Right Knee", 26),
    ("Right Ankle", 28),
    ("Left Hip", 23),
    ("Left Knee", 25),
    ("Left Ankle", 27),
    ("Right Eye", 5),
    ("Left Eye", 2),
    ("Right Ear", 8),
    ("Left Ear", 7),
)

# Row order of the skeleton array
landmark_order = [joint for joint, _ in _JOINT_TABLE]

# Joint name -> MediaPipe landmark index, for every joint MediaPipe provides
landmark_indices = {
    joint: mp_index for joint, mp_index in _JOINT_TABLE if mp_index is not None
}

# Rolling store of per-frame skeletons (one (18, 3) array per camera frame).
# Only the newest `window_size` frames are ever classified, so the loop trims
# it to MAX_WINDOW_SIZE entries instead of letting it grow for the whole run.
buffer = []
window_size = 30  # Number of frames per window
# NOTE(review): step_size is adjustable (a/d) and shown in the overlay, but
# inference below runs on every frame regardless of its value.
step_size = 5

# Limits applied by the keyboard controls
MIN_WINDOW_SIZE, MAX_WINDOW_SIZE = 10, 60
MIN_STEP_SIZE, MAX_STEP_SIZE = 1, 10

# Initial threshold for fall detection
threshold = 0.8


def _skeleton_from_landmarks(landmarks):
    """Return an (18, 3) array of joint coordinates relative to the mid-hip point.

    Args:
        landmarks: Sequence indexed by MediaPipe pose landmark index whose
            items expose `.x`, `.y` and `.z`.

    Returns:
        Array with one row per entry of `landmark_order`. "Neck" is the
        midpoint of the two shoulders; every row has the midpoint of the two
        hips subtracted.
    """
    def xyz(index):
        lm = landmarks[index]
        return np.array([lm.x, lm.y, lm.z])

    hip_center = (xyz(landmark_indices["Left Hip"]) + xyz(landmark_indices["Right Hip"])) / 2
    neck = (xyz(landmark_indices["Left Shoulder"]) + xyz(landmark_indices["Right Shoulder"])) / 2

    frame = np.zeros((len(landmark_order), 3))
    for row, part in enumerate(landmark_order):
        point = neck if part == "Neck" else xyz(landmark_indices[part])
        frame[row] = point - hip_center
    return frame


def _fall_probability(frames):
    """Run the model on a list of (18, 3) skeleton frames and return the class-1 probability."""
    sequence = np.stack(frames, axis=0)  # Shape: (len(frames), 18, 3)
    batch = torch.tensor(sequence, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0)  # Shape: (1, 3, len(frames), 18)
    with torch.no_grad():
        logits = model(batch.to(device))
    return torch.softmax(logits, dim=1)[0, 1].item()


# Start capturing video from the default camera
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open camera.")
    cap.release()
    # exit() is the interactive-shell helper and reports status 0; raise
    # SystemExit so the script stops here with a failing status.
    raise SystemExit(1)

# Main loop for real-time processing. try/finally guarantees the camera, the
# preview window and the MediaPipe graph are released even if a frame raises.
try:
    while True:
        success, image = cap.read()
        if not success:
            print("Error: Unable to read frame from camera.")
            break

        # MediaPipe expects RGB; OpenCV delivers BGR
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if results.pose_landmarks:
            skeleton_frame = _skeleton_from_landmarks(results.pose_landmarks.landmark)
            # Draw the detected landmarks on the image for visualization
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        else:
            # No person detected: keep the timeline continuous with a zero frame
            skeleton_frame = np.zeros((len(landmark_order), 3))

        # Add the current frame and drop anything older than the largest
        # window the user can select (no-op while the buffer is still short).
        buffer.append(skeleton_frame)
        del buffer[:-MAX_WINDOW_SIZE]

        if len(buffer) >= window_size:
            # Classify the most recent window
            fall_prob = _fall_probability(buffer[-window_size:])

            # Display the prediction and probability on the frame
            if fall_prob >= threshold:
                text = f"Fall Detected! ({fall_prob:.2f})"
                color = (0, 0, 255)  # Red
            else:
                text = f"No Fall ({fall_prob:.2f})"
                color = (0, 255, 0)  # Green
            cv2.putText(image, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

            # Display the current threshold and window info
            info_text = f"Threshold: {threshold:.2f} (+/-), Window: {window_size}, Step: {step_size}"
            cv2.putText(image, info_text, (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        else:
            # Display buffering message until enough frames are collected
            cv2.putText(image, f"Buffering... ({len(buffer)}/{window_size})", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Show the processed frame
        cv2.imshow("Live Fall Detection", image)

        # Handle key presses to adjust threshold, window size, step size, or exit
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):  # Exit on 'q'
            break
        elif key == ord('+') or key == ord('='):  # Increase threshold
            threshold = min(1.0, threshold + 0.05)
        elif key == ord('-'):  # Decrease threshold
            threshold = max(0.0, threshold - 0.05)
        elif key == ord('w'):  # Increase window size
            window_size = min(MAX_WINDOW_SIZE, window_size + 5)
        elif key == ord('s'):  # Decrease window size
            window_size = max(MIN_WINDOW_SIZE, window_size - 5)
        elif key == ord('d'):  # Increase step size
            step_size = min(MAX_STEP_SIZE, step_size + 1)
        elif key == ord('a'):  # Decrease step size
            step_size = max(MIN_STEP_SIZE, step_size - 1)
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    pose.close()

Error loading model: [Errno 2] No such file or directory: 'Models/stgcn_fall_detection.pth'


I0000 00:00:1743945927.249937       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


: 