In [None]:
import cv2
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn

# Pick the compute device: use the CUDA GPU when PyTorch can see one, else the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fully connected classifier used for fall detection
class FallDetectionDNN(nn.Module):
    """Three-layer multilayer perceptron that scores a flattened feature vector.

    Layout: Linear(input_size -> hidden_size) -> ReLU -> Dropout(0.5)
            -> Linear(hidden_size -> hidden_size // 2) -> ReLU -> Dropout(0.5)
            -> Linear(hidden_size // 2 -> num_classes).

    Args:
        input_size: Length of each input feature vector (default 1620).
        hidden_size: Width of the first hidden layer; the second hidden layer
            is half as wide (integer division).
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size=1620, hidden_size=512, num_classes=2):
        super().__init__()
        narrow_size = hidden_size // 2

        # The attribute names below are the state_dict keys of saved
        # checkpoints, so they must not be renamed.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(hidden_size, narrow_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)

        self.fc3 = nn.Linear(narrow_size, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch ``x``.

        ``x`` is passed straight to ``fc1``, so its last dimension must equal
        ``input_size``. No softmax is applied here.
        """
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.dropout2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)

# Load the trained model
model_path = "../AI_Train/Models/dnn_fall_detection.pth"
try:
    model = FallDetectionDNN(input_size=1620).to(device)
    # NOTE(review): torch.load unpickles the file. If the checkpoint is a plain
    # state_dict and the installed torch supports it, consider weights_only=True.
    model.load_state_dict(torch.load(model_path, map_location=device))
except Exception as e:
    print(f"Error loading model: {e}")
    # Abort with SystemExit rather than the interactive helper exit():
    #   * `model` is already bound (with random weights) by the time
    #     load_state_dict fails, so if execution continues the detector would
    #     run on an untrained network;
    #   * the recorded output of this notebook shows MediaPipe starting up
    #     after the load error, i.e. exit() did not stop the cell;
    #   * when run as a script, exit() with no argument reports status 0 even
    #     though loading failed.
    raise SystemExit(1) from e
else:
    # Inference mode: disables the Dropout layers.
    model.eval()
    print(f"Model loaded successfully from {model_path}")

# Set up MediaPipe: landmark-drawing helpers and the pose estimator
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=False,        # treat input as a continuous video stream
    model_complexity=1,
    enable_segmentation=False,      # only landmarks are used below
    min_detection_confidence=0.7,
)

# Row order of the 18 joints in every skeleton frame (same as training)
landmark_order = [
    "Nose",
    "Neck",
    "Right Shoulder", "Right Elbow", "Right Wrist",
    "Left Shoulder", "Left Elbow", "Left Wrist",
    "Right Hip", "Right Knee", "Right Ankle",
    "Left Hip", "Left Knee", "Left Ankle",
    "Right Eye", "Left Eye",
    "Right Ear", "Left Ear",
]

# Joint name -> index into the MediaPipe pose landmark list.
# "Neck" is intentionally absent: the main loop derives it as the midpoint of
# the two shoulders instead of reading a landmark.
landmark_indices = {
    "Nose": 0,
    "Right Shoulder": 12,
    "Right Elbow": 14,
    "Right Wrist": 16,
    "Left Shoulder": 11,
    "Left Elbow": 13,
    "Left Wrist": 15,
    "Right Hip": 24,
    "Right Knee": 26,
    "Right Ankle": 28,
    "Left Hip": 23,
    "Left Knee": 25,
    "Left Ankle": 27,
    "Right Eye": 5,
    "Left Eye": 2,
    "Right Ear": 8,
    "Left Ear": 7,
}

# Mutable runtime state
frame_count = 0    # frames read from the camera so far
buffer = []        # one skeleton array per processed frame
prob_buffer = []   # fall probabilities (in percent) plotted on the graph

# Inference settings (threshold, window and step can be changed from the keyboard)
fps = 30           # Assumed FPS
window_size = 30   # Matches training (1 second at 30 FPS)
step_size = 2      # Run inference every `step_size` frames (frequent checks)
threshold = 0.5    # Adjusted based on previous feedback

# Probability graph geometry
graph_width, graph_height = 400, 100
max_points = 100   # maximum number of probabilities kept for plotting

# Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open camera.")
    cap.release()
    # SystemExit instead of the interactive helper exit(): it stops execution
    # of the notebook cell as well as a script, and reports a failure status.
    raise SystemExit(1)

# Frame dimensions: probe one frame so the graph can be anchored to the bottom
# of the image; fall back to 640x480 when the probe read fails.
probe_ok, frame = cap.read()
if probe_ok and frame is not None:
    frame_height, frame_width = frame.shape[:2]
else:
    frame_height, frame_width = 480, 640
# Top row of the graph overlay: 10 px above the bottom edge of the frame.
graph_y_offset = frame_height - graph_height - 10

# Main loop
#
# Per frame: read the camera, run MediaPipe pose estimation, append a
# hip-centred skeleton to `buffer`, every `step_size` frames classify the most
# recent window, then draw the status text and the probability graph.
#
# Keys: q quit | +/= and - threshold | w / s window size | d / a step size.

# Bounds for the interactive window-size keys; also the amount of history kept.
_MIN_WINDOW_SIZE = 10
_MAX_WINDOW_SIZE = 60


def _skeleton_from_landmarks(landmarks):
    """Build one skeleton frame from a MediaPipe pose landmark list.

    Every joint in ``landmark_order`` is expressed relative to the midpoint of
    the two hip landmarks (indices 23 and 24). "Neck" is the midpoint of the
    two shoulder landmarks (indices 11 and 12).

    Returns:
        numpy array of shape (len(landmark_order), 3) holding x, y, z per joint.
    """
    left_hip, right_hip = landmarks[23], landmarks[24]
    ref_x = (left_hip.x + right_hip.x) / 2
    ref_y = (left_hip.y + right_hip.y) / 2
    ref_z = (left_hip.z + right_hip.z) / 2

    left_shoulder, right_shoulder = landmarks[11], landmarks[12]
    neck_x = (left_shoulder.x + right_shoulder.x) / 2
    neck_y = (left_shoulder.y + right_shoulder.y) / 2
    neck_z = (left_shoulder.z + right_shoulder.z) / 2

    skeleton = np.zeros((len(landmark_order), 3))
    for row, part in enumerate(landmark_order):
        if part == "Neck":
            skeleton[row] = [neck_x - ref_x, neck_y - ref_y, neck_z - ref_z]
        else:
            lm = landmarks[landmark_indices[part]]
            skeleton[row] = [lm.x - ref_x, lm.y - ref_y, lm.z - ref_z]
    return skeleton


def _render_probability_graph(history, seconds_per_point):
    """Draw the fall-probability history as a small BGR image.

    Args:
        history: Probabilities in percent (0-100), oldest first.
        seconds_per_point: Time between two consecutive entries, used for the
            time-span label.

    Returns:
        uint8 array of shape (graph_height, graph_width, 3); all black when
        ``history`` is empty.
    """
    graph_img = np.zeros((graph_height, graph_width, 3), dtype=np.uint8)
    if not history:
        return graph_img

    white = (255, 255, 255)
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Axes and labels
    cv2.line(graph_img, (0, graph_height - 10), (graph_width, graph_height - 10), white, 1)
    cv2.line(graph_img, (10, 0), (10, graph_height), white, 1)
    cv2.putText(graph_img, "Time (s)", (graph_width - 50, graph_height - 5), font, 0.4, white, 1)
    cv2.putText(graph_img, "Prob (%)", (5, 15), font, 0.4, white, 1)
    cv2.putText(graph_img, "100", (2, 15), font, 0.3, white, 1)
    cv2.putText(graph_img, "0", (2, graph_height - 15), font, 0.3, white, 1)

    # Map sample index -> x and probability -> y (100 % at the top).
    points = [
        (
            int(i * (graph_width - 20) / max_points) + 10,
            int((1 - prob / 100) * (graph_height - 20)) + 10,
        )
        for i, prob in enumerate(history)
    ]
    for start, end in zip(points, points[1:]):
        cv2.line(graph_img, start, end, (0, 255, 0), 1)

    # One point is produced per inference, not per frame, so the covered time
    # is points * seconds_per_point. This is approximate if the step size was
    # changed while the history was being collected.
    time_span = len(history) * seconds_per_point
    cv2.putText(graph_img, f"{time_span:.1f}s", (graph_width - 30, graph_height - 15), font, 0.3, white, 1)
    return graph_img


# Number of frames the network's first layer expects per sample
# (1620 // (18 * 3) == 30 for the model defined in this file).
_model_frames = model.fc1.in_features // (len(landmark_order) * 3)

# Last classification result; redrawn on every frame so the overlay does not
# flicker on frames where no inference runs.
status_text = None
status_color = (255, 255, 255)

try:
    while True:
        success, image = cap.read()
        if not success:
            print("Error: Unable to read frame from camera.")
            break

        frame_count += 1
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        if results.pose_landmarks:
            skeleton_frame = _skeleton_from_landmarks(results.pose_landmarks.landmark)
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        else:
            # No person detected: keep the timeline contiguous with an all-zero frame.
            skeleton_frame = np.zeros((len(landmark_order), 3))

        buffer.append(skeleton_frame)
        # Only the largest selectable window is ever needed; drop older frames
        # so memory use stays constant during long sessions.
        del buffer[:-_MAX_WINDOW_SIZE]

        if len(buffer) < window_size:
            cv2.putText(image, f"Buffering... ({len(buffer)}/{window_size})", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        else:
            if frame_count % step_size == 0:
                window = np.stack(buffer[-window_size:], axis=0)  # (window_size, 18, 3)
                if window_size != _model_frames:
                    # The network input length is fixed, so a window of another
                    # length is resampled in time (nearest frame) to the
                    # expected frame count instead of crashing with a shape
                    # error. At the default window size this branch is skipped.
                    # NOTE(review): how well the model handles resampled
                    # windows depends on its training data - confirm.
                    pick = np.linspace(0, window_size - 1, _model_frames).round().astype(int)
                    window = window[pick]
                skeleton_tensor = torch.tensor(window.reshape(1, -1), dtype=torch.float32).to(device)

                with torch.no_grad():
                    output = model(skeleton_tensor)
                    probabilities = torch.softmax(output, dim=1)
                    fall_prob = probabilities[0, 1].item()  # class index 1 = fall
                prediction = 1 if fall_prob >= threshold else 0

                prob_buffer.append(fall_prob * 100)
                del prob_buffer[:-max_points]

                if prediction == 1:
                    status_text = f"Fall Detected ({fall_prob:.2f})"
                    status_color = (0, 0, 255)
                else:
                    status_text = f"No Fall ({fall_prob:.2f})"
                    status_color = (0, 255, 0)

            if status_text is not None:
                cv2.putText(image, status_text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, status_color, 2)
                info_text = f"Threshold: {threshold:.2f} (+/-), Window: {window_size}, Step: {step_size}"
                cv2.putText(image, info_text, (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # Draw graph
        graph_img = _render_probability_graph(prob_buffer, step_size / fps)
        # Clip the overlay to the live frame so a frame smaller than the probed
        # (or fallback) size cannot cause a shape-mismatch error.
        top = max(0, min(graph_y_offset, image.shape[0] - graph_height))
        target = image[top:top + graph_height, 10:10 + graph_width]
        target[...] = graph_img[:target.shape[0], :target.shape[1]]

        cv2.imshow("Live Fall Detection (DNN)", image)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('+') or key == ord('='):
            threshold = min(1.0, threshold + 0.05)
        elif key == ord('-'):
            threshold = max(0.0, threshold - 0.05)
        elif key == ord('w'):
            window_size = min(_MAX_WINDOW_SIZE, window_size + 5)
        elif key == ord('s'):
            window_size = max(_MIN_WINDOW_SIZE, window_size - 5)
        elif key == ord('d'):
            step_size = min(10, step_size + 1)
        elif key == ord('a'):
            step_size = max(1, step_size - 1)
finally:
    # Release resources even when the loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()
    pose.close()

Error loading model: [Errno 2] No such file or directory: '../AI_Train/Models/dnn_fall_detection.pth'


I0000 00:00:1744339986.210187 2559143 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1744339986.334090 2559983 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1744339986.385033 2559987 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1744340006.138171 2559986 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


: 