### import modules

In [2]:
import cv2
import torch
import mediapipe as mp
import torch.nn as nn
from collections import deque

### define model

In [3]:
class FallDetectionLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid.

    The network consumes a batch of feature sequences and emits one value in
    [0, 1] per sequence, computed from the LSTM output at the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the model on a batch of sequences.

        Args:
            x: Tensor of shape (batch_size, seq_length, input_size).

        Returns:
            Tensor of shape (batch_size, 1) with sigmoid-activated scores.
        """
        # Zero initial hidden/cell states, allocated directly with the input's
        # dtype and device so the model also works after .double()/.half()
        # and avoids a CPU allocation followed by a device copy.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        # seq_out: (batch_size, seq_length, hidden_size)
        seq_out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the classifier head.
        logits = self.fc(seq_out[:, -1, :])
        return torch.sigmoid(logits)


### model parameters and pose detection initialization

In [4]:
# LSTM hyperparameters. Each frame is fed to the model as a flattened list of
# (x, y, z, visibility) values per pose landmark; 132 = 33 landmarks * 4 values
# (assumes MediaPipe Pose's 33-landmark output — TODO confirm against training).
input_size = 132
hidden_size = 132
num_layers = 3

# MediaPipe Pose estimator configured for a video stream (not independent images).
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    static_image_mode=False,
    model_complexity=1,
    enable_segmentation=False,
    min_detection_confidence=0.5,
)

I0000 00:00:1701375705.148586       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


### create an instance of the model 

In [5]:
# Build the network with the hyperparameters defined above and restore the
# trained weights from disk.
model = FallDetectionLSTM(input_size, hidden_size, num_layers)
# map_location='cpu' lets a checkpoint saved on a GPU machine load on a
# CPU-only one; the model is never moved off the CPU in this notebook.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source, or pass weights_only=True
# if the installed PyTorch version supports it.
model.load_state_dict(torch.load('lstm_model.pth', map_location='cpu'))
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

FallDetectionLSTM(
  (lstm): LSTM(132, 132, num_layers=3, batch_first=True)
  (fc): Linear(in_features=132, out_features=1, bias=True)
)

In [17]:
import os

In [18]:
def write_to_video(frames_deque, output_filename, fps=20.0, frame_size=(640, 480)):
    """Write a sequence of BGR frames to an MP4 ('mp4v') video file.

    An existing file at ``output_filename`` is overwritten: cv2.VideoWriter
    has no append mode, so the previous "append" branch was functionally
    identical to the "new file" branch and has been removed.

    Args:
        frames_deque: Iterable of frames (NumPy arrays as returned by
            ``cv2.VideoCapture.read``). ``None`` entries are skipped.
        output_filename: Path of the video file to create.
        fps: Frame rate of the output video.
        frame_size: ``(width, height)`` of the output video. Frames with a
            different size are resized to it, because a VideoWriter does not
            store frames whose size differs from the one it was opened with.

    Raises:
        IOError: If the video writer cannot be opened for ``output_filename``.
    """
    frame_size = (int(frame_size[0]), int(frame_size[1]))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_filename, fourcc, fps, frame_size, isColor=True)

    if not out.isOpened():
        out.release()
        raise IOError(f"Could not open video writer for {output_filename!r}")

    try:
        for frame in frames_deque:
            if frame is None:
                # A failed capture read yields None; there is nothing to write.
                continue
            # frame.shape is (height, width, ...) while frame_size is (width, height).
            if (frame.shape[1], frame.shape[0]) != frame_size:
                frame = cv2.resize(frame, frame_size)
            out.write(frame)
    finally:
        # Always finalize the file, even if writing a frame fails.
        out.release()

### predict on a recorded video

In [19]:
# NOTE(review): absolute, machine-specific path — replace with a configurable
# location before sharing this notebook.
cap = cv2.VideoCapture("/Users/varunshankarhoskere/Downloads/WhatsApp Video 2023-11-22 at 12.45.34.mp4")
if not cap.isOpened():
    raise IOError("Could not open the input video")

window_size = 100
# Rolling buffer of the most recent frames. It is filled but not read in this
# cell; the stored frames are the same objects that get annotated below.
frame_window = deque(maxlen=window_size)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read failure): frame is None, so stop
            # before it reaches cvtColor/imshow.
            break

        frame_window.append(frame)

        # Process the frame and detect the pose using MediaPipe (expects RGB input)
        results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Check if pose landmarks are detected
        if results.pose_landmarks:
            # Flatten (x, y, z, visibility) of every landmark into one feature vector
            pose_landmarks = torch.tensor(
                [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
            ).flatten()
            # Add batch and sequence dimensions -> shape (1, 1, num_features).
            # NOTE(review): the LSTM therefore sees a sequence of length 1 per
            # frame — confirm this matches how the model was trained.
            pose_landmarks = pose_landmarks.unsqueeze(0).unsqueeze(0)

            # Make prediction using the LSTM model
            with torch.no_grad():
                output = model(pose_landmarks)
                predicted_label = (output > 0.5).float().item()

            if predicted_label == 1:
                cv2.putText(frame, "Fall Detected!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                cv2.putText(frame, "No Fall Detected", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('Fall Detection', frame)

        # Break the loop when 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video and close all windows, even on error or interrupt
    cap.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

### experimental: save the last 10 seconds of video when a fall is detected

In [22]:
import cv2
import torch
from collections import deque

# Initialize VideoCapture
# NOTE(review): absolute, machine-specific path — replace with a configurable
# location before sharing this notebook.
cap = cv2.VideoCapture("/Users/varunshankarhoskere/Downloads/WhatsApp Video 2023-11-22 at 12.45.34.mp4")
if not cap.isOpened():
    raise IOError("Could not open the input video")

fps = cap.get(cv2.CAP_PROP_FPS)  # Frames per second reported by the video
if not (fps > 0):
    # An FPS of 0 (or NaN) would give an empty buffer and an invalid
    # output video; fall back to write_to_video's default rate.
    fps = 20.0
ten_seconds_frames = max(1, int(10 * fps))  # Number of frames covering 10 seconds

# Rolling buffer holding (at most) the last 10 seconds of raw frames
frame_window = deque(maxlen=ten_seconds_frames)
fall_detected = False  # Set once the first fall clip has been saved

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # Break the loop if no frame is captured

        frame_window.append(frame)

        results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Reset for every frame: without this, a frame with no detected pose
        # would reuse the previous frame's label (or raise NameError if no
        # pose has been detected yet).
        predicted_label = 0.0

        # Check if pose landmarks are detected
        if results.pose_landmarks:
            # Flatten (x, y, z, visibility) of every landmark into one feature vector
            pose_landmarks = torch.tensor(
                [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
            ).flatten()
            pose_landmarks = pose_landmarks.unsqueeze(0).unsqueeze(0)  # Add batch and sequence dimensions

            # Make prediction using the LSTM model
            with torch.no_grad():
                output = model(pose_landmarks)
                predicted_label = (output > 0.5).float().item()

        if predicted_label == 1 and not fall_detected:
            # Save the buffered frames (up to the last 10 seconds) once
            fall_detected = True
            write_to_video(
                frame_window,
                'output.mp4',
                fps=fps,
                frame_size=(frame.shape[1], frame.shape[0]),  # (width, height)
            )

        # Rest of your code for displaying the frame and breaking the loop...
finally:
    # Release the video and close all windows, even on error or interrupt
    cap.release()
    cv2.destroyAllWindows()


KeyboardInterrupt: 