### Relevant code : 

In [2]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import mediapipe as mp
import cv2

Function to get the pose from the frame

In [3]:
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, model_complexity=1, enable_segmentation=False, min_detection_confidence=0.5)

def extract_pose_data(frame):
    # Convert the color space from BGR to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process the frame to detect the pose
    results = pose.process(frame_rgb)

    # Check if pose landmarks are detected
    if results.pose_landmarks:
        # Extract landmarks data
        pose_data = np.array([[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]).flatten()
        return pose_data
    return np.zeros(132)  # Assuming 33 landmarks each with 4 values (x, y, z, visibility)


I0000 00:00:1701277142.771275       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Generating the sequences from the dataset

In [None]:
def create_image_sequences(image_folder, sequence_length):
    sequences = []
    image_files = sorted(os.listdir(image_folder))

    for i in tqdm(range(len(image_files) - sequence_length + 1), desc="Processing image sequences"):
        sequence = []

        for j in range(i, i + sequence_length):
            img_path = os.path.join(image_folder, image_files[j])
            frame = cv2.imread(img_path)
            print(frame)
            if frame is None:
                print(f"Failed to read image: {img_path}")
                break
            pose_data = extract_pose_data(frame)
            sequence.append(pose_data)

        if len(sequence) == sequence_length:
            sequences.append(sequence)

    return np.array(sequences)

def create_label_sequences(label_folder, sequence_length, image_files):
    labels = []

    for i in tqdm(range(len(image_files) - sequence_length + 1), desc="Processing label sequences"):
        sequence_labels = []

        for j in range(i, i + sequence_length):
            label_path = os.path.join(label_folder, image_files[j].replace('.png', '.txt'))
            with open(label_path, 'r') as file:
                if file is None:
                    print(f"failed to read label: {label_path}")
                fall_label = int(file.read().split()[0])
                sequence_labels.append(fall_label)

        # Use the most frequent label in the sequence as the sequence label
        if sequence_labels:
            labels.append(max(set(sequence_labels), key=sequence_labels.count))

    return np.array(labels)

# Example usage
sequence_length = 30
image_folder = "D:\dataset\images"
label_folder = "D:\dataset\labels"

image_sequences = create_image_sequences(image_folder, sequence_length)
image_files = sorted(os.listdir(image_folder))  # Ensure this matches the sorting in image sequence creation
label_sequences = create_label_sequences(label_folder, sequence_length, image_files)

# Now image_sequences and label_sequences contain your data


In [None]:
import numpy as np

# Saving the image sequences
np.save('/path/to/save/image_sequences.npy', image_sequences)

# Saving the label sequences
np.save('/path/to/save/label_sequences.npy', label_sequences)

In [4]:
# Loading the image sequences
image_sequences = np.load('image_sequences.npy')

# Loading the label sequences
label_sequences = np.load('label_sequences.npy')

In [5]:
import torch
from torch.utils.data import DataLoader, TensorDataset

In [6]:
# Convert sequences and labels to PyTorch tensors
sequences_tensor = torch.tensor(image_sequences, dtype=torch.float32)
labels_tensor = torch.tensor(label_sequences, dtype=torch.long)

# Create a dataset and data loader
dataset = TensorDataset(sequences_tensor, labels_tensor)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# LSTM model initialization and training loop
# (Use the FallDetectionLSTM class and training loop provided earlier)


In [7]:
import torch.nn as nn

class FallDetectionLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(FallDetectionLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        # Initialize hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])

        out  = torch.sigmoid(out)
        return out


In [8]:
from sklearn.model_selection import train_test_split

# Assuming image_sequences and label_sequences are your data
train_seqs, test_seqs, train_labels, test_labels = train_test_split(
    image_sequences, label_sequences, test_size=0.2, random_state=42
)

train_dataset = TensorDataset(torch.tensor(train_seqs, dtype=torch.float32), torch.tensor(train_labels, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(test_seqs, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [9]:
import torch.optim as optim

In [10]:
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for sequences, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(sequences)
            
            # Convert labels to match the shape of the model output
            labels = labels.view(-1, 1).float()

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

def evaluate_model(model, test_loader):
    model.eval()
    total, correct = 0, 0
    with torch.no_grad():
        for sequences, labels in test_loader:
            predicted = model(sequences)

            # Convert labels to match the shape of the model output
            labels = labels.view(-1, 1).float()

            # Apply a threshold (0.5) to convert probabilities to binary predictions
            predicted = (predicted > 0.5).float()

            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')

# Assuming train_loader and test_loader are DataLoader instances for your dataset
# Adjust the input_size, hidden_size, num_layers, num_classes accordingly
input_size = 132
hidden_size = 132
num_layers = 3

# Model Initialization
model = FallDetectionLSTM(input_size, hidden_size, num_layers)
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and Evaluation
train_model(model, train_loader, criterion, optimizer, num_epochs=10)
evaluate_model(model, test_loader)


Epoch [1/10], Loss: 0.3510
Epoch [2/10], Loss: 0.1831
Epoch [3/10], Loss: 0.3009
Epoch [4/10], Loss: 0.0096
Epoch [5/10], Loss: 0.1326
Epoch [6/10], Loss: 0.0459
Epoch [7/10], Loss: 0.0011
Epoch [8/10], Loss: 0.0587
Epoch [9/10], Loss: 0.0728
Epoch [10/10], Loss: 0.0649
Accuracy: 98.09%


In [11]:
torch.save(model.state_dict(), 'lstm_model.pth')

In [12]:
import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, model_complexity=1, enable_segmentation=False, min_detection_confidence=0.5)

def extract_pose_data(frame):
    # Process frame with MediaPipe Pose
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # Extract pose landmarks or other relevant data
    # This should match the format of your training data
    # ...
    if results.pose_landmarks:
        # Extract pose landmarks and flatten the array
        pose_data = np.array([[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]).flatten()
        return pose_data
    return np.zeros(132)

cap = cv2.VideoCapture('/Users/varunshankarhoskere/Downloads/WA Test/WhatsApp Video 2023-11-25 at 13.16.04.mp4')
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
sequence_length = 30  # Assuming this is the sequence length used during training

all_sequences = []
current_sequence = []

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    pose_data = extract_pose_data(frame)
    current_sequence.append(pose_data)

    if len(current_sequence) == sequence_length:
        all_sequences.append(current_sequence.copy())
        current_sequence.pop(0)  # Slide the window

cap.release()


I0000 00:00:1701277259.769492       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro


In [13]:
input_size = 132
hidden_size = 132
num_layers = 3
num_classes = 2

In [14]:
import torch

In [15]:
# Assuming your model class is BinaryClassificationLSTM
# and you have a saved model file 'lstm_model.pth'
model = FallDetectionLSTM(input_size, hidden_size, num_layers)
model.load_state_dict(torch.load('lstm_model.pth'))
model.eval()

# Convert sequences to tensor
sequences_tensor = torch.tensor(all_sequences, dtype=torch.float32)

# Make predictions
out = []
with torch.no_grad():
    for sequence in sequences_tensor:
        sequence = sequence.unsqueeze(0)  # Add batch dimension
        output = model(sequence)
        # print(output.float())

        # Apply a threshold (0.5) to convert probabilities to binary predictions
        predicted = (output > 0.5).float()

        out.append(predicted)
        # Now `predicted` is your model's binary prediction for this sequence
        ...
if 1 in out:
    print(1)
else:
    print(0)

  sequences_tensor = torch.tensor(all_sequences, dtype=torch.float32)


1


In [17]:
model = FallDetectionLSTM(input_size, hidden_size, num_layers)
model.load_state_dict(torch.load('lstm_model.pth'))
model.eval()

FallDetectionLSTM(
  (lstm): LSTM(132, 132, num_layers=3, batch_first=True)
  (fc): Linear(in_features=132, out_features=1, bias=True)
)

In [18]:


# Assuming sequences_tensor is your input data
with torch.no_grad():
    for sequence in sequences_tensor:
        sequence = sequence.unsqueeze(0)
        output = model(sequence)
        predicted_class = torch.sigmoid(output).round()  # For binary classification with sigmoid
        # or use torch.max for softmax
        print(predicted_class)


tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1.]])
tensor([[1

In [11]:
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()

    # Process the frame and detect the pose using MediaPipe
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Check if pose landmarks are detected
    if results.pose_landmarks:
        # Extract pose landmarks and convert to tensor
        pose_landmarks = torch.tensor([[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]).flatten()
        pose_landmarks = pose_landmarks.unsqueeze(0).unsqueeze(0)  # Add batch and sequence dimensions

        # Make prediction using the LSTM model
        with torch.no_grad():
            output = model(pose_landmarks)
            predicted_label = (output > 0.5).float().item()

        # Display the result
        if predicted_label == 1:
            cv2.putText(frame, "Fall Detected!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            cv2.putText(frame, "No Fall Detected", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow('Fall Detection', frame)

    # Break the loop when 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close all windows
cap.release()
cv2.destroyAllWindows()

NameError: name 'model' is not defined

In [21]:
cap = cv2.VideoCapture("/Users/varunshankarhoskere/Downloads/WA Test/WhatsApp Video 2023-11-25 at 13.16.04.mp4")

while True:
    ret, frame = cap.read()

    # Process the frame and detect the pose using MediaPipe
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Check if pose landmarks are detected
    if results.pose_landmarks:
        # Extract pose landmarks and convert to tensor
        pose_landmarks = torch.tensor([[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]).flatten()
        pose_landmarks = pose_landmarks.unsqueeze(0).unsqueeze(0)  # Add batch and sequence dimensions

        # Make prediction using the LSTM model
        with torch.no_grad():
            output = model(pose_landmarks)
            predicted_label = (output > 0.5).float().item()

        # Display the result
        if predicted_label == 1:
            cv2.putText(frame, "Fall Detected!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        else:
            cv2.putText(frame, "No Fall Detected", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow('Fall Detection', frame)

    # Break the loop when 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close all windows
cap.release()
cv2.destroyAllWindows()

KeyboardInterrupt: 