In [7]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("antonygarciag/fall-audio-detection-dataset")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\bryan\.cache\kagglehub\datasets\antonygarciag\fall-audio-detection-dataset\versions\5


In [10]:
import os
import pandas as pd

# Path to dataset
DATASET_PATH = "C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall-audio-detection-dataset"

# List all audio files
files = [f for f in os.listdir(DATASET_PATH) if f.endswith(".wav")]

# Extract labels from filenames
data = []
for file in files:
    parts = file.split("-")  # Split by '-'
    
    if len(parts) >= 5:
        label = int(parts[-1].split(".")[0])  # Extract FF (last number before .wav)
        label = 1 if label == 1 else 0  # Convert to binary (1 = Fall, 0 = Non-Fall)
        data.append([file, label])

# Convert to DataFrame
df = pd.DataFrame(data, columns=["Filename", "Label"])

# Save labeled dataset
csv_path = os.path.join(DATASET_PATH, "labeled_dataset.csv")
df.to_csv(csv_path, index=False)

print(f"[INFO] Labeled dataset saved at: {csv_path}")
print(df.head())  # Display first few rows

[INFO] Labeled dataset saved at: C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall-audio-detection-dataset\labeled_dataset.csv
               Filename  Label
0  01-020-02-073-01.wav      1
1  01-022-07-014-01.wav      1
2  01-025-00-304-02.wav      0
3  01-028-01-028-01.wav      1
4  01-029-00-330-02.wav      0


In [11]:
# Load labeled dataset
df_labels = pd.read_csv(os.path.join(DATASET_PATH, "C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall-audio-detection-dataset/labeled_dataset.csv"))

# Create file-label mapping
file_to_label = dict(zip(df_labels["Filename"], df_labels["Label"]))

# Load dataset with proper labels
train_files = []
train_labels = []

for file in os.listdir(DATASET_PATH):
    if file.endswith(".wav") and file in file_to_label:
        train_files.append(os.path.join(DATASET_PATH, file))
        train_labels.append(file_to_label[file])  # Use actual label

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import librosa
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from tqdm import tqdm

# --- Set device (Use GPU if available) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")

# --- Paths ---
DATASET_PATH = "C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall-audio-detection-dataset"
LABELS_FILE = os.path.join(DATASET_PATH, "labeled_dataset.csv")

# --- Load labeled dataset ---
try:
    df_labels = pd.read_csv(LABELS_FILE)
except FileNotFoundError:
    print(f"[ERROR] Labels file not found: {LABELS_FILE}")
    exit()

file_to_label = dict(zip(df_labels["Filename"], df_labels["Label"]))

# --- Feature Extraction: MFCC Only ---
def extract_features(audio_file):
    try:
        y, sr = librosa.load(audio_file, sr=16000)

        # Extract MFCC (Only 20 features)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
        mfcc_mean = np.mean(mfcc, axis=1)  # Take the mean across time axis

        return mfcc_mean  # Only MFCC features (20 values)

    except Exception as e:
        print(f"[WARNING] Failed to extract features from {audio_file}: {e}")
        return None

# --- Custom Dataset Class ---
class FallDataset(Dataset):
    def __init__(self, dataset_path, file_to_label):
        self.file_paths = list(file_to_label.keys())
        self.labels = list(file_to_label.values())
        self.dataset_path = dataset_path

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = os.path.join(self.dataset_path, self.file_paths[idx])
        features = extract_features(file_path)

        # Handle missing files
        if features is None:
            return self.__getitem__((idx + 1) % len(self.file_paths))  # Skip to next sample

        label = float(self.labels[idx])  # Ensure labels are float32 for BCELoss
        return torch.tensor(features, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

# --- Load dataset ---
dataset = FallDataset(DATASET_PATH, file_to_label)

# --- Split dataset into training (80%) and validation (20%) ---
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"[INFO] Training samples: {train_size}, Validation samples: {val_size}")

# --- Define Fall Detection Model (Uses Only MFCC Features) ---
class FallDetectionModel(nn.Module):
    def __init__(self, input_size=20):  # Only 20 MFCC features
        super(FallDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

# --- Initialize Model ---
fall_model = FallDetectionModel().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(fall_model.parameters(), lr=0.001)

# --- Train Model ---
EPOCHS = 20
print("[INFO] Training started...")

for epoch in range(EPOCHS):
    fall_model.train()
    total_train_loss = 0.0
    total_val_loss = 0.0

    # --- Training Loop ---
    for features, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Training]"):
        features, labels = features.to(device), labels.to(device).unsqueeze(1)  # Adjust for BCELoss

        optimizer.zero_grad()
        outputs = fall_model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

    # --- Validation Loop ---
    fall_model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for features, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Validation]"):
            features, labels = features.to(device), labels.to(device).unsqueeze(1)

            outputs = fall_model(features)
            loss = criterion(outputs, labels)
            total_val_loss += loss.item()

            # Calculate accuracy
            predictions = (outputs > 0.5).float()
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    val_accuracy = correct / total
    print(f"Epoch [{epoch+1}/{EPOCHS}] - "
          f"Train Loss: {total_train_loss / len(train_loader):.4f} | "
          f"Val Loss: {total_val_loss / len(val_loader):.4f} | "
          f"Val Acc: {val_accuracy:.4f}")

# --- Save the Trained Model ---
MODEL_SAVE_PATH = "C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall_detection_model.pth"
torch.save(fall_model.state_dict(), MODEL_SAVE_PATH)
print(f"[INFO] New model saved at {MODEL_SAVE_PATH}")

[INFO] Using device: cuda
[INFO] Training samples: 760, Validation samples: 190
[INFO] Training started...


Epoch 1/20 [Training]: 100%|██████████| 24/24 [00:12<00:00,  1.85it/s]
Epoch 1/20 [Validation]: 100%|██████████| 6/6 [00:03<00:00,  1.94it/s]


Epoch [1/20] - Train Loss: 1.0377 | Val Loss: 0.4386 | Val Acc: 0.7842


Epoch 2/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.15it/s]
Epoch 2/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.41it/s]


Epoch [2/20] - Train Loss: 0.3920 | Val Loss: 0.2893 | Val Acc: 0.8579


Epoch 3/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.39it/s]
Epoch 3/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.50it/s]


Epoch [3/20] - Train Loss: 0.2764 | Val Loss: 0.2302 | Val Acc: 0.9053


Epoch 4/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.24it/s]
Epoch 4/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.09it/s]


Epoch [4/20] - Train Loss: 0.2255 | Val Loss: 0.1838 | Val Acc: 0.9368


Epoch 5/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.92it/s]
Epoch 5/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.30it/s]


Epoch [5/20] - Train Loss: 0.1996 | Val Loss: 0.1628 | Val Acc: 0.9421


Epoch 6/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.07it/s]
Epoch 6/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.25it/s]


Epoch [6/20] - Train Loss: 0.1872 | Val Loss: 0.1475 | Val Acc: 0.9474


Epoch 7/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.08it/s]
Epoch 7/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.04it/s]


Epoch [7/20] - Train Loss: 0.1721 | Val Loss: 0.1369 | Val Acc: 0.9474


Epoch 8/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.02it/s]
Epoch 8/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.08it/s]


Epoch [8/20] - Train Loss: 0.1705 | Val Loss: 0.1583 | Val Acc: 0.9421


Epoch 9/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.21it/s]
Epoch 9/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.03it/s]


Epoch [9/20] - Train Loss: 0.1623 | Val Loss: 0.1271 | Val Acc: 0.9526


Epoch 10/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.99it/s]
Epoch 10/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.83it/s]


Epoch [10/20] - Train Loss: 0.1571 | Val Loss: 0.1279 | Val Acc: 0.9474


Epoch 11/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.87it/s]
Epoch 11/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.79it/s]


Epoch [11/20] - Train Loss: 0.1537 | Val Loss: 0.1232 | Val Acc: 0.9474


Epoch 12/20 [Training]: 100%|██████████| 24/24 [00:03<00:00,  6.05it/s]
Epoch 12/20 [Validation]: 100%|██████████| 6/6 [00:00<00:00,  6.04it/s]


Epoch [12/20] - Train Loss: 0.1522 | Val Loss: 0.1190 | Val Acc: 0.9474


Epoch 13/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.90it/s]
Epoch 13/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.77it/s]


Epoch [13/20] - Train Loss: 0.1502 | Val Loss: 0.1443 | Val Acc: 0.9421


Epoch 14/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.78it/s]
Epoch 14/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.81it/s]


Epoch [14/20] - Train Loss: 0.1606 | Val Loss: 0.1223 | Val Acc: 0.9579


Epoch 15/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.94it/s]
Epoch 15/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.97it/s]


Epoch [15/20] - Train Loss: 0.1706 | Val Loss: 0.1405 | Val Acc: 0.9421


Epoch 16/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.72it/s]
Epoch 16/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.77it/s]


Epoch [16/20] - Train Loss: 0.1497 | Val Loss: 0.1583 | Val Acc: 0.9368


Epoch 17/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.68it/s]
Epoch 17/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.49it/s]


Epoch [17/20] - Train Loss: 0.1459 | Val Loss: 0.1146 | Val Acc: 0.9684


Epoch 18/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.44it/s]
Epoch 18/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.79it/s]


Epoch [18/20] - Train Loss: 0.1432 | Val Loss: 0.1138 | Val Acc: 0.9684


Epoch 19/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.69it/s]
Epoch 19/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.62it/s]


Epoch [19/20] - Train Loss: 0.1464 | Val Loss: 0.1122 | Val Acc: 0.9474


Epoch 20/20 [Training]: 100%|██████████| 24/24 [00:04<00:00,  5.70it/s]
Epoch 20/20 [Validation]: 100%|██████████| 6/6 [00:01<00:00,  5.76it/s]

Epoch [20/20] - Train Loss: 0.1420 | Val Loss: 0.1178 | Val Acc: 0.9579
[INFO] New model saved at C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall_detection_model.pth





In [None]:
import pyaudio
import wave
import numpy as np
import librosa
import torch
import torchaudio
from vosk import Model, KaldiRecognizer
import json
import time

# --- Constants ---
FORMAT = pyaudio.paInt16  # 16-bit audio
CHANNELS = 1  # Mono
RATE = 16000  # Sampling rate
CHUNK = 1024  # Audio chunk size
MODEL_PATH = "C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/vosk-model-small-en-us-0.15"  # Vosk model for speech recognition
FALL_DETECTION_MODEL_PATH = "C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall_detection_model.pth"  # Pretrained fall detection model
RECORD_SECONDS = 6  # Duration for each audio capture
FALL_THRESHOLD = 0.9  # Increased threshold to reduce false positives

# --- Load PyTorch Model for Fall Detection ---
class FallDetectionModel(torch.nn.Module):
    def __init__(self):
        super(FallDetectionModel, self).__init__()
        self.fc1 = torch.nn.Linear(20, 64)
        self.fc2 = torch.nn.Linear(64, 32)
        self.fc3 = torch.nn.Linear(32, 1)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

# Load the trained fall detection model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
fall_model = FallDetectionModel().to(device)
fall_model.load_state_dict(torch.load(FALL_DETECTION_MODEL_PATH, map_location=device))
fall_model.eval()

# --- Function to Record Audio ---
def record_audio(output_filename="recorded_audio.wav"):
    print(f"[INFO] Recording {RECORD_SECONDS} seconds of audio...")
    audio = pyaudio.PyAudio()
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    
    frames = []
    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        frames.append(stream.read(CHUNK))

    print("[INFO] Recording complete.")
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Save to file
    with wave.open(output_filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    return output_filename

def extract_mfcc(audio_file):
    try:
        y, sr = librosa.load(audio_file, sr=16000)
        
        # Extract MFCC (Only 20 features)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
        mfcc_mean = np.mean(mfcc, axis=1)  # Take the mean across time axis

        
        return mfcc_mean  # Only MFCC features (20 values)

    except Exception as e:
        print(f"[ERROR] Failed to extract features from {audio_file}: {e}")
        return None
    
def test_audio(audio_file):
    print(f"[INFO] Testing audio: {audio_file}")

    features = extract_mfcc(audio_file)

    if features is None:
        print("[ERROR] Could not extract features from the audio file.")
        return

    tensor_input = torch.tensor(features, dtype=torch.float32).to(device).unsqueeze(0)

    with torch.no_grad():
        prediction = fall_model(tensor_input).item()

    print(f"[DEBUG] Fall Detection Model Output: {prediction}")

    if prediction > 0.8:
        print("[ALERT] Fall Detected!")
    else:
        print("[INFO] No Fall Detected.")

# --- Function to Detect Fall ---
def detect_fall(audio_file):
    mfcc_features = extract_mfcc(audio_file)
    mfcc_tensor = torch.tensor(mfcc_features, dtype=torch.float32).to(device).unsqueeze(0)

    with torch.no_grad():
        prediction = fall_model(mfcc_tensor).item()

    print(f"[DEBUG] Fall Detection Model Output: {prediction}")

    if prediction > FALL_THRESHOLD:  # Increased threshold
        print("[ALERT] Fall Detected!")
        return True
    return False

"""
# --- Function to Perform Speech Recognition ---
def recognize_speech(audio_file):
    print("[INFO] Performing speech recognition...")
    
    model = Model(MODEL_PATH)
    rec = KaldiRecognizer(model, RATE)

    with wave.open(audio_file, "rb") as wf:
        while True:
            data = wf.readframes(CHUNK)
            if not data:
                break
            if rec.AcceptWaveform(data):
                result = json.loads(rec.Result())
                text = result.get("text", "").strip()
                if text:
                    print(f"[INFO] Recognized Text: {text}")

                    # Check for distress call
                    if any(word in text.lower() for word in ["help", "fall", "ouch", "emergency"]):
                        print("[ALERT] Patient asking for help detected!")
                        return text
    return None
"""
# --- Function to Capture Live Audio ---
def capture_live_audio():
    print("[INFO] Capturing live audio...")
    duration = 5  # Capture for 5 seconds
    audio_data = sd.rec(int(duration * RATE), samplerate=RATE, channels=1, dtype='int16')
    sd.wait()
    return audio_data

# --- Function to Process Live Audio for Fall Detection ---
def analyze_live_audio():
    audio_data = capture_live_audio()
    audio_data = np.squeeze(audio_data)  # Remove unnecessary dimensions
    mfcc_features = librosa.feature.mfcc(y=audio_data.astype(float), sr=RATE, n_mfcc=20)
    mfcc_features = np.mean(mfcc_features, axis=1)

    mfcc_tensor = torch.tensor(mfcc_features, dtype=torch.float32).to(device).unsqueeze(0)
    with torch.no_grad():
        prediction = fall_model(mfcc_tensor).item()

    if prediction > 0.8:
        print("[ALERT] Fall Detected in Live Audio!")
        return True
    return False

# --- Main Loop ---
if __name__ == "__main__":
    while True:
        print("\n[INFO] Capturing Audio...")
        recorded_file = record_audio()  # Capture audio

        # Fall Detection
        fall_detected = detect_fall(recorded_file)
        if fall_detected:
            print("[INFO] Sending fall alert to Raspberry Pi...")
            break  # Stop loop on fall detection

        # Speech Recognition
        distress_text = recognize_speech(recorded_file)
        if distress_text:
            print("[INFO] Emergency detected, alerting caregivers!")
            break

        time.sleep(1)  # Small delay before next capture
        
        #Capturing recorded audio to detect for fall/non-fall works but the real-time record is a bit inconsistent
        test_audio("C:/Users/bryan/OneDrive/Desktop/Project/DontFall/audio/fall-audio-detection-dataset/01-022-07-014-01.wav")

  fall_model.load_state_dict(torch.load(FALL_DETECTION_MODEL_PATH, map_location=device))



[INFO] Capturing Audio...
[INFO] Recording 7 seconds of audio...
[INFO] Recording complete.
[DEBUG] Fall Detection Model Output: 0.7413294911384583
[INFO] Performing speech recognition...


KeyboardInterrupt: 