### **Mono Audio Feature Extraction**

In [10]:
from __future__ import print_function
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
from pydub import AudioSegment
from pydub.playback import play

In [11]:
file_path = 'Audio_Files/Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_Fall_Mono'
output_path = 'Mono_Audio_Feature/Fall_Feature'
count = 1

fall_list = [file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav')]
aug_list = [file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav')]

for file_name in fall_list + aug_list:
    if file_name.endswith('.wav'):
        if file_name in fall_list:
            full_file_name = os.path.join(file_path, file_name)
        else:
            full_file_name = os.path.join(file_aug_path, file_name)

        output_file_name = os.path.join(output_path, f'F{count}.csv')

        y, sr = librosa.load(full_file_name)
        duration = librosa.get_duration(y=y)
        onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=2205, aggregate=np.median)

        df = pd.DataFrame(onset_env)
        df.to_csv(output_file_name)

        count += 1

In [12]:
file_path = 'Audio_Files/Non_Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_NonFall_Mono'
output_path = 'Mono_Audio_Feature/Non_Fall_Feature'
count = 1

non_fall_list = [file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav')]
non_aug_list = [file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav')]

for file_name in non_fall_list + non_aug_list:
    if file_name.endswith('.wav'):
        if file_name in non_fall_list:
            full_file_name = os.path.join(file_path, file_name)
        else:
            full_file_name = os.path.join(file_aug_path, file_name)

        output_file_name = os.path.join(output_path, f'NF{count}.csv')

        y, sr = librosa.load(full_file_name)
        duration = librosa.get_duration(y=y)
        onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=2205, aggregate=np.median)

        df = pd.DataFrame(onset_env)
        df.to_csv(output_file_name)

        count += 1

#### Combining Data

In [13]:
# %pip install audiomentations

In [14]:
# Function to read the second column from CSV files in a directory
def read_second_column_csv(directory):
    dataframes = []
    for filename in os.listdir(directory):
        if filename.endswith('.csv'):
            df = pd.read_csv(os.path.join(directory, filename), header=None, usecols=[1])
            flattened_df = df.T
            dataframes.append(flattened_df)
    return pd.concat(dataframes, ignore_index=True)

# # Function to apply various augmentations to audio data
# def augment_audio_data(y, sr, augmentations):
#     augmented_data = []
#     audio = AudioSegment(y, frame_rate=sr)
#     for aug in augmentations:
#         if aug == 'noise':
#             noise = AudioSegment.from_mono_audioarrays(np.random.normal(size=y.shape[0]), sr)
#             y_aug = audio.overlay(noise)
#         elif aug == 'shift':
#             shift_range = int(np.random.uniform(low=-5, high=5) * sr)
#             y_aug = audio[shift_range:] + audio[:shift_range]
#         elif aug == 'stretch':
#             rate = np.random.uniform(low=0.8, high=1.2)
#             y_aug = librosa.effects.time_stretch(y, rate=rate)
#         elif aug == 'pitch':
#             n_steps = np.random.uniform(low=-1, high=1)
#             y_aug = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
#         elif aug == 'reverb':
#             y_aug = AudioSegment.from_mono_audioarray(y, sr)
#             y_aug = y_aug + y_aug.reverse()
#         elif aug == 'speed':
#             speed_rate = np.random.uniform(low=0.7, high=1.3)
#             y_aug = librosa.effects.time_stretch(y, speed_rate)
#         elif aug == 'change_volume':
#             change_in_dBFS = np.random.uniform(low=-10.0, high=10.0)
#             y_aug = AudioSegment.from_mono_audioarray(y, sr)
#             y_aug = y_aug + change_in_dBFS
#         augmented_data.append(y_aug)
#     return augmented_data


# # # Function to add augmentations to a dataset
# # def add_augmentations_fall(directory, sr=16000, augmentations=['shift'], repeats=10):
# #     augmented_dataframes = []
# #     for filename in os.listdir(directory):
# #         if filename.endswith('.csv'):
# #             df = pd.read_csv(os.path.join(directory, filename), header=None, usecols=[1])
# #             y = df.iloc[:, 0].values
# #             for _ in range(repeats):  # Repeat the augmentations
# #                 augmented_clips = augment_audio_data(y, sr, augmentations)
# #                 for aug_clip in augmented_clips:
# #                     aug_features = librosa.feature.mfcc(y=aug_clip, sr=sr, n_mfcc=13)
# #                     flattened_features = np.mean(aug_features, axis=1).reshape(1, -1)
# #                     augmented_dataframes.append(pd.DataFrame(flattened_features))
# #     return pd.concat(augmented_dataframes, ignore_index=True)
    

# # Function to add augmentations to a dataset
# def add_augmentations_non_fall(directory, sr=16000, augmentations=['noise', 'shift', 'stretch', 'pitch', 'reverb', 'speed', 'change_volume'], repeats=10):
#     augmented_dataframes = []
#     for filename in os.listdir(directory):
#         if filename.endswith('.csv'):
#             df = pd.read_csv(os.path.join(directory, filename), header=None, usecols=[1])
#             y = df.iloc[:, 0].values
#             for _ in range(repeats):  # Repeat the augmentations
#                 augmented_clips = augment_audio_data(y, sr, augmentations)
#                 for aug_clip in augmented_clips:
#                     aug_features = librosa.feature.mfcc(y=aug_clip, sr=sr, n_mfcc=13)
#                     flattened_features = np.mean(aug_features, axis=1).reshape(1, -1)
#                     augmented_dataframes.append(pd.DataFrame(flattened_features))
#     return pd.concat(augmented_dataframes, ignore_index=True)

In [15]:
from sklearn.utils import resample
# from imblearn.over_sampling import KMeansSMOTE

In [16]:
fall_dir = 'Mono_Audio_Feature/Fall_Feature'
non_fall_dir = 'Mono_Audio_Feature/Non_Fall_Feature/'

In [17]:
# Read and combine CSV files
falls_data = read_second_column_csv(fall_dir)
not_falls_data = read_second_column_csv(non_fall_dir)

# # Apply augmentations
# # falls_augmented_data = add_augmentations_fall(fall_dir)
# not_falls_augmented_data = add_augmentations_non_fall(non_fall_dir)

# Combine original and augmented data
# combined_data = pd.concat([falls_data, not_falls_data, falls_augmented_data, not_falls_augmented_data])
combined_data = pd.concat([falls_data, not_falls_data])
combined_data['label'] = [1] * len(falls_data) + [0] * len(not_falls_data)
                        #  [0] * len(not_falls_augmented_data)
                        #  [1] * len(falls_augmented_data) + [0] * len(not_falls_augmented_data)

# outliers = detect_outliers_iqr(combined_data)
# combined_data[outliers] = np.nan

# Apply K-Means SMOTE for balancing
# kmeans_smote = KMeansSMOTE(random_state=123, k_neighbors=10, cluster_balance_threshold=0.1)
X = combined_data.drop('label', axis=1)
y = combined_data['label']
X = X.fillna(0)
# X_resampled, y_resampled = kmeans_smote.fit_resample(X, y)

# Combine resampled features and labels into a DataFrame
resampled_data = pd.DataFrame(X, columns=X.columns)
resampled_data['label'] = y

# Shuffle the dataset
resampled_data = resampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Separate the dataset by label
falls_data_label = resampled_data[resampled_data['label'] == 1]
not_falls_data_label = resampled_data[resampled_data['label'] == 0]

# # Sample 350 entries from each subset
# sampled_falls_data = falls_data_label.sample(n=1200, random_state=123).reset_index(drop=True)
# sampled_not_falls_data = not_falls_data_label.sample(n=1200, random_state=123).reset_index(drop=True)

# Combine the sampled data
combined_sampled_data = pd.concat([falls_data_label, not_falls_data_label]).reset_index(drop=True)

# Shuffle the combined sampled dataset
semi_sampled_data = combined_sampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Save to CSV
semi_sampled_data.to_csv('semi_dataset.csv', index=False)

  combined_data['label'] = [1] * len(falls_data) + [0] * len(not_falls_data)


In [18]:
semi_sampled_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,label
0,0.0,0.0,4.136897,0.104953,0.000000,0.000000,0.496076,0.000000,0.606687,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.0,0.0,3.794677,0.000000,0.222858,0.000000,0.377832,0.000000,0.336196,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.0,0.0,2.990320,0.000000,1.624435,0.000000,0.041311,0.000000,0.000000,0.937735,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,3.409440,0.000000,4.202390,0.000000,32.334236,4.225602,0.000000,0.013591,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,2.955694,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.218848,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1702,0.0,0.0,29.042015,0.000000,0.000000,0.000000,0.649817,0.350354,0.251033,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1703,0.0,0.0,5.318294,0.071152,0.119624,0.151981,0.015439,0.000000,0.440911,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1704,0.0,0.0,2.844302,0.000000,0.336873,0.000000,0.000000,0.000000,0.422914,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1705,0.0,0.0,2.967865,0.000000,0.743209,0.000000,0.891525,0.242469,0.201547,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
