### **Mono Audio Feature Extraction**

In [1]:
from __future__ import print_function
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
from pydub import AudioSegment
from pydub.playback import play



In [2]:
file_path = 'Audio_Files/Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_Fall_Mono'
output_path = 'Mono_Audio_Feature/Fall_Feature'
count = 1

fall_list = [file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav')]
aug_list = [file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav')]

for file_name in fall_list + aug_list:
    if file_name.endswith('.wav'):
        if file_name in fall_list:
            full_file_name = os.path.join(file_path, file_name)
        else:
            full_file_name = os.path.join(file_aug_path, file_name)

        output_file_name = os.path.join(output_path, f'F{count}.csv')

        # Load audio file and compute mel spectrogram
        y, sr = librosa.load(full_file_name, sr=None, mono=True)  # Ensure mono=True for mono audio
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Convert mel spectrogram to DataFrame and save as CSV
        df = pd.DataFrame(mel_spec_db)
        df.to_csv(output_file_name, index=False, header=False)  # Remove index and header

        count += 1

In [3]:
file_path = 'Audio_Files/Non_Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_NonFall_Mono'
output_path = 'Mono_Audio_Feature/Non_Fall_Feature'
count = 1

non_fall_list = [file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav')]
non_aug_list = [file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav')]

for file_name in non_fall_list + non_aug_list:
    if file_name.endswith('.wav'):
        if file_name in non_fall_list:
            full_file_name = os.path.join(file_path, file_name)
        else:
            full_file_name = os.path.join(file_aug_path, file_name)

        output_file_name = os.path.join(output_path, f'NF{count}.csv')

        # Load audio file and compute mel spectrogram
        y, sr = librosa.load(full_file_name, sr=None, mono=True)  # Ensure mono=True for mono audio
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Convert mel spectrogram to DataFrame and save as CSV
        df = pd.DataFrame(mel_spec_db)
        df.to_csv(output_file_name, index=False, header=False)  # Remove index and header

        count += 1

#### Combining Data

In [4]:
# %pip install audiomentations

In [5]:
# Function to read the second column from CSV files in a directory
def read_second_column_csv(directory):
    dataframes = []
    for filename in os.listdir(directory):
        if filename.endswith('.csv'):
            df = pd.read_csv(os.path.join(directory, filename), header=None, usecols=[1])
            flattened_df = df.T
            dataframes.append(flattened_df)
    return pd.concat(dataframes, ignore_index=True)

In [6]:
def detect_outliers_iqr(data):
    Q1 = data.quantile(0.25)
    Q3 = data.quantile(0.75)
    IQR = Q3 - Q1
    outliers = (data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))
    return outliers

In [7]:
from sklearn.utils import resample
from imblearn.over_sampling import KMeansSMOTE

In [8]:
fall_dir = 'Mono_Audio_Feature/Fall_Feature'
non_fall_dir = 'Mono_Audio_Feature/Non_Fall_Feature/'

In [9]:
# Read and combine CSV files
falls_data = read_second_column_csv(fall_dir)
not_falls_data = read_second_column_csv(non_fall_dir)

combined_data = pd.concat([falls_data, not_falls_data])
combined_data['label'] = [1] * len(falls_data) + [0] * len(not_falls_data)

# Apply K-Means SMOTE for balancing
kmeans_smote = KMeansSMOTE(random_state=123, k_neighbors=10, cluster_balance_threshold=0.1)
X = combined_data.drop('label', axis=1)
y = combined_data['label']
outliers = detect_outliers_iqr(X)
X[outliers] = np.nan
X = X.fillna(0)
X_resampled, y_resampled = kmeans_smote.fit_resample(X, y)

# Combine resampled features and labels into a DataFrame
resampled_data = pd.DataFrame(X_resampled, columns=X_resampled.columns)
resampled_data['label'] = y_resampled

# Shuffle the dataset
resampled_data = resampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Separate the dataset by label
falls_data_label = resampled_data[resampled_data['label'] == 1]
not_falls_data_label = resampled_data[resampled_data['label'] == 0]

# Combine the sampled data
combined_sampled_data = pd.concat([falls_data_label, not_falls_data_label]).reset_index(drop=True)

# Shuffle the combined sampled dataset
semi_sampled_data = combined_sampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Save to CSV
semi_sampled_data.to_csv('semi_dataset.csv', index=False)


  resampled_data['label'] = y_resampled


In [10]:
semi_sampled_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,label
0,-21.968350,-25.982744,-26.765411,-20.855862,-22.471500,-27.211748,-26.243006,-24.905521,-22.354042,-30.041256,...,-26.790281,-27.437546,-26.201393,-24.573875,-26.612732,-25.382105,-24.956905,-24.214996,-26.023770,0
1,-7.108126,-7.836189,-11.179707,-11.281725,-13.203057,-15.588419,-16.273582,-15.915768,-14.551609,-16.460777,...,-20.727524,-20.344484,-20.896122,-21.127752,-22.161875,-22.219534,-21.346170,-21.860329,-19.258804,1
2,-3.925457,-17.495983,-16.677006,-21.659225,-19.877338,-20.525300,-27.492882,-34.659510,-32.756916,-32.166565,...,-22.991947,-25.003948,-24.073122,-23.416662,-23.364689,-22.346258,-22.791610,-23.832157,-22.254766,1
3,-26.091427,-23.881970,-27.226625,-28.340730,-25.032250,-20.381374,-23.767780,-33.326267,-29.839516,-25.440334,...,-22.758022,-27.675915,-27.136723,-28.049025,-27.871813,-27.468664,-25.049171,-24.952816,-26.168098,0
4,-15.766670,-27.518604,-29.600231,-27.303305,-33.284267,-39.065075,-30.130756,-24.558575,-25.138725,-28.672222,...,-31.605310,-32.595833,-31.674744,-30.082743,-30.301579,-30.002600,-29.373428,-29.930702,-29.614986,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2138,-5.293337,-5.214691,-6.733530,-8.269834,-8.633490,-9.542425,-7.758865,-8.889482,-13.934414,-22.878653,...,-44.661520,-44.520447,-44.150967,-43.484287,-43.648544,-43.897667,-46.052067,-48.573097,-45.791780,0
2139,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
2140,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
2141,-27.073860,-25.027462,-19.622988,-16.914711,-20.665870,-28.678510,-29.754356,-24.508562,-22.877272,-29.693502,...,-24.489070,-24.783980,-25.560001,-25.789270,-24.655838,-23.432590,-25.046330,-24.474985,-25.276287,0
