### **Mono Audio Feature Extraction**

In [1]:
from __future__ import print_function
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
from pydub import AudioSegment
from pydub.playback import play



In [2]:
def compute_log_mel_spectrogram(audio_file, output_file, sr=16000, n_fft=2048,
                                hop_length=1600, n_mels=248):
    """Compute a log-mel spectrogram for one audio file and save it as a CSV.

    The audio is loaded as mono at ``sr`` Hz, converted to a mel power
    spectrogram with a Hann window, and expressed in dB relative to the
    file's own peak power (``ref=np.max``). The resulting matrix is written
    without index or header.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to load.
    output_file : str
        Path of the CSV file to write. Missing parent directories are created.
    sr : int, optional
        Sampling rate the audio is resampled to on load (default 16000).
    n_fft : int, optional
        FFT window length in samples (default 2048).
    hop_length : int, optional
        Hop between frames in samples. The default of 1600 corresponds to a
        100 ms step at the default 16 kHz sampling rate.
    n_mels : int, optional
        Number of mel bins (default 248).

    Returns
    -------
    None
        The spectrogram is written to ``output_file``.
    """
    # mono=True down-mixes multi-channel recordings to a single channel.
    y, sr = librosa.load(audio_file, sr=sr, mono=True)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, window='hann'
    )
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # to_csv does not create directories, so make sure the target folder exists.
    # Only path-like targets are inspected; anything else is handed to pandas untouched.
    if isinstance(output_file, (str, os.PathLike)):
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Save the raw matrix only: no index column and no header row.
    pd.DataFrame(mel_spec_db).to_csv(output_file, index=False, header=False)

In [3]:
# Folders with the original and augmented fall recordings, and the folder that
# receives one log-mel CSV per recording.
# NOTE(review): the two input folders are absolute, machine-specific paths;
# consider moving them into a single configuration cell.
file_path = 'C:/Users/wonha/OneDrive/Desktop/Wonha/UNIV/Personal_Projects/Acoustic_Fall_Detection_Data/Audio_Files/Fall_Mono'
file_aug_path = 'C:/Users/wonha/OneDrive/Desktop/Wonha/UNIV/Personal_Projects/Acoustic_Fall_Detection_Data/Aug_Audio_Files/Aug_Fall_Mono'
output_path = 'Mono_Audio_Feature/Fall_Feature'

# os.listdir returns names in arbitrary order; sorting makes the F<n>.csv
# numbering reproducible from one run to the next.
fall_list = sorted(file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav'))
aug_list = sorted(file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav'))

# Pair every name with the folder it was listed from. Deciding the folder by
# looking the name up in fall_list would read the original recording twice
# (and skip the augmented one) whenever both folders contain the same name.
source_files = [os.path.join(file_path, file_name) for file_name in fall_list]
source_files += [os.path.join(file_aug_path, file_name) for file_name in aug_list]

# The CSV writer does not create folders, so make sure the target exists.
os.makedirs(output_path, exist_ok=True)

count = 1
for full_file_name in source_files:
    output_file_name = os.path.join(output_path, f'F{count}.csv')
    compute_log_mel_spectrogram(full_file_name, output_file_name)
    count += 1

In [4]:
# Folders with the original and augmented non-fall recordings, and the folder
# that receives one log-mel CSV per recording.
# NOTE(review): the two input folders are absolute, machine-specific paths;
# consider moving them into a single configuration cell.
file_path = 'C:/Users/wonha/OneDrive/Desktop/Wonha/UNIV/Personal_Projects/Acoustic_Fall_Detection_Data/Audio_Files/Non_Fall_Mono'
file_aug_path = "C:/Users/wonha/OneDrive/Desktop/Wonha/UNIV/Personal_Projects/Acoustic_Fall_Detection_Data/Aug_Audio_Files/Aug_NonFall_Mono"
output_path = 'Mono_Audio_Feature/Non_Fall_Feature'

# os.listdir returns names in arbitrary order; sorting makes the NF<n>.csv
# numbering reproducible from one run to the next.
non_fall_list = sorted(file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav'))
non_aug_list = sorted(file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav'))

# Pair every name with the folder it was listed from. Deciding the folder by
# looking the name up in non_fall_list would read the original recording twice
# (and skip the augmented one) whenever both folders contain the same name.
source_files = [os.path.join(file_path, file_name) for file_name in non_fall_list]
source_files += [os.path.join(file_aug_path, file_name) for file_name in non_aug_list]

# The CSV writer does not create folders, so make sure the target exists.
os.makedirs(output_path, exist_ok=True)

count = 1
for full_file_name in source_files:
    output_file_name = os.path.join(output_path, f'NF{count}.csv')
    compute_log_mel_spectrogram(full_file_name, output_file_name)
    count += 1

#### Combining Data

In [5]:
# %pip install audiomentations

In [6]:
def read_second_column_csv(directory):
    """Stack the second column of every CSV file in ``directory`` into rows.

    Each ``.csv`` file is read without a header, only its second column
    (position 1) is kept, and that column is transposed into a single row.
    Files are processed in sorted name order so the row order of the result
    is reproducible; ``os.listdir`` alone gives no ordering guarantee.

    Parameters
    ----------
    directory : str
        Folder to scan. Entries that do not end in ``.csv`` are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per CSV file, re-indexed from 0.

    Raises
    ------
    ValueError
        If ``directory`` contains no ``.csv`` files.
    """
    csv_names = sorted(name for name in os.listdir(directory) if name.endswith('.csv'))
    if not csv_names:
        # pd.concat would also raise ValueError here, but without naming the folder.
        raise ValueError(f"No .csv files found in {directory!r}")

    rows = [
        pd.read_csv(os.path.join(directory, name), header=None, usecols=[1]).T
        for name in csv_names
    ]
    return pd.concat(rows, ignore_index=True)

In [7]:
def detect_outliers_iqr(data):
    """Return a boolean mask marking values outside the 1.5 * IQR fences.

    The 25th and 75th percentiles are taken with ``data.quantile``. A value is
    flagged when it lies more than 1.5 interquartile ranges below the lower
    quartile or above the upper quartile.

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        Values to test.

    Returns
    -------
    Same type as ``data``
        ``True`` where the value is an outlier, ``False`` elsewhere.
    """
    lower_quartile = data.quantile(0.25)
    upper_quartile = data.quantile(0.75)
    whisker = 1.5 * (upper_quartile - lower_quartile)

    too_low = data < (lower_quartile - whisker)
    too_high = data > (upper_quartile + whisker)
    return too_low | too_high

In [8]:
from sklearn.utils import resample
from imblearn.over_sampling import KMeansSMOTE

In [9]:
# Folders holding the per-recording feature CSVs for each class; they name the
# same locations as the output_path values used by the extraction cells above.
# The trailing slash on non_fall_dir is harmless because the reader builds file
# paths with os.path.join.
fall_dir = 'Mono_Audio_Feature/Fall_Feature'
non_fall_dir = 'Mono_Audio_Feature/Non_Fall_Feature/'

In [10]:
# Read the per-recording feature rows for each class.
falls_data = read_second_column_csv(fall_dir)
not_falls_data = read_second_column_csv(non_fall_dir)

# Stack both classes and label them: 1 = fall, 0 = non-fall.
# ignore_index=True gives every row a unique label; without it both halves
# start again at 0 and the later mask/label alignment runs on duplicate labels.
combined_data = pd.concat([falls_data, not_falls_data], ignore_index=True)
combined_data['label'] = [1] * len(falls_data) + [0] * len(not_falls_data)

# NOTE: K-Means SMOTE balancing is disabled in this cell, so the dataset keeps
# the class balance of the input folders despite the "resampled"/"sampled"
# variable names below.
X = combined_data.drop('label', axis=1)
y = combined_data['label']

# Flag per-column IQR outliers, blank them out, then fill every missing cell with 0.
# NOTE(review): the features are dB values computed with ref=np.max, where 0 dB
# looks like the peak level, so filled cells read as maximum energy - confirm
# that 0 is the intended replacement value.
outliers = detect_outliers_iqr(X)
X = X.mask(outliers).fillna(0)

# Re-attach the labels to the cleaned features.
resampled_data = X.assign(label=y)

# Shuffle the dataset
resampled_data = resampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Split by label and stack the two halves again. This is left over from the
# disabled resampling step; it is kept as-is so the row order of the saved
# file is unchanged by this cleanup.
falls_data_label = resampled_data[resampled_data['label'] == 1]
not_falls_data_label = resampled_data[resampled_data['label'] == 0]
combined_sampled_data = pd.concat([falls_data_label, not_falls_data_label]).reset_index(drop=True)

# Shuffle once more so the classes are interleaved in the saved file.
semi_sampled_data = combined_sampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Save to CSV
semi_sampled_data.to_csv('semi_dataset.csv', index=False)


In [11]:
# Display the shuffled, labelled dataset that the previous cell wrote to semi_dataset.csv.
semi_sampled_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,label
0,-0.170567,-2.863611,-11.380465,-10.153284,-17.159174,-13.676621,-20.788332,-19.946690,-19.809065,-20.808840,...,-25.004623,-25.993270,-24.824280,-23.774748,-27.121927,-32.642574,-38.542233,-45.523872,-65.422330,1
1,-8.364413,-8.946608,-11.667658,-10.828299,-14.398639,-13.858652,-22.257261,-17.883717,-13.649246,-14.208684,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
2,-15.605698,-2.029823,0.000004,-8.575342,-11.263519,-18.136984,-12.358812,-12.538837,-17.098675,-15.886677,...,-27.680069,-25.532602,-24.936360,-25.656546,-26.165462,-28.337862,-34.472350,-45.979145,-66.568480,1
3,-20.021217,-28.191566,-22.107044,-17.448956,-20.313904,-22.363234,-22.982200,-24.235954,-28.240350,-35.768450,...,-22.714386,-23.972736,-23.776968,-23.405703,-27.790257,-27.895767,-32.649906,-42.351994,-65.839600,1
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-80.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7902,-17.515469,-20.028608,-19.508833,-13.599602,-14.856102,-15.237173,-14.752804,-14.556040,-16.754156,-14.166658,...,-24.709284,-25.529789,-25.106234,-25.231050,-24.547281,-27.444952,-34.215622,-46.982870,-68.582016,0
7903,-9.748308,-10.883257,-12.757355,-10.999786,-18.375664,-20.483051,-20.146587,-22.082819,-17.612251,-24.581600,...,-23.952415,-24.172490,-23.391968,-20.286232,-21.388020,-24.087059,-31.755615,-41.660400,-64.603195,1
7904,-16.184322,-23.643002,-27.439102,-30.346243,-29.204670,-25.858692,-27.525547,-27.097847,-30.687157,-29.351685,...,-28.639427,-26.039127,-23.491758,-24.946430,-26.571869,-31.040773,-36.345657,-44.086792,-66.746450,0
7905,-12.948719,-13.054707,-16.084272,-18.808756,-16.480556,-9.290979,-6.280603,-9.397696,-11.592749,-10.077892,...,-32.051884,-34.189365,-35.338505,-34.820370,-32.681953,-35.610203,-41.549750,-56.542213,-74.652380,0
