### **Mono Audio Feature Extraction**

In [23]:
from __future__ import print_function
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
from pydub import AudioSegment
from pydub.playback import play

In [24]:
def compute_log_mel_spectrogram(audio_file, output_file):
    # Load audio file and compute log mel spectrogram
    y, sr = librosa.load(audio_file, sr=16000, mono=True)  # Ensure mono=True for mono audio
    hop_length = 1600  # Hop length of 1600, equivalent to 100ms resolution for a 16kHz sampling rate
    n_mels = 248  # Number of mel bins

    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=hop_length, n_mels=n_mels, window='hann')
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # Convert mel spectrogram to DataFrame and save as CSV
    df = pd.DataFrame(mel_spec_db)
    df.to_csv(output_file, index=False, header=False)  # Remove index and header

In [25]:
file_path = 'Audio_Files/Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_Fall_Mono'
output_path = 'Mono_Audio_Feature/Fall_Feature'
count = 1

fall_list = [file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav')]
aug_list = [file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav')]

count = 1

for file_name in fall_list + aug_list:
    if file_name.endswith('.wav'):
        if file_name in fall_list:
            full_file_name = os.path.join(file_path, file_name)
        else:
            full_file_name = os.path.join(file_aug_path, file_name)

        output_file_name = os.path.join(output_path, f'F{count}.csv')

        compute_log_mel_spectrogram(full_file_name, output_file_name)
        count += 1

In [26]:
file_path = 'Audio_Files/Non_Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_NonFall_Mono'
output_path = 'Mono_Audio_Feature/Non_Fall_Feature'
count = 1

non_fall_list = [file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav')]
non_aug_list = [file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav')]

count = 1

for file_name in non_fall_list + non_aug_list:
    if file_name.endswith('.wav'):
        if file_name in non_fall_list:
            full_file_name = os.path.join(file_path, file_name)
        else:
            full_file_name = os.path.join(file_aug_path, file_name)

        output_file_name = os.path.join(output_path, f'NF{count}.csv')

        compute_log_mel_spectrogram(full_file_name, output_file_name)
        count += 1

#### Combining Data

In [27]:
# %pip install audiomentations

In [28]:
# Function to read the second column from CSV files in a directory
def read_second_column_csv(directory):
    dataframes = []
    for filename in os.listdir(directory):
        if filename.endswith('.csv'):
            df = pd.read_csv(os.path.join(directory, filename), header=None, usecols=[1])
            flattened_df = df.T
            dataframes.append(flattened_df)
    return pd.concat(dataframes, ignore_index=True)

In [29]:
def detect_outliers_iqr(data):
    Q1 = data.quantile(0.25)
    Q3 = data.quantile(0.75)
    IQR = Q3 - Q1
    outliers = (data < (Q1 - 1.5 * IQR)) | (data > (Q3 + 1.5 * IQR))
    return outliers

In [30]:
from sklearn.utils import resample
from imblearn.over_sampling import KMeansSMOTE

In [31]:
fall_dir = 'Mono_Audio_Feature/Fall_Feature'
non_fall_dir = 'Mono_Audio_Feature/Non_Fall_Feature/'

In [32]:
# Read and combine CSV files
falls_data = read_second_column_csv(fall_dir)
not_falls_data = read_second_column_csv(non_fall_dir)

combined_data = pd.concat([falls_data, not_falls_data])
combined_data['label'] = [1] * len(falls_data) + [0] * len(not_falls_data)

# Apply K-Means SMOTE for balancing
# kmeans_smote = KMeansSMOTE(random_state=123, k_neighbors=10, cluster_balance_threshold=0.1)
X = combined_data.drop('label', axis=1)
y = combined_data['label']
outliers = detect_outliers_iqr(X)
X[outliers] = np.nan
X = X.fillna(0)
# X_resampled, y_resampled = kmeans_smote.fit_resample(X, y)

# Combine resampled features and labels into a DataFrame
resampled_data = pd.DataFrame(X, columns=X.columns)
resampled_data['label'] = y

# Shuffle the dataset
resampled_data = resampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Separate the dataset by label
falls_data_label = resampled_data[resampled_data['label'] == 1]
not_falls_data_label = resampled_data[resampled_data['label'] == 0]

# Combine the sampled data
combined_sampled_data = pd.concat([falls_data_label, not_falls_data_label]).reset_index(drop=True)

# Shuffle the combined sampled dataset
semi_sampled_data = combined_sampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Save to CSV
semi_sampled_data.to_csv('semi_dataset.csv', index=False)


In [33]:
semi_sampled_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,label
0,-17.367016,-18.522022,-15.313799,-19.613197,-16.777063,-17.388922,-16.400742,-15.747568,-15.597327,-19.664322,...,-19.252520,-18.997600,-18.323433,-17.086378,-17.407870,-22.757816,-31.738060,-40.557884,-61.231880,0
1,-21.458214,-30.133240,-30.742130,-29.145850,-30.572792,-35.227993,-34.079357,-31.762096,-32.060703,-24.828590,...,-28.976099,-29.361496,-32.223710,-28.106268,-29.720820,-32.910750,-38.894154,-49.831642,-69.508470,0
2,-25.159256,-21.607533,-26.014088,-28.379122,-27.387486,-25.389692,-23.646397,-27.911396,-28.604357,-25.982784,...,-27.113888,-26.434692,-27.742886,-25.874157,-24.903069,-27.227484,-34.567596,-46.250900,-64.976524,0
3,-26.802920,-21.937653,-19.836323,-12.614918,-16.865282,-18.816101,-19.604723,-14.982712,-18.493523,-21.047590,...,-26.394920,-25.424662,-27.161050,-27.050632,-26.249231,-28.342196,-33.431282,-43.566864,-68.039990,0
4,-17.187183,-25.164608,-22.871750,-25.189981,-32.652187,-28.734215,-21.164614,-23.379570,-17.864437,-19.711810,...,-20.461939,-20.725765,-20.033371,-22.358604,-21.000017,-24.624655,-34.706676,-49.102135,-67.097490,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3252,0.000000,0.000000,-43.525757,-32.186462,-21.054737,-14.461304,-17.645950,-15.779018,-12.318569,-9.850504,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-80.000000,0
3253,-18.655563,-22.775068,-21.888012,-22.163845,-26.817986,-20.825485,-19.788140,-16.648407,-15.290426,-18.801560,...,-22.316180,-23.975320,-24.320993,-25.448120,-25.195738,-27.087856,-33.504410,-45.177850,-68.243530,0
3254,-20.056500,-16.688795,-17.914246,-19.148100,-18.264357,-5.901232,-1.755371,-8.248135,-17.698820,-13.392283,...,-27.419610,-24.743640,-26.164246,-27.549631,-28.805150,-33.685303,-36.298805,-47.178140,-65.193850,0
3255,-28.260237,-27.845078,-31.504623,-20.878122,-19.897620,-22.575512,-23.864918,-18.751652,-22.145794,-26.852700,...,-19.787062,-21.062695,-21.060146,-22.031246,-23.767380,-27.084179,-33.653210,-43.365505,-61.294400,1
