### **Mono Audio Feature Extraction**

In [1]:
from __future__ import print_function
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
from pydub import AudioSegment
from pydub.playback import play



In [2]:
# Extract a log-scaled mel spectrogram from every original and augmented fall
# recording and store each one as a header-less CSV named F<n>.csv.
file_path = 'Audio_Files/Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_Fall_Mono'
output_path = 'Mono_Audio_Feature/Fall_Feature'
count = 1

# os.listdir returns names in arbitrary order; sorting keeps the mapping from
# input file to F<n>.csv stable across runs and machines.
fall_list = sorted(file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav'))
aug_list = sorted(file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav'))

# Resolve every name against the directory it was listed from. Deciding the
# directory with `file_name in fall_list` would send an augmented file that
# shares its name with an original back to the original directory, so the
# original would be processed twice and the augmented file never.
wav_paths = [os.path.join(file_path, file_name) for file_name in fall_list]
wav_paths += [os.path.join(file_aug_path, file_name) for file_name in aug_list]

# DataFrame.to_csv does not create missing parent directories.
os.makedirs(output_path, exist_ok=True)

for full_file_name in wav_paths:
    output_file_name = os.path.join(output_path, f'F{count}.csv')

    # Load audio file and compute mel spectrogram.
    # sr=None keeps the file's own sample rate; mono=True down-mixes to one channel.
    y, sr = librosa.load(full_file_name, sr=None, mono=True)
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=248)
    # Convert power to decibels relative to this file's peak (ref=np.max).
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # Convert mel spectrogram to DataFrame and save as CSV without index or header.
    df = pd.DataFrame(mel_spec_db)
    df.to_csv(output_file_name, index=False, header=False)

    count += 1

In [3]:
# Extract a log-scaled mel spectrogram from every original and augmented
# non-fall recording and store each one as a header-less CSV named NF<n>.csv.
file_path = 'Audio_Files/Non_Fall_Mono'
file_aug_path = 'Aug_Audio_Files/Aug_NonFall_Mono'
output_path = 'Mono_Audio_Feature/Non_Fall_Feature'
count = 1

# os.listdir returns names in arbitrary order; sorting keeps the mapping from
# input file to NF<n>.csv stable across runs and machines.
non_fall_list = sorted(file_name for file_name in os.listdir(file_path) if file_name.endswith('.wav'))
non_aug_list = sorted(file_name for file_name in os.listdir(file_aug_path) if file_name.endswith('.wav'))

# Resolve every name against the directory it was listed from. Deciding the
# directory with `file_name in non_fall_list` would send an augmented file that
# shares its name with an original back to the original directory, so the
# original would be processed twice and the augmented file never.
wav_paths = [os.path.join(file_path, file_name) for file_name in non_fall_list]
wav_paths += [os.path.join(file_aug_path, file_name) for file_name in non_aug_list]

# DataFrame.to_csv does not create missing parent directories.
os.makedirs(output_path, exist_ok=True)

for full_file_name in wav_paths:
    output_file_name = os.path.join(output_path, f'NF{count}.csv')

    # Load audio file and compute mel spectrogram.
    # sr=None keeps the file's own sample rate; mono=True down-mixes to one channel.
    y, sr = librosa.load(full_file_name, sr=None, mono=True)
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=248)
    # Convert power to decibels relative to this file's peak (ref=np.max).
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # Convert mel spectrogram to DataFrame and save as CSV without index or header.
    df = pd.DataFrame(mel_spec_db)
    df.to_csv(output_file_name, index=False, header=False)

    count += 1

#### Combining Data

In [4]:
# %pip install audiomentations

In [5]:
# Function to read the second column from CSV files in a directory
def read_second_column_csv(directory):
    """Build one row per CSV file from the second column of each file.

    Every ``.csv`` file directly inside ``directory`` is read without a header,
    only the column at position 1 is kept, and that column is transposed into a
    single row. Files are visited in sorted name order so the row order of the
    result is reproducible (``os.listdir`` makes no ordering guarantee).

    Parameters
    ----------
    directory : str
        Folder to scan; sub-folders are not searched.

    Returns
    -------
    pandas.DataFrame
        One row per CSV file, with the row index renumbered from 0.

    Raises
    ------
    ValueError
        If ``directory`` contains no ``.csv`` files.
    """
    csv_names = sorted(name for name in os.listdir(directory) if name.endswith('.csv'))
    if not csv_names:
        # pd.concat would also raise ValueError here, but without saying which
        # directory was empty.
        raise ValueError(f'No .csv files found in {directory!r}')

    rows = [
        pd.read_csv(os.path.join(directory, name), header=None, usecols=[1]).T
        for name in csv_names
    ]
    return pd.concat(rows, ignore_index=True)

In [6]:
def detect_outliers_iqr(data):
    """Flag values lying outside the 1.5 * IQR fences.

    ``data`` must provide ``quantile`` and elementwise comparison (for example
    a pandas Series or DataFrame). The result is a boolean object that is True
    wherever a value is below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``.
    """
    lower_quartile, upper_quartile = data.quantile(0.25), data.quantile(0.75)
    spread = upper_quartile - lower_quartile
    low_fence = lower_quartile - 1.5 * spread
    high_fence = upper_quartile + 1.5 * spread
    return (data < low_fence) | (data > high_fence)

In [7]:
from sklearn.utils import resample
from imblearn.over_sampling import KMeansSMOTE

In [8]:
# Directories holding the per-recording feature CSVs for each class.
fall_dir = 'Mono_Audio_Feature/Fall_Feature'
# NOTE: unlike fall_dir this path ends with a separator; os.path.join accepts both forms.
non_fall_dir = 'Mono_Audio_Feature/Non_Fall_Feature/'

In [9]:
# Read and combine CSV files: one row per feature file, fall rows first.
falls_data = read_second_column_csv(fall_dir)
not_falls_data = read_second_column_csv(non_fall_dir)

# ignore_index=True: both inputs are numbered from 0, so a plain concat would
# leave duplicate row labels in the combined frame.
combined_data = pd.concat([falls_data, not_falls_data], ignore_index=True)
# Label 1 = fall, 0 = non-fall; relies on the concat order above.
combined_data['label'] = [1] * len(falls_data) + [0] * len(not_falls_data)

# Apply K-Means SMOTE for balancing
kmeans_smote = KMeansSMOTE(random_state=123, k_neighbors=10, cluster_balance_threshold=0.1)
X = combined_data.drop('label', axis=1)
y = combined_data['label']

# Blank out values outside the IQR fences, then replace every missing value
# (the blanked outliers and any gaps already present) with 0.
# NOTE(review): if these features are the power_to_db(ref=np.max) values, 0 dB
# is the per-file maximum, so outliers are replaced by the largest possible
# value rather than a neutral one - confirm 0 is the intended fill value.
outliers = detect_outliers_iqr(X)
X = X.mask(outliers).fillna(0)
X_resampled, y_resampled = kmeans_smote.fit_resample(X, y)

# Combine resampled features and labels into a DataFrame. assign() returns a
# new frame, so no column is written into an object derived from X_resampled.
resampled_data = X_resampled.assign(label=y_resampled)

# Shuffle the dataset
resampled_data = resampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Separate the dataset by label
falls_data_label = resampled_data[resampled_data['label'] == 1]
not_falls_data_label = resampled_data[resampled_data['label'] == 0]

# Combine the per-class frames again (all fall rows, then all non-fall rows)
combined_sampled_data = pd.concat([falls_data_label, not_falls_data_label]).reset_index(drop=True)

# Shuffle the combined dataset so the classes are interleaved again
semi_sampled_data = combined_sampled_data.sample(frac=1, random_state=123).reset_index(drop=True)

# Save to CSV
semi_sampled_data.to_csv('semi_dataset.csv', index=False)


  resampled_data['label'] = y_resampled


In [10]:
# Display the resampled, shuffled dataset as the cell's output.
semi_sampled_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,label
0,-21.417480,-23.542833,-28.276375,-25.761911,-31.796726,-28.333607,-25.743526,-19.974667,-23.122292,-25.579315,...,-29.099361,-26.976444,-26.251064,-25.988098,-26.829159,-24.889942,-26.256216,-27.550087,-28.025673,0
1,-8.619902,-9.331999,-10.972822,-8.755823,-11.988148,-16.355350,-13.336607,-13.487831,-12.976706,-17.233055,...,-23.777988,-24.564585,-24.670895,-24.039507,-22.315199,-23.711653,-26.844257,-23.131962,-18.167988,1
2,-3.362810,-4.585382,-25.951832,-16.588234,-17.913185,-17.321240,-32.548916,-21.295944,-19.973763,-20.887346,...,-24.331860,-23.915028,-22.570934,-25.050327,-23.392735,-26.348186,-24.229550,-24.651573,-21.372372,1
3,-29.611000,-28.781399,-27.899857,-24.589605,-32.870804,-30.405949,-28.052720,-33.268547,-33.748314,-27.181023,...,-29.616133,-30.712687,-29.539562,-29.444094,-25.234097,-28.561258,-28.470268,-29.311249,-27.975023,0
4,-15.609318,-18.351908,-30.482166,-28.460062,-33.601055,-32.393260,-30.862072,-27.600290,-37.299030,-39.496950,...,-32.826637,-32.471397,-31.994556,-31.968382,-31.343174,-33.002953,-31.854250,-31.341324,-33.587597,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2138,-7.782370,-7.531036,-7.136200,-6.707109,-8.735756,-9.376755,-10.015965,-9.710041,-11.248575,-11.451866,...,-45.443398,-45.952404,-46.250183,-48.136970,-49.632763,-51.604110,-50.048523,-48.017970,-46.710030,0
2139,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
2140,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
2141,-27.133766,-29.242226,-33.886390,-26.340965,-23.770340,-21.596535,-19.660435,-17.210957,-22.325356,-24.889036,...,-26.502022,-25.022924,-25.856735,-28.672522,-25.604668,-26.536835,-26.834572,-27.618155,-26.952877,0
