# In [20]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Function to extract features from audio
def extract_features(file_path):
    """Summarise one audio file as a flat feature vector.

    The vector is the concatenation, in this order, of:
    the per-row mean and per-row standard deviation of 40 MFCCs, the per-row
    mean of the spectral contrast, the per-row mean of the chromagram, and
    the overall mean zero-crossing rate.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.

    Returns:
        The NumPy array built by ``np.concatenate``, or ``None`` when any
        step raised (the error is printed together with the file path).
    """
    try:
        signal, sr = librosa.load(file_path)

        mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
        contrast_matrix = librosa.feature.spectral_contrast(y=signal, sr=sr)
        chroma_matrix = librosa.feature.chroma_stft(y=signal, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(signal)

        # axis=1 reduces across columns, leaving one statistic per row of
        # each feature matrix; the zero-crossing rate is reduced to a scalar.
        summary_parts = [
            np.mean(mfcc_matrix, axis=1),
            np.std(mfcc_matrix, axis=1),
            np.mean(contrast_matrix, axis=1),
            np.mean(chroma_matrix, axis=1),
            [np.mean(zcr)],
        ]
        return np.concatenate(summary_parts)
    except Exception as err:
        # Best-effort batch processing: report the failing file and let the
        # caller skip it instead of aborting the whole run.
        print(f"Error processing {file_path}: {err}")
        return None

# Loading data paths
# Maps each class label to the paths of the entries found in that class's
# directory (0 -> "background_sound/", 1 -> "audio_data/").
# os.listdir returns names in arbitrary order, so they are sorted to keep the
# row order of the resulting dataset reproducible from run to run.
data_path_dict = {
    0: [
        os.path.join("background_sound/", file_name)
        for file_name in sorted(os.listdir("background_sound/"))
    ],
    1: [
        os.path.join("audio_data/", file_name)
        for file_name in sorted(os.listdir("audio_data/"))
    ],
}

# Extracting features
# Each entry of all_data is a [feature_vector, class_label] pair.
all_data = []
for class_label, list_of_files in data_path_dict.items():
    for single_file in list_of_files:
        # Entries that are not regular files are reported and skipped.
        if not os.path.isfile(single_file):
            print(f"{single_file} is not a valid audio file.")
            continue

        features = extract_features(single_file)
        if features is None:
            # Nothing usable came back for this file; leave it out.
            continue

        all_data.append([features, class_label])

# Creating DataFrame
# One row per successfully processed file: its feature vector and its label.
df = pd.DataFrame(all_data, columns=["feature", "class_label"])

# Saving DataFrame
# Create the output directory up front so a missing folder cannot throw away
# the extracted features at the very last step.
os.makedirs("final_audio_data_csv", exist_ok=True)
# NOTE: despite the ".csv" extension this file is written in pickle format,
# so it must be loaded with pd.read_pickle, not pd.read_csv. The path is kept
# unchanged so existing readers of this file keep working.
df.to_pickle("final_audio_data_csv/audio_data_extended.csv")
