In [4]:
import os
import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm
import json

def get_audio_files(dataset_path):
    """Collect every WAV file below ``dataset_path`` together with its label.

    A file's label is the name of the directory that directly contains it.
    The extension check is case-insensitive, so ``.WAV`` files are picked up
    as well, and directories and file names are visited in sorted order so
    the returned list does not depend on the file system's listing order.

    Args:
        dataset_path: Root directory that is searched recursively.

    Returns:
        A list of ``(file_path, label)`` tuples. The list is empty when the
        directory does not exist or contains no WAV files.
    """
    audio_files = []
    for root, dirs, files in os.walk(dataset_path):
        # Sorting ``dirs`` in place controls the order in which os.walk
        # descends into sub-directories.
        dirs.sort()
        label = os.path.basename(root)  # Folder name as label
        for file in sorted(files):
            if file.lower().endswith(".wav"):
                audio_files.append((os.path.join(root, file), label))
    return audio_files

def extract_features(file_path, target_sr=16000, n_mfcc=13, n_mels=128):
    """Load one audio file and derive its mel-spectrogram and MFCC features.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Sampling rate handed to ``librosa.load``.
        n_mfcc: Number of MFCCs requested from ``librosa.feature.mfcc``.
        n_mels: Number of mel bands for the mel spectrogram. It is not
            forwarded to the MFCC computation.

    Returns:
        A ``(mel_spec, mfccs)`` tuple: the mel spectrogram converted to
        decibels with ``ref=np.max``, and the MFCC array.
    """
    # Decode as a mono signal at the requested sampling rate.
    waveform, rate = librosa.load(file_path, sr=target_sr, mono=True)

    # MFCCs come straight from the waveform, independently of the
    # spectrogram computed below.
    mfccs = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=n_mfcc)

    # Mel power spectrogram, then conversion to a decibel scale.
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    return mel_db, mfccs

def process_and_save_features(dataset_path, feature_path):
    """Extract features for every WAV file in a dataset and save them to disk.

    For each audio file found by ``get_audio_files`` the mel spectrogram and
    the MFCCs are written as ``<label lowercased><index>_mel.npy`` and
    ``<label lowercased><index>_mfcc.npy`` inside ``feature_path/<label>``.
    The index is a per-label counter starting at 1. A ``labels.json`` file
    mapping each saved file name to its label is written to ``feature_path``.

    Counters are created on demand, so any folder name is accepted as a
    label rather than only a fixed set of classes.

    Args:
        dataset_path: Root directory containing one sub-folder per label.
        feature_path: Output directory; created if it does not exist.

    Returns:
        None. Results are written to disk and a completion message is printed.
    """
    os.makedirs(feature_path, exist_ok=True)
    audio_files = get_audio_files(dataset_path)
    labels_dict = {}
    # Per-label running index; a label first seen gets index 1.
    file_counters = {}

    for file_path, label in tqdm(audio_files, desc="Extracting Features", unit="file"):
        mel_spec, mfccs = extract_features(file_path)

        # Create label folder in feature directory
        label_folder = os.path.join(feature_path, label)
        os.makedirs(label_folder, exist_ok=True)

        # Generate new filename
        file_index = file_counters.get(label, 0) + 1
        file_counters[label] = file_index
        mel_file_name = f"{label.lower()}{file_index}_mel.npy"
        mfcc_file_name = f"{label.lower()}{file_index}_mfcc.npy"

        # Save features
        np.save(os.path.join(label_folder, mel_file_name), mel_spec)
        np.save(os.path.join(label_folder, mfcc_file_name), mfccs)

        # Store label mapping
        labels_dict[mel_file_name] = label
        labels_dict[mfcc_file_name] = label

    # Save labels as JSON
    labels_file = os.path.join(feature_path, "labels.json")
    with open(labels_file, "w", encoding="utf-8") as f:
        json.dump(labels_dict, f)

    print("Feature extraction complete! Features saved in", feature_path)

# Example usage: read the preprocessed dataset and write the extracted
# features (plus labels.json) into the "Features" directory.
dataset_path, feature_path = "Datasets_Preprocessed", "Features"
process_and_save_features(dataset_path=dataset_path, feature_path=feature_path)


Extracting Features: 100%|██████████| 3950/3950 [00:54<00:00, 71.92file/s]

Feature extraction complete! Features saved in Features



