Libraries for feature extraction

In [1]:
import librosa
import os
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm import tqdm 

Feature extraction functions

In [2]:
def plot(mfcc_features,title):
    """Render a feature matrix as a time-indexed heat map with a colour bar.

    Plotting is best-effort: a failure is reported on stdout together with its
    cause instead of being raised, so one bad input does not stop the notebook.

    Args:
        mfcc_features: Feature matrix handed to ``librosa.display.specshow``.
        title: Text used as the figure title and in the failure message.

    Returns:
        None. The figure is shown as a side effect.
    """
    fig = None
    try:
        # ``import librosa`` alone does not load the display submodule on older
        # librosa releases, so make sure it is available before using it.
        import librosa.display

        fig = plt.figure(figsize=(10, 4))
        librosa.display.specshow(mfcc_features, x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title(title)
        plt.tight_layout()
        plt.show()
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate; the cause is printed, not hidden.
        print("image cant be formed for", title, f"({type(exc).__name__}: {exc})")
        if fig is not None:
            # Do not leave a half-drawn figure open after a failed render.
            plt.close(fig)

In [3]:
#----------------------------------------------------------------------------------------------------#

def get_STFT_mfcc(audio_file, sr=22050, n_fft=2048, hop_length=512, n_mfcc=13):
    """Load a clip and run ``librosa.feature.mfcc`` on its STFT magnitude.

    Args:
        audio_file: Path of the audio file to load.
        sr: Sampling rate requested from ``librosa.load``.
        n_fft: FFT window size for the STFT.
        hop_length: Hop between successive STFT frames.
        n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns:
        The array produced by ``librosa.feature.mfcc`` for the magnitude input.
    """
    # NOTE(review): librosa documents ``S`` as a log-power Mel spectrogram, while
    # this variant passes the linear-frequency magnitude — confirm it is intended.
    waveform, rate = librosa.load(audio_file, sr=sr)
    magnitude = np.abs(librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length))
    return librosa.feature.mfcc(S=magnitude, sr=rate, n_mfcc=n_mfcc)

#----------------------------------------------------------------------------------------------------#

def get_STFT_mfcc2(audio_file, sr=22050, n_fft=2048, hop_length=512, n_mfcc=13):
    """Load a clip and run ``librosa.feature.mfcc`` on its STFT magnitude in dB.

    Args:
        audio_file: Path of the audio file to load.
        sr: Sampling rate requested from ``librosa.load``.
        n_fft: FFT window size for the STFT.
        hop_length: Hop between successive STFT frames.
        n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns:
        The array produced by ``librosa.feature.mfcc`` for the dB-scaled input.
    """
    # NOTE(review): the dB spectrogram is still on the linear STFT frequency axis
    # (no Mel filter bank is applied before the MFCC step) — confirm intended.
    waveform, rate = librosa.load(audio_file, sr=sr)
    spectrum = librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length)
    # Decibels are taken relative to the largest magnitude in this clip.
    spectrum_db = librosa.amplitude_to_db(np.abs(spectrum), ref=np.max)
    return librosa.feature.mfcc(S=spectrum_db, sr=rate, n_mfcc=n_mfcc)

#------------------------------------------------------------------------------------------------------#

def get_STFT_mel(audio_path, n_fft=2048, hop_length=512, n_mels=128, sr=22050):
    """Load a clip and return its Mel spectrogram (from STFT magnitude) in dB.

    Args:
        audio_path: Path of the audio file to load.
        n_fft: FFT window size for the STFT.
        hop_length: Hop between successive STFT frames.
        n_mels: Number of Mel bands.
        sr: Sampling rate requested from ``librosa.load``.

    Returns:
        The dB-scaled Mel spectrogram, referenced to its own maximum.
    """
    # NOTE(review): ``power_to_db`` is applied to a Mel spectrogram built from
    # magnitude rather than power — confirm the dB scaling is what is wanted.
    waveform, rate = librosa.load(audio_path, sr=sr)
    magnitude = np.abs(librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length))
    mel_bands = librosa.feature.melspectrogram(S=magnitude, n_mels=n_mels, sr=rate)
    return librosa.power_to_db(mel_bands, ref=np.max)

#------------------------------------------------------------------------------------------------------#

def get_STFT_me1_mag(audio_file, n_fft=2048, hop_length=512, n_mels=128):
    """Load a clip, project its STFT magnitude onto a Mel filter bank, return dB.

    The file is loaded with ``librosa.load``'s default sampling rate.

    Args:
        audio_file: Path of the audio file to load.
        n_fft: FFT window size, used for both the STFT and the filter bank.
        hop_length: Hop between successive STFT frames.
        n_mels: Number of Mel filters.

    Returns:
        The dB-scaled Mel spectrogram, referenced to its own maximum.
    """
    # NOTE(review): ``power_to_db`` receives magnitude-based Mel energies here;
    # confirm that power (not amplitude) dB scaling is intended.
    waveform, rate = librosa.load(audio_file)
    spectrum = librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length)
    magnitude = librosa.magphase(spectrum)[0]  # phase component is not needed
    filters = librosa.filters.mel(sr=rate, n_fft=n_fft, n_mels=n_mels)
    mel_magnitude = filters.dot(magnitude)
    return librosa.power_to_db(mel_magnitude, ref=np.max)

#-------------------------------------------------------------------------------------------------------#

def get_STFT_melx(audio_path, n_fft=2048, hop_length=512, n_mels=128, sr=22050, normalize=True):
    """Load a clip and return its power Mel spectrogram in dB, optionally min-max scaled.

    Args:
        audio_path: Path of the audio file to load.
        n_fft: FFT window size, used for both the STFT and the Mel filter bank.
        hop_length: Hop between successive STFT frames.
        n_mels: Number of Mel filters.
        sr: Sampling rate requested from ``librosa.load``.
        normalize: When true, rescale the dB values to the range [0, 1] using the
            matrix-wide minimum and maximum. When false, return the dB values as is.

    Returns:
        The dB Mel spectrogram, min-max scaled when ``normalize`` is true and the
        values are not all identical; otherwise the unscaled dB matrix.
    """
    y, sr = librosa.load(audio_path, sr=sr)
    power_spectrum = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length)) ** 2
    mel_filter_bank = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    mel = np.dot(mel_filter_bank, power_spectrum)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Without this early return the function had no value to return for
    # normalize=False and raised UnboundLocalError.
    if not normalize:
        return mel_db

    mel_min = mel_db.min()
    mel_range = mel_db.max() - mel_min
    if mel_range == 0:
        # Constant matrix: scaling would divide by zero, so return it unchanged.
        return mel_db
    return (mel_db - mel_min) / mel_range

#---------------------------------------------------------------------------------------------------------#

def get_mel(audio_file, n_mels=128, sr=22050):
    """Load a clip and return its Mel spectrogram in dB.

    Args:
        audio_file: Path of the audio file to load.
        n_mels: Number of Mel bands.
        sr: Sampling rate requested from ``librosa.load``.

    Returns:
        The dB-scaled Mel spectrogram, referenced to its own maximum.
    """
    waveform, rate = librosa.load(audio_file, sr=sr)
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=rate, n_mels=n_mels)
    return librosa.power_to_db(mel_power, ref=np.max)

#----------------------------------------------------------------------------------------------------------#

def stft_mel_mfcc(audio_file, n_fft=2048, hop_length=512, n_mels=128, n_mfcc=13):
    """Load a clip and derive MFCCs from a Mel projection of its STFT magnitude.

    The file is loaded with ``librosa.load``'s default sampling rate.

    Args:
        audio_file: Path of the audio file to load.
        n_fft: FFT window size, used for both the STFT and the Mel filter bank.
        hop_length: Hop between successive STFT frames.
        n_mels: Number of Mel filters.
        n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns:
        The array produced by ``librosa.feature.mfcc``.
    """
    # NOTE(review): ``power_to_db`` is fed magnitude-based Mel energies (not
    # power) — confirm this scaling is the intended variant.
    waveform, rate = librosa.load(audio_file)
    spectrum = librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length)
    magnitude = librosa.magphase(spectrum)[0]  # phase component is not needed
    filters = librosa.filters.mel(sr=rate, n_fft=n_fft, n_mels=n_mels)
    mel_db = librosa.power_to_db(filters.dot(magnitude))
    return librosa.feature.mfcc(S=mel_db, n_mfcc=n_mfcc)

#-------------------------------------------------------------------------------------------------------------#

def get_stft_power_mel_mfcc(audio_file, n_fft=2048, hop_length=512, n_mels=128, n_mfcc=13):
    """Load a clip and derive MFCCs from its power Mel spectrogram.

    Args:
        audio_file: Path of the audio file to load.
        n_fft: FFT window size for the STFT (also forwarded to ``melspectrogram``).
        hop_length: Hop between STFT frames (also forwarded to ``melspectrogram``).
        n_mels: Number of Mel bands.
        n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns:
        The array produced by ``librosa.feature.mfcc``.
    """
    # NOTE(review): ``sr=None`` keeps each file's own sampling rate, unlike the
    # sibling extractors that request a fixed rate — confirm this is intended.
    waveform, rate = librosa.load(audio_file, sr=None)
    power = np.abs(librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length)) ** 2
    mel_power = librosa.feature.melspectrogram(
        S=power, sr=rate, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    mel_db = librosa.power_to_db(mel_power)
    return librosa.feature.mfcc(S=mel_db, sr=rate, n_mfcc=n_mfcc)

#---------------------------------------------------------------------------------------------------------------#

def mfcc(audio_file, n_mfcc=13):
    """Load a clip and return the MFCCs computed directly from its waveform.

    The file is loaded with ``librosa.load``'s default sampling rate.

    Args:
        audio_file: Path of the audio file to load.
        n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.

    Returns:
        The array produced by ``librosa.feature.mfcc``.
    """
    waveform, rate = librosa.load(audio_file)
    return librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=n_mfcc)

#---------------------------------------------------------------------------------------------------------------#

def scale_features(features):
    """Collapse a feature matrix to one mean value per row.

    Despite the name, nothing is rescaled: the input is transposed and averaged
    along axis 0 of the transposed array, so a 2-D ``(rows, columns)`` input
    yields a 1-D array holding the mean of each row.

    Args:
        features: Array exposing ``.T`` (e.g. a NumPy array).

    Returns:
        The mean of ``features.T`` along axis 0.
    """
    transposed = features.T
    return transposed.mean(axis=0)

#---------------------------------------------------------------------------------------------------------------#


Extracting features for the dataset and saving them to CSV

In [4]:
import os
import librosa
import numpy as np
import pandas as pd
    
def feature_extraction(input_folder):
    """Walk ``input_folder`` and build one row of time-averaged features per file.

    Every directory visited by ``os.walk`` is treated as a class whose label is
    the directory's base name. Each file in it is passed through the feature
    extractors defined in this notebook, and every resulting matrix is reduced
    with ``scale_features``. Files that cannot be processed are skipped; their
    error messages are printed once the directory has been processed.

    Args:
        input_folder: Root directory to scan recursively.

    Returns:
        pandas.DataFrame with one row per successfully processed file, holding
        ``Filepath``, ``Class``, ``Duration`` and one column per feature variant.
        The frame is empty when no file could be processed.
    """
    data = []
    for root, dirs, files in os.walk(input_folder):
        # Sorting in place fixes both the order in which sub-directories are
        # visited and the order of files, so row order is reproducible.
        dirs.sort()
        files.sort()
        # normpath keeps the label non-empty when the path ends with a separator.
        class_label = os.path.basename(os.path.normpath(root))
        errors = []
        for file in tqdm(files, desc=f'Processing {class_label}', unit='file'):
            audio_file = os.path.join(root, file)
            try:
                row = {
                    "Filepath": audio_file,
                    "Class": class_label,
                    "Duration": librosa.get_duration(path=audio_file),
                    "STFT_MFCC": scale_features(get_STFT_mfcc(audio_file)),
                    "STFT_MFCC2": scale_features(get_STFT_mfcc2(audio_file)),
                    "STFT_Mel": scale_features(get_STFT_mel(audio_file)),
                    "STFT_Mel1_Mag": scale_features(get_STFT_me1_mag(audio_file)),
                    "STFT_MelX": scale_features(get_STFT_melx(audio_file)),
                    "Mel": scale_features(get_mel(audio_file)),
                    "STFT_Mel_MFCC": scale_features(stft_mel_mfcc(audio_file)),
                    "STFT_Power_Mel_MFCC": scale_features(get_stft_power_mel_mfcc(audio_file)),
                }
            except Exception as e:
                # Best-effort extraction: record the failure and keep going so
                # one unreadable file does not abort the whole run.
                errors.append(f"Error processing {audio_file=}: {e}")
            else:
                data.append(row)

        # Print every message on its own line; handing a generator expression
        # to print() would only show the generator object's repr.
        for message in errors:
            print(message)
        print("----------------------------------------------------------------------------------")

    return pd.DataFrame(data)

# Root directory that is scanned recursively; each directory name becomes a class label.
input_folder = "DataSet"

# Build the feature table for the whole dataset and write it next to the notebook.
# NOTE(review): the feature columns appear to hold NumPy arrays, which to_csv
# writes in their string form — confirm downstream code parses them as expected.
df = feature_extraction(input_folder=input_folder)
df.to_csv(path_or_buf="audio_features.csv", index=False)




Processing DataSet: 0file [00:00, ?file/s]


----------------------------------------------------------------------------------


Processing non_scream: 100%|██████████| 1545/1545 [03:26<00:00,  7.50file/s]


----------------------------------------------------------------------------------


Processing scream: 100%|██████████| 1583/1583 [06:23<00:00,  4.13file/s]


----------------------------------------------------------------------------------
