In [73]:
import os
import glob
import librosa
import numpy as np

# Feature extraction

In [11]:
def feature_extraction(file_name, n_mfcc=20):
    """Load one audio file and summarise it as time-averaged features.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to decode with ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 20.

    Returns
    -------
    mfccs : np.ndarray
        Per-coefficient mean of the MFCC matrix over all frames
        (one value per coefficient, i.e. ``n_mfcc`` values).
    rmse : np.ndarray
        Mean RMS energy over all frames (a single value).
    spectral_flux : numpy scalar
        Mean of the onset-strength envelope, used here as the spectral-flux
        feature.
    zcr : np.ndarray
        Mean zero-crossing rate over all frames (a single value).
    """
    # sr=None keeps the file's native sampling rate. mono=True is librosa's
    # default, spelled out here because everything below relies on a 1-D
    # signal (multi-channel files are down-mixed by the loader).
    signal, sample_rate = librosa.load(file_name, sr=None, mono=True)

    # Frame-wise feature matrices are laid out as (n_features, n_frames), so
    # averaging along axis 1 collapses the time dimension.
    mfccs = np.mean(
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc), axis=1
    )
    rmse = np.mean(librosa.feature.rms(y=signal), axis=1)
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=signal), axis=1)

    # The onset-strength envelope of a mono signal is 1-D: one value per frame.
    spectral_flux = np.mean(librosa.onset.onset_strength(y=signal, sr=sample_rate))

    return mfccs, rmse, spectral_flux, zcr

def parse_audio_files_mfcc(parent_dir, sub_dirs, file_ext='*.mp3'): # Audio Format
    """Extract features for every audio file found under the class folders.

    Each entry of ``sub_dirs`` is treated as one class; its position in the
    sequence becomes the integer label of every file inside it.

    Parameters
    ----------
    parent_dir : str
        Directory that contains the class sub-directories.
    sub_dirs : sequence of str
        Class sub-directory names, in label order.
    file_ext : str, optional
        Glob pattern selecting the audio files inside each sub-directory.

    Returns
    -------
    features : np.ndarray, shape (n_files, 3 + n_mfccs), float
        One row per successfully processed file.
    labels : np.ndarray of int, shape (n_files,)
        Class index of each row.
    filenames : np.ndarray of object, shape (n_files,)
        Path of the file each row was computed from.

    Raises
    ------
    ValueError
        If a file yields a feature vector whose length differs from
        ``3 + n_mfccs``.

    Notes
    -----
    Files whose feature extraction raises are reported on stdout and skipped,
    so the result may hold fewer rows than there are matching files.
    """
    n_mfccs = 20 # Must match the n_mfcc that feature_extraction computes
    number_of_features = 3 + n_mfccs # MFCC means + RMS + spectral flux + ZCR

    # Accumulate in Python lists and build the arrays once at the end;
    # growing numpy arrays inside the loop copies everything on every file.
    feature_rows, label_list, name_list = [], [], []

    for label, sub_dir in enumerate(sub_dirs):
        pattern = os.path.join(parent_dir, sub_dir, file_ext)
        # glob order depends on the filesystem; sort for reproducible rows.
        for file_name in sorted(glob.glob(pattern)):
            print("Actual File Name: ", file_name)
            try:
                mfccs, rmse, spectral_flux, zcr = feature_extraction(file_name)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run.
                print("[Error] there was an error in feature extraction. %s" % (e))
                continue
            extracted_features = np.hstack([mfccs, rmse, spectral_flux, zcr])
            if extracted_features.shape != (number_of_features,):
                raise ValueError(
                    "expected %d features for %s, got shape %s"
                    % (number_of_features, file_name, extracted_features.shape)
                )
            feature_rows.append(extracted_features)
            label_list.append(label)
            name_list.append(file_name)
        print("Extracted features from %s, done" % (sub_dir))

    if feature_rows:
        features = np.array(feature_rows, dtype=float)
    else:
        # Keep the column count even when nothing was extracted.
        features = np.empty((0, number_of_features))
    return features, np.array(label_list, dtype=int), np.array(name_list, dtype=object)

In [15]:
#Read audio classes directories
# Each sub-directory of the data folder is one class, and its position in the
# sorted list is later used as the class label. Plain files and hidden entries
# (e.g. .DS_Store, .ipynb_checkpoints) are skipped so they cannot turn into
# bogus classes and shift the label indices.
audio_subdirectories = sorted(
    entry
    for entry in os.listdir("audio-data") #Path to data folder.
    if not entry.startswith('.') and os.path.isdir(os.path.join("audio-data", entry))
)
print('Audio Subdirs: ', audio_subdirectories)

Audio Subdirs:  ['001 - Low', '002 - Intermediate', '003 - High']


In [16]:
# Run feature extraction over every class folder of the data directory.
features, labels, filenames = parse_audio_files_mfcc(
    parent_dir='audio-data',
    sub_dirs=audio_subdirectories,
)

# Persist the three parallel arrays so later steps can reload them without
# re-decoding the audio.
# NOTE(review): filenames looks like an object-dtype array, which np.save
# pickles; loading it back would then need allow_pickle=True. Confirm.
np.save(file='mfcc-features-original.npy', arr=features)
np.save(file='mfcc-labels-original.npy', arr=labels)
np.save(file='mfcc-filenames-original.npy', arr=filenames)

Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 1 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 10 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 102 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 103 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 104 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 108 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 11 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 110 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 111 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 113 - D.mp3
Actual File Name:  audio-data/001 - Low/Avalinguo - Dana and Konay segment 114 - D.mp3
Actual File Name:  audio-data/001 - Low/Avaling