In [1]:
import os
import librosa
import numpy as np

def extract_features_librosa(file_path):
    """Compute a dictionary of summary audio features for one file using librosa.

    Parameters
    ----------
    file_path : str or path-like
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    dict
        Plain Python lists/scalars only:

        - ``mfcc_mean`` / ``mfcc_std``: per-coefficient mean and standard
          deviation over time of the 13 requested MFCCs (lists).
        - ``chroma_mean``: per-row mean over time of the chroma STFT (list).
        - ``spectral_centroid_mean``, ``spectral_rolloff_mean``,
          ``zero_crossing_rate_mean``, ``rms_mean``: overall means (floats).
        - ``tempo``: estimated tempo from ``librosa.beat.beat_track`` (float).
        - ``danceability``: ``tempo / 200``, an ad-hoc heuristic (float).
        - ``beat_count``: number of detected beat frames (int).
    """
    # sr=None is passed so librosa does not resample to its default rate.
    y, sr = librosa.load(file_path, sr=None)

    # Frame-level features
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zero_crossings = librosa.feature.zero_crossing_rate(y)
    rms = librosa.feature.rms(y=y)

    # Estimate tempo and beats
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version, tempo may come back as a NumPy array
    # rather than a scalar. Coerce it to a plain float so the result dict
    # holds only native Python values, like every other entry.
    tempo = float(np.asarray(tempo).reshape(-1)[0])

    # A simple heuristic for "danceability": tempo scaled by a nominal
    # 200 BPM ceiling. This is only an example and not a scientifically
    # rigorous metric; the value is not clamped, so it exceeds 1.0 when the
    # estimated tempo is above 200 BPM.
    danceability = tempo / 200

    # Aggregate features into a dictionary
    features = {
        'mfcc_mean': np.mean(mfccs, axis=1).tolist(),  # Mean of each MFCC coefficient
        'mfcc_std': np.std(mfccs, axis=1).tolist(),
        'chroma_mean': np.mean(chroma, axis=1).tolist(),
        'spectral_centroid_mean': np.mean(spectral_centroid).item(),
        'spectral_rolloff_mean': np.mean(spectral_rolloff).item(),
        'zero_crossing_rate_mean': np.mean(zero_crossings).item(),
        'rms_mean': np.mean(rms).item(),
        'tempo': tempo,
        'danceability': danceability,
        'beat_count': len(beat_frames)
    }
    return features

folder_path = './audio_previews'
# NOTE: the variable name says "essentia", but the values are produced by
# extract_features_librosa; the name is kept so later cells still work.
audio_features_essentia = {}

# List the directory once, then keep only the .mp3 files so the progress
# denominator matches the number of files actually processed. Sorting makes
# the processing order (and the dict's insertion order) reproducible, since
# os.listdir returns entries in arbitrary order.
folder_len = os.listdir(folder_path)
mp3_files = sorted(name for name in folder_len if name.endswith('.mp3'))
folder_len_num = len(mp3_files)
counter = 0  # stays 0 when the folder contains no .mp3 files

for counter, file in enumerate(mp3_files, start=1):
    print(f'Processing file {counter}/{folder_len_num}...')
    file_path = os.path.join(folder_path, file)
    # Key each result by the file name without its extension.
    file_id = os.path.splitext(file)[0]
    features = extract_features_librosa(file_path)
    audio_features_essentia[file_id] = features

print(audio_features_essentia)

Processing file 1/1772...
Processing file 2/1772...
Processing file 3/1772...
Processing file 4/1772...
Processing file 5/1772...
Processing file 6/1772...
Processing file 7/1772...
Processing file 8/1772...
Processing file 9/1772...
Processing file 10/1772...
Processing file 11/1772...
Processing file 12/1772...
Processing file 13/1772...
Processing file 14/1772...
Processing file 15/1772...
Processing file 16/1772...
Processing file 17/1772...
Processing file 18/1772...
Processing file 19/1772...
Processing file 20/1772...
Processing file 21/1772...
Processing file 22/1772...
Processing file 23/1772...
Processing file 24/1772...
Processing file 25/1772...
Processing file 26/1772...
Processing file 27/1772...
Processing file 28/1772...
Processing file 29/1772...
Processing file 30/1772...
Processing file 31/1772...
Processing file 32/1772...
Processing file 33/1772...
Processing file 34/1772...
Processing file 35/1772...
Processing file 36/1772...
Processing file 37/1772...
Processing

KeyboardInterrupt: 