In [13]:
import os
import librosa
import numpy as np


def summarize_feature(feature_array):
    """
    feature_array shape: [num_coeffs, num_frames]
    Returns: 1D numpy array containing mean, std, and median of each row.
    """
    # axis=1 means we compute statistics across 'frames'
    means = np.mean(feature_array, axis=1)
    stds = np.std(feature_array, axis=1)
    medians = np.median(feature_array, axis=1)
    
    # Concatenate all stats into one 1D array
    return np.concatenate([means, stds, medians], axis=0)  #[means, stds, medians]

def extract_features_librosa(file_path):

    # Load the audio file
    y, sr = librosa.load(file_path, sr=None)
    
    # Extract features
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
    chroma_vqt = librosa.feature.chroma_vqt(y=y, sr=sr, intervals='equal')
    melspectogram = librosa.feature.melspectrogram(y=y, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12) # Numer of mfccs difference?
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwith = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_flatness = librosa.feature.spectral_flatness(y=y) 
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    poly_features = librosa.feature.poly_features(y=y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    zero_crossings = librosa.feature.zero_crossing_rate(y)
    
    # Estimate tempo and beats
    tempo = librosa.feature.tempo(y=y, sr=sr)
    tempogram = librosa.feature.tempogram(y=y, sr=sr)
    fourier_tempogram = librosa.feature.fourier_tempogram(y=y, sr=sr)
    tempogram_ratio = librosa.feature.tempogram_ratio(y=y, sr=sr)

    track_features = np.concatenate([
        summarize_feature(mfccs),
        summarize_feature(chroma_stft),
        summarize_feature(chroma_cqt),
        summarize_feature(chroma_cens),
        summarize_feature(chroma_vqt),
        summarize_feature(melspectogram),
        summarize_feature(spectral_centroid),
        summarize_feature(spectral_bandwith),
        summarize_feature(spectral_contrast),
        summarize_feature(spectral_flatness),
        summarize_feature(spectral_rolloff),
        summarize_feature(poly_features),
        summarize_feature(tonnetz),
        summarize_feature(zero_crossings),
        summarize_feature(tempogram),
        summarize_feature(fourier_tempogram),
        summarize_feature(tempogram_ratio),
        summarize_feature(rms),
        tempo
    ], axis=0)

    return track_features

    # Aggregate features into a dictionary
    """
    features = {
        'chroma_stft': ,
        'chroma_cqt': summarize_feature(chroma_cqt),
        'chroma_cens': summarize_feature(chroma_cens),
        'chroma_vqt': summarize_feature(chroma_vqt),
        'melspectogram': summarize_feature(melspectogram),
        'mfccs': ,
        'rms': summarize_feature(rms),
        'spectral_centroid': summarize_feature(spectral_centroid),
        'spectral_bandwith': summarize_feature(spectral_bandwith),
        'spectral_contrast': summarize_feature(spectral_contrast),
        'spectral_flatness': summarize_feature(spectral_flatness),
        'spectral_rolloff': summarize_feature(spectral_rolloff),
        'poly_features': summarize_feature(poly_features),
        'tonnetz': summarize_feature(tonnetz),
        'zero_crossings': summarize_feature(zero_crossings),
        'tempo': tempo,
        'tempogram': summarize_feature(tempogram),
        'fourier_tempogram': summarize_feature(fourier_tempogram),
        'tempogram_ratio': summarize_feature(tempogram_ratio)
    }
    return features
    """

folder_path = './audio_previews'
X = []

folder_len = os.listdir(folder_path)
folder_len_num = len(folder_len)
counter = 0

for file in os.listdir(folder_path):
    if file.endswith('.mp3'):
        counter += 1
        print(f'Processing file {counter}/{folder_len_num}...')
        file_path = os.path.join(folder_path, file)
        file_id = os.path.splitext(file)[0]
        features = extract_features_librosa(file_path)
        X.append(features)
    if counter == 1:
        break



Processing file 1/1772...


In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)


ValueError: Complex data not supported
[array([-2.43607666e+02+0.j,  2.02283173e+02+0.j, -9.33068466e+00+0.j, ...,
        3.39018144e-02+0.j,  1.07776299e-01+0.j,  1.20185320e+02+0.j])]
