In [357]:
import librosa
import librosa.display
from pathlib import Path
import numpy as np
import pandas as pd

In [358]:
# read out directories
data_path = Path("./data", "genres")
items = data_path.iterdir()
items = list(filter(lambda x: x.is_dir(), items)) # only directories of genres
#items = list(filter(lambda x: "pop" in str(x), items)) # only directories of genres

In [359]:
cols = [
    "name",
    "chroma_stft_mean",
    "rms_mean",
    "bpm",
    "spectral_centroid_mean",
    "spectral_bandwidth_mean",
    "rolloff_mean",
    "zero_crossing_rate_mean",
    "harmony_mean",
    "perceptr_mean",
    "mfcc1_mean",
    "mfcc2_mean",
    "mfcc3_mean",
    "mfcc4_mean",
    "mfcc5_mean",
    "mfcc6_mean",
    "mfcc7_mean",
    "mfcc8_mean",
    "mfcc9_mean",
    "mfcc10_mean",
    "mfcc11_mean",
    "mfcc12_mean",
    "mfcc13_mean",
    "mfcc14_mean",
    "mfcc15_mean",
    "mfcc16_mean",
    "mfcc17_mean",
    "mfcc18_mean",
    "mfcc19_mean",
    "mfcc20_mean",
    "label"
]


Feature extraction functions

In [360]:
def zero_crossing_rate(audio_file):
    zcr = librosa.feature.zero_crossing_rate(y=audio_file)
    return np.mean(zcr)


In [361]:
def rms(audio_file):
    rms_vals = librosa.feature.rms(y=audio_file)
    return np.mean(rms_vals)


In [362]:
def harmonics(audio_file):
    y_harm, y_perc = librosa.effects.hpss(y=audio_file)
    return np.mean(y_harm), np.mean(y_perc)


In [363]:
def get_bpm(y, sr):
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    return tempo


In [364]:
def spectral_centroid(audio_file, sr):
    centroids = librosa.feature.spectral_centroid(y=audio_file, sr=sr)
    return np.mean(centroids)


In [365]:
def spectral_bandwidth(audio_file, sr):
    bandwidth = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)
    return np.mean(bandwidth)


In [366]:
def spectral_rolloff(audio_file, sr):
    rolloff = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)
    return np.mean(rolloff)


In [367]:
def mfccs_list(audio_file, sr):
    mfccs_raw = librosa.feature.mfcc(y=audio_file, sr=sr)
    mfccs = np.mean(mfccs_raw, axis=1)
    return mfccs.tolist()


In [368]:
def chroma(audio_file, sr, hop_length=5000):
    # Increase or decrease hop_length to change how granular you want your data to be

    # Chromogram
    chromagram = librosa.feature.chroma_stft(y=audio_file, sr=sr, hop_length=hop_length)
    return np.mean(chromagram)


In [369]:
def extract_features(genres):
    df = pd.DataFrame(columns=cols)

    for genre in genres:
        name = str(genre).split("/")[-1]
        for audio_file in genre.iterdir():
            file_name = str(audio_file).split("/")[-1]

            # pressure strengths (y) and sample rate (sr)
            y, sr = librosa.load(audio_file)
            # Trim leading and trailing silence from an audio signal (silence before and after the actual audio)
            audio_file, _ = librosa.effects.trim(y)

            # features
            zcr = zero_crossing_rate(audio_file)
            harm, perc = harmonics(audio_file)
            bpm = get_bpm(audio_file, sr)
            spec_centr = spectral_centroid(audio_file, sr)
            spec_band = spectral_bandwidth(audio_file, sr)
            spec_roll = spectral_rolloff(audio_file, sr)
            mfccs = mfccs_list(audio_file, sr)
            chroma_freq = chroma(audio_file, sr)
            rms_mean = rms(audio_file)

            # append to df in column order
            row = [
                file_name,
                chroma_freq,
                rms_mean,
                bpm,
                spec_centr,
                spec_band,
                spec_roll,
                zcr,
                harm,
                perc,
                *mfccs,
                name,
            ]
            df.loc[len(df)] = row

    return df


In [370]:
df = extract_features(items)

In [371]:
df.to_csv("data/features.csv")

In [372]:
df.head()

Unnamed: 0,name,chroma_stft_mean,rms_mean,bpm,spectral_centroid_mean,spectral_bandwidth_mean,rolloff_mean,zero_crossing_rate_mean,harmony_mean,perceptr_mean,...,mfcc12_mean,mfcc13_mean,mfcc14_mean,mfcc15_mean,mfcc16_mean,mfcc17_mean,mfcc18_mean,mfcc19_mean,mfcc20_mean,label
0,hiphop.00023.wav,0.390503,0.190476,92.285156,2240.289987,2193.980956,4728.436605,0.106657,-1.6e-05,-0.000355,...,8.381841,-15.426938,4.878397,-10.051116,4.564088,-1.991847,7.979235,1.212326,1.365209,hiphop
1,hiphop.00005.wav,0.500908,0.261586,71.777344,2709.171523,2510.848006,5794.946346,0.124583,-0.000284,-0.002899,...,14.002207,-8.07207,7.428148,-5.495323,12.837337,-3.695439,6.218712,-2.584942,0.102165,hiphop
2,hiphop.00038.wav,0.427645,0.292688,184.570312,2363.017438,2384.234167,4735.809572,0.105224,0.001348,-0.003059,...,-0.473602,-0.853041,-1.762178,-0.08044,3.510901,0.74759,1.177753,-1.213256,-3.546965,hiphop
3,hiphop.00089.wav,0.485283,0.120688,123.046875,2669.947536,2396.503479,5526.4308,0.117572,-2.1e-05,-0.000635,...,17.616529,-0.035863,14.716657,-4.235781,10.654611,-6.327601,1.040412,-1.82562,5.55678,hiphop
4,hiphop.00011.wav,0.457304,0.203709,135.999178,2095.493684,2241.001468,4581.942544,0.094151,-3.7e-05,-0.001135,...,6.884745,-15.329198,4.779245,-11.102331,4.739863,-2.809381,6.800127,-5.809848,-3.773828,hiphop
