In [1]:
import librosa
import librosa.display
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
# Root folder that holds one sub-directory per genre.
data_path = Path("./data", "genres")
# Keep only the genre directories (stray files next to them are ignored) and
# sort them: iterdir() yields entries in arbitrary order, and sorting keeps the
# row order of the extracted feature table reproducible between runs.
items = sorted(entry for entry in data_path.iterdir() if entry.is_dir())
# For a quick trial run on a single genre, narrow the list further, e.g.:
# items = [entry for entry in items if entry.name == "pop"]

In [3]:
# Column layout of the feature table: the file name first, then a mean/variance
# pair for each summarised feature (the tempo is a single value), the 20 MFCCs
# as adjacent mean/variance pairs, and the genre label last.
cols = [
    "name",
    *(f"{feature}_{stat}" for feature in ("chroma_stft", "rms") for stat in ("mean", "var")),
    "bpm",
    *(
        f"{feature}_{stat}"
        for feature in (
            "spectral_centroid",
            "spectral_bandwidth",
            "rolloff",
            "zero_crossing_rate",
            "harmony",
            "perceptr",
        )
        for stat in ("mean", "var")
    ),
    *(f"mfcc{index}_{stat}" for index in range(1, 21) for stat in ("mean", "var")),
    "label",
]

Feature extraction functions

In [4]:
def zero_crossing_rate(audio_file):
    """Summarise the zero-crossing rate of a signal.

    ``audio_file`` is the audio time series handed to
    ``librosa.feature.zero_crossing_rate``; the result is the pair
    ``(mean, variance)`` taken over everything that call returns.
    """
    rates = librosa.feature.zero_crossing_rate(y=audio_file)
    mean_rate = np.mean(rates)
    rate_spread = np.var(rates)
    return mean_rate, rate_spread


In [5]:
def rms(audio_file):
    """Return the mean and variance of the signal's RMS values.

    The audio time series ``audio_file`` is passed to ``librosa.feature.rms``
    and both statistics are computed over the complete result.
    """
    energy = librosa.feature.rms(y=audio_file)
    return tuple(statistic(energy) for statistic in (np.mean, np.var))


In [6]:
def harmonics(audio_file):
    """Describe the harmonic and percussive parts of a signal.

    The audio time series ``audio_file`` is split with
    ``librosa.effects.hpss``. The returned 4-tuple holds, in this order, the
    mean and variance of the first (harmonic) component followed by the mean
    and variance of the second (percussive) component.
    """
    separated = librosa.effects.hpss(y=audio_file)
    harmonic, percussive = separated
    return (
        np.mean(harmonic),
        np.var(harmonic),
        np.mean(percussive),
        np.var(percussive),
    )


In [7]:
def get_bpm(y, sr):
    """Estimate the tempo of a signal in beats per minute.

    Parameters
    ----------
    y : audio time series passed to ``librosa.beat.beat_track``.
    sr : sampling rate of ``y``.

    Returns
    -------
    float
        The tempo as a plain Python float whenever the estimate consists of
        exactly one value. Should the tracker deliver several values, the
        array is returned as it is.
    """
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa release the tempo arrives either as a scalar or
    # wrapped in a one-element array. Unwrap it so that callers always receive
    # a number they can store in a table cell or format directly.
    tempo = np.asarray(tempo)
    if tempo.size == 1:
        return float(tempo.reshape(-1)[0])
    return tempo


In [8]:
def spectral_centroid(audio_file, sr):
    """Return the mean and variance of the spectral centroid.

    ``audio_file`` (audio time series) and ``sr`` (its sampling rate) are
    forwarded to ``librosa.feature.spectral_centroid``; both statistics cover
    the whole result of that call.
    """
    values = librosa.feature.spectral_centroid(y=audio_file, sr=sr)
    centroid_mean, centroid_var = np.mean(values), np.var(values)
    return centroid_mean, centroid_var


In [9]:
def spectral_bandwidth(audio_file, sr):
    """Return the mean and variance of the spectral bandwidth.

    ``audio_file`` (audio time series) and ``sr`` (its sampling rate) are
    forwarded to ``librosa.feature.spectral_bandwidth``; both statistics cover
    the whole result of that call.
    """
    widths = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)
    return tuple(statistic(widths) for statistic in (np.mean, np.var))


In [10]:
def spectral_rolloff(audio_file, sr):
    """Return the mean and variance of the spectral roll-off.

    ``audio_file`` (audio time series) and ``sr`` (its sampling rate) are
    forwarded to ``librosa.feature.spectral_rolloff``; both statistics cover
    the whole result of that call.
    """
    cutoffs = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)
    average = np.mean(cutoffs)
    spread = np.var(cutoffs)
    return average, spread


In [11]:
def mfccs_list(audio_file, sr):
    """Return per-coefficient MFCC means and variances as two lists.

    ``audio_file`` (audio time series) and ``sr`` (its sampling rate) are
    forwarded to ``librosa.feature.mfcc``. The statistics are taken along
    axis 1 of the result, so each list has one entry per row of that matrix.
    The first list holds the means, the second the variances.
    """
    coefficients = librosa.feature.mfcc(y=audio_file, sr=sr)
    means = np.mean(coefficients, axis=1).tolist()
    variances = np.var(coefficients, axis=1).tolist()
    return means, variances


In [12]:
def chroma(audio_file, sr, hop_length=5000):
    """Return the mean and variance of the signal's chromagram.

    ``audio_file`` (audio time series), ``sr`` (its sampling rate) and
    ``hop_length`` are forwarded to ``librosa.feature.chroma_stft``. Raise or
    lower ``hop_length`` to change how granular the chromagram is.
    """
    chroma_matrix = librosa.feature.chroma_stft(
        y=audio_file,
        sr=sr,
        hop_length=hop_length,
    )
    return tuple(statistic(chroma_matrix) for statistic in (np.mean, np.var))


In [13]:
def extract_features(genres):
    """Build the feature table for every audio file of the given genres.

    Parameters
    ----------
    genres : iterable of path objects offering ``iterdir()`` and ``name``
        One directory per genre. The directory name is stored in the
        ``label`` column and every entry inside it is loaded as audio.

    Returns
    -------
    pandas.DataFrame
        One row per audio file with the columns listed in ``cols``. The frame
        is empty (but still carries those columns) when nothing was found.
    """
    rows = []

    for genre in genres:
        # Path.name does not depend on the path separator, unlike splitting
        # the string form on "/".
        label = genre.name
        for track_path in genre.iterdir():
            # pressure strengths (y) and sample rate (sr)
            y, sr = librosa.load(track_path)
            # Drop the silence before and after the actual audio.
            signal, _ = librosa.effects.trim(y)

            # features
            zcr_mean, zcr_var = zero_crossing_rate(signal)
            harm_mean, harm_var, perc_mean, perc_var = harmonics(signal)
            bpm = get_bpm(signal, sr)
            spec_centr_mean, spec_centr_var = spectral_centroid(signal, sr)
            spec_band_mean, spec_band_var = spectral_bandwidth(signal, sr)
            spec_roll_mean, spec_roll_var = spectral_rolloff(signal, sr)
            mfccs_mean, mfccs_var = mfccs_list(signal, sr)
            chroma_freq_mean, chroma_freq_var = chroma(signal, sr)
            rms_mean, rms_var = rms(signal)

            # Values are listed in the same order as the entries of `cols`.
            row = [
                track_path.name,
                chroma_freq_mean,
                chroma_freq_var,
                rms_mean,
                rms_var,
                bpm,
                spec_centr_mean,
                spec_centr_var,
                spec_band_mean,
                spec_band_var,
                spec_roll_mean,
                spec_roll_var,
                zcr_mean,
                zcr_var,
                harm_mean,
                harm_var,
                perc_mean,
                perc_var,
            ]
            # `cols` stores each MFCC as an adjacent (mean, var) pair, so the
            # two lists have to be interleaved. Appending all means followed
            # by all variances would put the values under the wrong headers.
            for coeff_mean, coeff_var in zip(mfccs_mean, mfccs_var):
                row.extend((coeff_mean, coeff_var))
            row.append(label)
            rows.append(row)

    # Create the frame once from all collected rows instead of growing it
    # one row at a time.
    return pd.DataFrame(rows, columns=cols)


In [14]:
# Build the feature table for all genre directories collected in `items`.
# Every audio file is loaded and analysed here, so this cell is presumably the
# slow part of the notebook.
df = extract_features(items)

In [371]:
# Persist the feature table as CSV. With to_csv's default settings the
# DataFrame index is written as an additional, unnamed first column.
# NOTE(review): the target path is hard-coded relative to the working
# directory and the "data" folder must already exist - confirm this matches
# where the notebook is run from.
df.to_csv("data/features_var.csv")