In [1]:
from scipy import signal
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pathlib
import os
import librosa
import librosa.display

%matplotlib notebook

In [2]:
# Directory that holds the input recordings.
sound_path = '../dataset/sound/'
# Directory the generated spectrogram images are written to.
img_path = '../dataset/spectrograms/'
# Set matplotlib's savefig padding to zero; the intent appears to be
# borderless spectrogram images (NOTE(review): matplotlib documents
# pad_inches as taking effect with bbox_inches='tight' -- confirm it matters here).
plt.rcParams['savefig.pad_inches'] = 0

In [3]:
def preprocess_song(audio, orig_sr, min_duration=10, target_sr=22050):
    """Resample a recording and loop it until it is long enough.

    The signal is resampled from ``orig_sr`` to ``target_sr`` and then
    repeatedly concatenated with itself (doubling its length each time)
    until it lasts at least ``min_duration`` seconds.

    Parameters
    ----------
    audio : np.ndarray
        Audio time series, e.g. as returned by ``librosa.load``.
    orig_sr : number
        Sampling rate of ``audio``.
    min_duration : number, optional
        Minimum duration of the returned signal, in seconds.
    target_sr : number, optional
        Sampling rate of the returned signal.

    Returns
    -------
    (np.ndarray, number)
        The resampled (and possibly repeated) signal and ``target_sr``.

    Raises
    ------
    ValueError
        If the resampled signal has zero duration while ``min_duration`` is
        positive; repeating an empty signal can never reach the target.
    """
    y = librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
    # Pass y and sr by keyword: recent librosa releases make them keyword-only.
    duration = librosa.get_duration(y=y, sr=target_sr)
    if duration <= 0 and min_duration > 0:
        raise ValueError("cannot extend an empty recording to {} s".format(min_duration))
    while duration < min_duration:
        # np.append flattens its arguments, so this assumes a mono signal.
        y = np.append(y, y)
        # The duration must be measured at target_sr, not librosa's default rate.
        duration = librosa.get_duration(y=y, sr=target_sr)
    return y, target_sr

In [4]:
def apply_ft(audio, sr, win_size=512, overlap_fac=0.75):
    """Compute the short-time Fourier transform of a signal.

    Parameters
    ----------
    audio : np.ndarray
        Audio time series.
    sr : number
        Sampling rate of ``audio``. Not used by the computation; kept so the
        signature stays compatible with existing callers.
    win_size : int, optional
        FFT size, also the length of the analysis window.
    overlap_fac : float, optional
        Fraction of ``win_size`` shared by consecutive frames. It must leave
        a hop of at least one sample.

    Returns
    -------
    The value returned by ``librosa.stft`` for the given signal.

    Raises
    ------
    ValueError
        If ``win_size`` and ``overlap_fac`` yield a hop length below 1.
    """
    hop_length = int(win_size - np.floor(overlap_fac * win_size))
    if hop_length < 1:
        # Fail here with a readable message rather than inside librosa.
        raise ValueError(
            "overlap_fac={} with win_size={} gives a non-positive hop length ({})".format(
                overlap_fac, win_size, hop_length))
    # np.hanning returns a window of exactly win_size samples, matching n_fft.
    window = np.hanning(win_size)
    return librosa.stft(audio, n_fft=win_size, hop_length=hop_length, window=window)

In [5]:
def extract_foreground(spec_stft, sr, margin=10, power=2, hop_length=512):
    """Soft-mask a spectrogram to keep what a nearest-neighbour filter removes.

    The magnitude spectrogram is filtered with ``librosa.decompose.nn_filter``
    (median aggregation, cosine metric). The part of the magnitude that
    exceeds the filtered version is turned into a soft mask, which is then
    applied to the magnitude spectrogram.

    Parameters
    ----------
    spec_stft : np.ndarray
        Complex STFT, e.g. the output of ``apply_ft``.
    sr : number
        Sampling rate of the underlying signal.
    margin : number, optional
        Factor applied to the filtered spectrogram when it is used as the
        reference of the soft mask.
    power : number, optional
        Exponent forwarded to ``librosa.util.softmask``.
    hop_length : int, optional
        Hop length (in samples) of the STFT that produced ``spec_stft``. It is
        used to convert the 2 second filter width into a frame count. The
        default, 512, is the value ``librosa.time_to_frames`` assumes when no
        hop length is given, so existing calls behave as before.
        NOTE(review): pass the real hop length when the STFT was not computed
        with a hop of 512, otherwise the width does not correspond to 2 s.

    Returns
    -------
    np.ndarray
        Masked magnitude spectrogram.
    """
    # Only the magnitude is needed; the phase component is discarded.
    spec_full, _ = librosa.magphase(spec_stft)
    filter_width = int(librosa.time_to_frames(2, sr=sr, hop_length=hop_length))
    spec_filter = librosa.decompose.nn_filter(spec_full, aggregate=np.median, metric='cosine',
                                              width=filter_width)
    # Clip so the filtered estimate never exceeds the input magnitude.
    spec_filter = np.minimum(spec_full, spec_filter)
    mask = librosa.util.softmask(spec_full - spec_filter, margin * spec_filter, power=power)
    return mask * spec_full

In [6]:
def plot_spec(audio, sr, filename, x_axis='time', y_axis='log'):
    """Render a spectrogram to an image file without axes or frame.

    Parameters
    ----------
    audio : np.ndarray
        Amplitude spectrogram; it is converted to dB relative to its maximum
        before being drawn.
    sr : number
        Sampling rate, forwarded to ``librosa.display.specshow``.
    filename : str or path-like
        Destination passed to ``Figure.savefig``.
    x_axis, y_axis : str, optional
        Axis types forwarded to ``librosa.display.specshow``.

    Notes
    -----
    Interactive mode is switched off with ``plt.ioff()`` and is not restored.
    The figure is always closed, even when drawing or saving fails, so a
    batch run does not accumulate open figures.
    """
    plt.ioff()
    fig = plt.figure(figsize=None)
    try:
        # A single axes covering the whole canvas, with frame and ticks hidden.
        ax = fig.add_axes([0, 0, 1, 1], frameon=False)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.autoscale(tight=True)
        librosa.display.specshow(librosa.amplitude_to_db(audio, ref=np.max),
                                 y_axis=y_axis, x_axis=x_axis, sr=sr, ax=ax)
        fig.savefig(filename)
    finally:
        plt.close(fig)

In [7]:
# Create the output directory up front so saving does not fail on a fresh checkout.
out_dir = pathlib.Path(img_path)
out_dir.mkdir(parents=True, exist_ok=True)

# Turn every MP3 in sound_path into a foreground spectrogram image named after
# the recording. sorted() makes the processing order reproducible; the order
# glob yields is filesystem-dependent.
for filename in sorted(pathlib.Path(sound_path).glob('*.mp3')):
    # Joining through pathlib works whether or not img_path ends with a slash.
    img_filename = str(out_dir / "{}.png".format(filename.stem))
    rec, sr = librosa.load(filename)
    rec, sr = preprocess_song(rec, sr)
    spec_stft = apply_ft(rec, sr)
    spec_foreground = extract_foreground(spec_stft, sr)
    plot_spec(spec_foreground, sr, img_filename)