In [1]:
import os
import sys


module_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))  
if module_path not in sys.path:       
    sys.path.append(module_path)

In [2]:
import librosa
import museval
import numpy as np
import tensorflow as tf
import IPython.display as ipd
from utils.helper import wav_to_spectrogram_clips, rebuild_audio_from_spectro_clips
from utils.dataset import create_samples
from models.conv_denoising_unet import ConvDenoisingUnet
from models.conv_encoder_denoising_decoder import ConvEncoderDenoisingDecoder
from models.conv_resblock_denoising_unet import ConvResblockDenoisingUnet
from evaluation import evaluate
#from evaluation.evaluate import get_separated_tracks, get_reference_tracks, estimate_and_evaluate

In [3]:
sorted(os.listdir(os.path.join(os.pardir, 'saved_model')))

['conv1d_DAE?time=2020-02-24_04:35.h5',
 'conv1d_DAE?time=2020-02-26_11:55.h5',
 'conv_denoising_unet?time=20200223_0347.h5',
 'conv_denoising_unet?time=20200223_1031_with_sum_constraint.h5',
 'conv_denoising_unet?time=20200226_1546_with_sum_constraint.h5',
 'conv_encoder_denoising_decoder?time=20200224_0618.h5',
 'conv_encoder_denoising_decoder?time=20200224_0738.h5',
 'conv_encoder_denoising_decoder?time=20200227_0838_l2_weight_regularization.h5',
 'conv_res56_denoising_unet?time=20200227_0646_l2_reg.h5',
 'conv_resblock_denoising_unet?time=20200229_1806_l1_reg.h5',
 'conv_resblock_denoising_unet?time=20200301_1113.h5',
 'weight_checkpoints']

In [4]:
samples = create_samples('Test')
test_sample = samples[20]
print(test_sample)

{'name': '021 - James May - On The Line', 'mix': '/media/yossarian42/master_thesis/audio_source_separator/data/DSD100/Mixtures/Test/021 - James May - On The Line/mixture.wav', 'vocals': '/media/yossarian42/master_thesis/audio_source_separator/data/DSD100/Sources/Test/021 - James May - On The Line/vocals.wav', 'bass': '/media/yossarian42/master_thesis/audio_source_separator/data/DSD100/Sources/Test/021 - James May - On The Line/bass.wav', 'drums': '/media/yossarian42/master_thesis/audio_source_separator/data/DSD100/Sources/Test/021 - James May - On The Line/drums.wav', 'other': '/media/yossarian42/master_thesis/audio_source_separator/data/DSD100/Sources/Test/021 - James May - On The Line/other.wav'}


In [5]:
model_path = os.path.join(os.pardir, 'saved_model', 'conv_denoising_unet?time=20200223_0347.h5')
model = tf.keras.models.load_model(model_path)

In [8]:
def get_separated_tracks(separator, mix_audio):
    # load mix music audio, average the stereo recording to single channel audio track
    # convert to spectrogram
    sound, sr = librosa.load(mix_audio, sr=44100, mono=True, duration=10)
    stft = librosa.stft(sound, n_fft=2048, hop_length=512, win_length=2048)
    mag, phase = librosa.magphase(stft)
    # chop magnitude of spectrogram into clips, each has 1025 bins, 100 frames
    stft_clips = np.empty((0, 1025, 100))
    for i in range(mag.shape[1] // 100):
        stft_clips = np.concatenate((stft_clips, mag[np.newaxis, :, i * 100: (i + 1) * 100])
    # separate components from the mix single channel music audio
    separated_sepctrograms = separator.predict(stft_clips)
    separated_tracks = list()
    # separated_spectrograms contains 4 stem tracks
    # the index of spectrograms: 0, 1, 2, 3 -> vocals, bass, drums, other
    for i in range(4):
        separated_track = np.squeeze(separated_spectrograms[i], axis=-1)
        separated_tracks.append(rebuild_audio_from_spectro_clips(separated_track))
    return separated_tracks


def get_reference_tracks(sample, track_shape):
    reference_tracks = list()
    for feat in ['vocals', 'bass', 'drums', 'other']:
        track, sr = librosa.load(sample[feat], sr=44100, mono=True, duration=10)
        # crop reference track to match separated track shape
        track = track[tuple(map(slice, track_shape))]
        reference_tracks.append(track)
    return reference_tracks

SyntaxError: invalid syntax (<ipython-input-8-60726d54d29b>, line 12)

In [None]:
separate_tracks = get_separated_tracks(model, test_sample['mix'])

In [None]:
reference_tracks = get_reference_tracks(test_sample['mix'], separate_tracks[0].shape)

In [None]:
import mir_eval

(sdr, sir, sar, perm) = mir_eval.separation.bss_eval_sources(references, estimates, compute_permutation=False)

## wav_to_spectrogram_clips will remove some frames from the original spectrogram

### reconstructon

In [None]:
spectrogram_clips = wav_to_spectrogram_clips(test_sample['mix'])

In [None]:
print(spectrogram_clips.shape)

In [None]:
spectrogram = np.concatenate(spectrogram_clips, axis=1)
print(spectrogram_clips.shape)

In [None]:
audio = rebuild_audio_from_spectro_clips(spectrogram_clips)
print('reconstructed audio waveform from wav_to_spectrogram_clips', audio.shape)

### original

In [None]:
sound, sr = librosa.load(test_sample['mix'], sr=44100, mono=True)
stft = librosa.stft(sound, n_fft=2048, hop_length=512, win_length=2048)
mag, phase = librosa.magphase(stft)
print(mag.shape)

In [None]:
track, sr = librosa.load(test_sample['mix'], sr=44100, mono=True)
print('true size of the original audio waveform', track.shape)