In [1]:
# Show the version string of the Python interpreter running this notebook.
import sys
sys.version

'3.7.4 (default, Oct  4 2019, 06:57:26) \n[GCC 9.2.0]'

In [2]:
# Directory that is scanned for ".wav" recordings.
PATH = "tests/test_data"
# Number of two-source mixtures to generate.
HOW_MUCH = 10

In [3]:
# generate clean recordings

### define mapping to STFT

import numpy as np

def mapping(wave, frame_len=512, hop=256):
    """Map a waveform to a complex spectrogram built from overlapping frames.

    Samples are converted to float32 and divided by 2**15 (presumably the
    input is 16-bit PCM), then cut into frames of ``frame_len`` samples taken
    every ``hop`` samples. Each frame is transformed with ``np.fft.rfft``;
    no window function is applied.

    Args:
        wave: 1-D array of samples.
        frame_len: number of samples per frame.
        hop: distance in samples between the starts of consecutive frames.

    Returns:
        ``np.complex64`` array of shape ``(n_frames, frame_len // 2 + 1)``.
        ``n_frames`` is ``(len(wave) - frame_len) // hop``, which is one less
        than the number of full frames that fit (shortcut kept from the
        original implementation). Inputs too short to produce a frame yield
        an empty ``(0, frame_len // 2 + 1)`` array instead of raising.
    """
    wave = wave.astype(np.float32) / 2**15
    # Clamp at zero: for short inputs the floor division goes negative and
    # np.zeros would reject the negative dimension.
    n_frames = max(0, (len(wave) - frame_len) // hop)  # shortcut here
    n_bins = frame_len // 2 + 1  # rfft output length for a real frame
    spec = np.zeros([n_frames, n_bins], np.complex64)
    for i in range(n_frames):
        start = i * hop
        spec[i, :] = np.fft.rfft(wave[start : start + frame_len])
    return spec

### list recordings
import os

# Collect the path of every ".wav" file directly inside PATH.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# order of the recordings stable between runs and machines.
records = sorted(
    os.path.join(PATH, name)
    for name in os.listdir(PATH)
    if name.endswith(".wav")
)

### load cleans
import scipy.io.wavfile as sio

# Read every file exactly once; wavfile.read returns (sample_rate, data) and
# only the sample data is kept.
waves = [sio.read(path)[1] for path in records]
clean_lengths = [wave.shape for wave in waves]
cleans = [mapping(wave) for wave in waves]
spec_lens = [spec.shape[0] for spec in cleans]
max_spec_len = max(spec_lens)
# Zero-pad at the START of the frame axis so all spectrograms share the
# length of the longest one, then stack them into a single array.
cleans = np.stack([
    np.pad(spec, ((max_spec_len - spec.shape[0], 0), (0, 0)), 'constant')
    for spec in cleans
])

### mix them up - two mixtures
# Draw the two sources of every mixture from a dedicated, seeded generator so
# the same mixtures are produced on every run (the global NumPy random state
# is left untouched).
MIX_SEED = 0
mix_rng = np.random.RandomState(MIX_SEED)
n_cleans = cleans.shape[0]
# The two draws are independent, so a mixture may contain the same recording twice.
components_1 = cleans[mix_rng.randint(n_cleans, size=HOW_MUCH)]
components_2 = cleans[mix_rng.randint(n_cleans, size=HOW_MUCH)]
# Mix in the complex domain: element-wise sum of the two complex spectrograms.
mixtures = components_1 + components_2

### take magnitudes of the complex spectrograms
# Replace each complex array by its element-wise magnitude.
components_1, components_2, mixtures = (
    np.abs(spec) for spec in (components_1, components_2, mixtures)
)

In [4]:
# build model

from keras import layers, models
import keras.backend as K
import tensorflow as tf

def _split_into_two_masks(x):
    """Split the 512 output channels into two halves of 256, stacked on a new last axis."""
    return K.stack([x[:, :, :256], x[:, :, 256:]], axis=-1)


def _prepend_zero_bin(x):
    """Pad one zero in front of axis 2, growing it from 256 to 257 entries."""
    return tf.pad(x, ((0, 0), (0, 0), (1, 0), (0, 0)))


def _apply_masks(args):
    """Multiply both masks with the network input, duplicated on the last axis."""
    masks, mixture = args
    return masks * K.stack([mixture, mixture], axis=-1)


# Network input: sequences of arbitrary length with 257 features per step.
first = layers.Input(shape=(None, 257))

# Sequential trunk, applied in order to the input.
trunk = [
    layers.Dense(512, activation='sigmoid'),
    layers.Convolution1D(kernel_size=5, filters=512, activation='relu', padding='same'),
    layers.Convolution1D(kernel_size=5, filters=512, activation='relu', padding='same'),
    layers.LSTM(512, return_sequences=True),
    layers.LSTM(512, return_sequences=True, activation='sigmoid'),
    layers.Lambda(_split_into_two_masks),
    layers.Lambda(_prepend_zero_bin),
]
lyr = first
for layer in trunk:
    lyr = layer(lyr)

# Final output: the two masks applied to the input itself.
lyr = layers.Lambda(_apply_masks)([lyr, first])
mdl = models.Model(first, lyr)


Using TensorFlow backend.


In [5]:
# Print the per-layer overview of the model (output shapes and parameter counts).
mdl.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 257)    0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, None, 512)    132096      input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, None, 512)    1311232     dense_1[0][0]                    
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, None, 512)    1311232     conv1d_1[0][0]                   
____________________________________________________________________________________________

In [9]:
# define PIT loss

def pit_loss(true, targets):
    """Permutation-invariant squared-error loss for two sources.

    Both arguments are 4-D tensors whose last axis holds the two sources
    (index 0 and 1). Keras calls a loss as ``loss(y_true, y_pred)``, so
    ``true`` receives the reference sources and ``targets`` the model output.

    For every sample the summed squared error of the two sources is averaged
    over axes 1 and 2, once for the straight assignment (0->0, 1->1) and once
    for the swapped one (0->1, 1->0); the smaller of the two is that sample's
    loss. The assignment is chosen independently per sample, so utterances in
    the same batch are not forced to share one permutation.

    Returns:
        1-D tensor with one loss value per sample in the batch.
    """
    ref_a, ref_b = true[:, :, :, 0], true[:, :, :, 1]
    est_a, est_b = targets[:, :, :, 0], targets[:, :, :, 1]
    straight = K.mean(K.square(ref_a - est_a) + K.square(ref_b - est_b), axis=[1, 2])
    swapped = K.mean(K.square(ref_a - est_b) + K.square(ref_b - est_a), axis=[1, 2])
    # Element-wise minimum keeps the better assignment for each sample.
    return K.minimum(straight, swapped)


In [10]:
# Configure training: 'adam' optimizer, pit_loss as the loss function.
mdl.compile('adam', pit_loss)

In [11]:
# Inputs are the mixtures; targets stack the two component arrays on a new
# last axis (size 2), which is the axis pit_loss indexes. One epoch, batches of 2.
mdl.fit(mixtures, np.stack([components_1, components_2], axis=-1), epochs=1, batch_size=2)

Epoch 1/1


<keras.callbacks.callbacks.History at 0x7f91907e2050>