In [1]:
import librosa
import numpy
from keras import Sequential
from keras.layers import LSTM, Activation, Dense
from keras.optimizers import Adam

import constants
from AudioManager import AudioManager

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
Using TensorFlow backend.


In [17]:
def prepare_audio(language, sex, person_number, track, target=False):
    """Load one recording and cut its magnitude spectrogram into fixed-length blocks.

    The recording is fetched through ``AudioManager.load_sample``, transformed
    with ``librosa.stft`` and split into windows of ``constants.BLOCK_LENGTH``
    frames. Consecutive windows start ``constants.BLOCK_OVERLAP`` frames apart
    (the constant is used as the step between window starts).

    Args:
        language: Forwarded to ``AudioManager.load_sample``.
        sex: Forwarded to ``AudioManager.load_sample``.
        person_number: Forwarded to ``AudioManager.load_sample``.
        track: Forwarded to ``AudioManager.load_sample``.
        target: When true every block is labelled 1, otherwise 0.

    Returns:
        A ``(samples, results)`` tuple. ``samples`` has shape
        ``(n_blocks, BLOCK_LENGTH, n_features)`` and ``results`` has shape
        ``(n_blocks, 1)``. For a recording shorter than one block, ``n_blocks``
        is 0 but both arrays keep their full rank so they can still be
        concatenated with the output of other calls.
    """
    print(f"Loadig sample of {language} {sex}, person: {person_number}, track: {track}")
    series = AudioManager().load_sample(
        language=language,
        sex=sex,
        person_number=person_number,
        track=track
    )

    # librosa.stft returns (frequency_bins, frames); move frames to axis 0 so
    # that slicing below walks through time. The magnitude is taken once for
    # the whole spectrogram instead of once per block.
    spectrum = librosa.stft(series, n_fft=constants.MODEL_NFFT).swapaxes(0, 1)
    magnitudes = numpy.abs(spectrum)

    block_length = constants.BLOCK_LENGTH
    # "+ 1": the last valid window starts at len - block_length, and range()
    # excludes its stop value. Without it the window that ends exactly on the
    # final frame is silently dropped.
    starts = range(0, len(magnitudes) - block_length + 1, constants.BLOCK_OVERLAP)
    blocks = [magnitudes[start:start + block_length] for start in starts]

    if blocks:
        samples = numpy.array(blocks)
    else:
        # Keep the 3-D layout even when no block fits.
        samples = numpy.empty(
            (0, block_length) + magnitudes.shape[1:], dtype=magnitudes.dtype
        )

    results_shape = (len(samples), 1)
    results = numpy.ones(results_shape) if target else numpy.zeros(results_shape)
    return samples, results

In [18]:
# Recordings used to build the dataset. Each entry lines up with the
# parameters of prepare_audio: (language, sex, person_number, track, target).
# The Japanese female speaker is the positive class (target=True); the other
# speakers are negatives.
voices = [
    *[("japanese", "female", 1, track, True) for track in (1, 2, 3)],
    *[("french", "male", 1, track, False) for track in (1, 2)],
    ("english", "male", 1, 1, False),
]

In [19]:
# Start the feature / label arrays from the first configured voice.
first_voice = voices[0]
X, Y = prepare_audio(*first_voice)

Loadig sample of japanese female, person: 1, track: 1


In [20]:
# Load every remaining voice first, then extend X / Y with a single
# concatenation per array. This cell reads and reassigns X and Y, so running
# it a second time appends the same voices again.
loaded = [prepare_audio(*voice) for voice in voices[1:]]
X = numpy.concatenate([X] + [features for features, _ in loaded], axis=0)
Y = numpy.concatenate([Y] + [labels for _, labels in loaded], axis=0)
del loaded

Loadig sample of japanese female, person: 1, track: 2
Loadig sample of japanese female, person: 1, track: 3
Loadig sample of french male, person: 1, track: 1
Loadig sample of french male, person: 1, track: 2
Loadig sample of english male, person: 1, track: 1


In [34]:
# Shuffle order shared by X and Y. A dedicated, seeded generator makes the
# order identical on every run; since Keras' validation_split takes the tail
# of the arrays, this also fixes which samples end up in the validation set.
SHUFFLE_SEED = 42
perm = numpy.random.RandomState(SHUFFLE_SEED).permutation(len(X))

In [36]:
# Reorder the samples along the first axis. Y has to be reordered with the
# same perm; re-running only this cell would shuffle X again and leave the
# labels misaligned.
X = numpy.take(X, perm, axis=0)

In [37]:
# Apply the same reordering to the labels so they stay paired with X.
# Re-running only this cell would shuffle Y again and break that pairing.
Y = numpy.take(Y, perm, axis=0)

In [38]:
# Two stacked LSTMs followed by a small dense head with a single output unit.
layer_stack = [
    # The first LSTM returns its whole output sequence so the second LSTM has
    # a sequence to consume; the input shape is one sample of X.
    LSTM(128, return_sequences=True, input_shape=X.shape[1:]),
    LSTM(64),
    Dense(64),
    Activation('tanh'),
    Dense(16),
    Activation('sigmoid'),
    Dense(1),
    # NOTE(review): hard_sigmoid can output exactly 0.0 / 1.0 -- confirm it is
    # preferred over a plain sigmoid for the binary_crossentropy loss.
    Activation('hard_sigmoid'),
]
model = Sequential(layer_stack)

In [39]:
# Display the model object; this shows only its default repr.
model

<keras.engine.sequential.Sequential at 0x25d6960ab88>

In [40]:
# Binary classification setup: Adam with a learning rate of 0.005,
# cross-entropy loss, accuracy reported as the metric.
optimizer = Adam(lr=0.005)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [41]:
# Train for 15 epochs in mini-batches of 32, holding out 20% of the samples
# for validation. The call stays the last expression of the cell so that the
# returned History object is displayed.
model.fit(x=X, y=Y, batch_size=32, epochs=15, validation_split=0.2)

Train on 39 samples, validate on 10 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.callbacks.History at 0x25d69534ec8>

In [42]:
# Loss and accuracy over the same X / Y that were passed to fit, so this is
# not a held-out score.
loss_and_accuracy = model.evaluate(X, Y)
print(loss_and_accuracy)

[0.09095557904517164, 0.9795918464660645]


In [56]:
# Sanity check on a single track. Separate names are used so that the full
# training arrays X / Y are not overwritten by this one-track sample, which
# would make re-running the fit / evaluate cells use the wrong data.
# voices[2] is also one of the recordings the model was fitted on.
X_check, Y_check = prepare_audio(*voices[2])
print(model.predict(X_check))

Loadig sample of japanese female, person: 1, track: 3
[[1.]
 [1.]
 [1.]
 [1.]]


In [57]:
# Print the layer-by-layer summary: layer types, output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 16, 128)           328704    
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_1 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                1040      
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                

In [59]:
# Persist the trained weights; the path is relative to the notebook's
# working directory.
weights_path = "weights"
model.save_weights(weights_path)