In [1]:
%load_ext autoreload
%autoreload 2

#hsvs includes
from hsvs.model import SliceLayer, util
from hsvs.tools import synthesis
import hsvs

# 3rd party dependencies
import numpy as np
import scipy.io as sio  

import os
import tensorflow as tf
import tqdm
from tqdm.notebook import tqdm as tqdm_notebook

In [2]:
# --- Configuration: which recording to fit -----------------------------------
vowel  = 'i'   # one of [a, e, i, o, u]
singer = 'f6'  # one of [f1 - f9, m1 - m11]

# --- Source / output file paths ----------------------------------------------
# Everything lives under <repo>/data/results/<singer>_<vowel>/, where <repo> is
# the parent directory of the installed `hsvs` package.
singer_vowel_dir = '_'.join((singer, vowel))
package_dir = os.path.dirname(hsvs.__file__)
data_path   = os.path.abspath(os.path.join(package_dir, os.pardir, 'data'))
results_dir = os.path.join(data_path, 'results', singer_vowel_dir)
mat_file    = os.path.join(results_dir, 'parameters.mat')  # input: parameter trajectories
model_path  = os.path.join(results_dir, 'predictor')       # output: trained model

# --- Load parameter trajectories from the .mat file --------------------------
parameters = sio.loadmat(mat_file)
pitch, gain, Rd, zeros, poles = (
    parameters[key] for key in ('f', 'g', 'Rd', 'z0', 'p0')
)
# loadmat hands back MATLAB scalars as 2-D arrays, hence the [0, 0] lookup.
fs = parameters['fs'][0, 0]

# --- Derived sizes -----------------------------------------------------------
# First axis of `pitch` is used as the number of training samples; the second
# axis of `poles` / `zeros` as the number of poles / zeros per sample.
num_samples = pitch.shape[0]
num_poles, num_zeros = poles.shape[1], zeros.shape[1]

In [3]:
# custom layer body: turns interleaved (radius, angle) columns into complex values
def to_complex(x):
    """Map interleaved radius/angle activations to complex numbers.

    Args:
        x: tensor indexed as ``x[:, k]``; even columns are squashed into a
           radius, odd columns into an angle.

    Returns:
        Complex tensor ``radius * exp(1j * angle)``, one value per
        (even, odd) column pair of ``x``.
    """
    radius = tf.sigmoid(x[:, 0::2])          # sigmoid keeps the radius inside (0, 1)
    angle = np.pi * tf.sigmoid(x[:, 1::2])   # scaled sigmoid keeps the angle inside (0, pi)
    magnitude = tf.complex(radius, 0.)       # purely real: the radius
    phasor = tf.exp(tf.complex(0., angle))   # exp of purely imaginary: unit phasor
    return magnitude * phasor

# input 1: pitch, divided by 100 before it enters the dense stack
input_pitch = tf.keras.Input(shape=(1,), name='input_pitch')
x_pitch = tf.keras.layers.Lambda(lambda x: x/100.)(input_pitch)

# input 2: Rd, passed in unscaled
input_Rd = tf.keras.Input(shape=(1,), name='input_Rd')

# Widths of the four output branches: gain, Rd, poles, zeros.
# Each pole / zero needs two raw activations (radius and angle, see to_complex).
slice_lens = [1, 1, 2*num_poles, 2*num_zeros]

# dense network
inputs = tf.keras.layers.Concatenate(axis=1)([x_pitch, input_Rd])
x = tf.keras.layers.Dense(2)(inputs)
x = tf.keras.layers.Dense(8,   activation='softplus')(x)
x = tf.keras.layers.Dense(32,  activation='softplus')(x)
x = tf.keras.layers.Dense(62,  activation='softplus')(x)
# Final linear layer: its width is derived from the slice lengths instead of
# being hard-coded, so the network stays consistent with the number of poles
# and zeros found in the loaded parameter file (62 when they add up to 30).
x = tf.keras.layers.Dense(sum(slice_lens))(x)

# splitting in four branches for gain, Rd, poles and zeros
# NOTE(review): presumably SliceLayer cuts the feature axis into consecutive
# chunks of the given lengths - confirm against hsvs.model.SliceLayer.
x_gain, x_Rd, x_p0, x_z0 = SliceLayer.SliceLayer(slice_lens = slice_lens)(x)
x_gain = tf.keras.layers.Lambda(lambda x: 100.*x, name='Gain')(x_gain)
x_Rd   = tf.keras.layers.Lambda(lambda x:   1.*x, name='Rd')(x_Rd)
x_p0   = tf.keras.layers.Lambda(lambda x: to_complex(x), name='p0')(x_p0)
x_z0   = tf.keras.layers.Lambda(lambda x: to_complex(x), name='z0')(x_z0)

# define model: two scalar inputs -> gain, Rd, complex poles, complex zeros
model = tf.keras.Model(inputs=[input_pitch, input_Rd], outputs=[x_gain, x_Rd, x_p0, x_z0])

In [4]:
# Relative weighting of the four output branches.
gain_weight = 0.1                   # x/dB
Rd_weight   = 4.                    # x/Rd
w_weight    = 0.01 * fs/(2.*np.pi)  # x/Hz
r_weight    = 0.1                   # x/dB

# Gain and Rd use a weighted MSE; the weight is handed over squared.
gain_loss = lambda y_true, y_pred: util.weighted_mse_loss(y_true, y_pred, tf.square(gain_weight))
Rd_loss   = lambda y_true, y_pred: util.weighted_mse_loss(y_true, y_pred, tf.square(Rd_weight))

# Poles and zeros share one custom loss: basically an additive combination of
# weighted MSE for the angle and the (shaped) radius.
pole_zero = lambda y_true, y_pred: util.pole_zero_loss(y_true, y_pred, r_weight, w_weight)

# One loss per model output, in output order: gain, Rd, poles, zeros.
losses = [gain_loss, Rd_loss, pole_zero, pole_zero]

# Learning rate 1e-3 (same value the notebook previously spelled as 10E-4).
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss=losses)

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_pitch (InputLayer)        [(None, 1)]          0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 1)            0           input_pitch[0][0]                
__________________________________________________________________________________________________
input_Rd (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 2)            0           lambda[0][0]                     
                                                                 input_Rd[0][0]               

In [5]:
num_epochs = 32000

# Custom tqdm progress bar to reduce logging noise: one tick per epoch, with the
# current total loss shown as postfix, instead of Keras' per-epoch log lines.
def tqdm_update(epoch, logs):
    """Keras `on_epoch_end` hook: advance the bar and show the latest loss.

    Args:
        epoch: epoch index supplied by Keras (unused).
        logs: metrics dict supplied by Keras; only ``logs['loss']`` is read.
    """
    pbar.update()
    pbar.set_postfix_str("Loss: %s" % logs['loss'])

# The context manager closes the bar even when training is interrupted or
# raises, which a trailing pbar.close() after fit() would not do.
with tqdm_notebook(total=num_epochs) as pbar:
    # Full-batch training: every epoch is a single step over all samples.
    # Rd is both an input and a target.
    model.fit(x=[pitch, Rd], y=[gain, Rd, poles, zeros],
        epochs = num_epochs,
        batch_size=num_samples,
        callbacks=[tf.keras.callbacks.LambdaCallback(on_epoch_end=tqdm_update)], verbose=0)


HBox(children=(FloatProgress(value=0.0, max=32000.0), HTML(value='')))




In [6]:
# Persist the trained predictor to <results>/<singer>_<vowel>/predictor
# so it can be reloaded later without retraining.
model.save(filepath=model_path)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: e:\Studium\Master\MasterThesis\Development\Python\data\results\f6_a\predictor\assets


In [7]:
# resetting the kernel to flush unused memory
# notification beep.
import numpy as np
import sounddevice
beep = np.sin(np.linspace(0, 1000*np.pi,22050))
sounddevice.play(beep, 44100, blocking=True)

%reset -f
exit()