In [1]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, Dense, ZeroPadding1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import clear_session
from tensorflow.keras.activations import tanh, elu, relu
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
from tensorflow.keras.utils import Sequence

import os
from scipy import signal
from scipy.io import wavfile
import numpy as np
import math
import h5py

In [2]:
physical_devices = tf.config.list_physical_devices('GPU')

try:
    # Disable all GPUS
    tf.config.set_visible_devices([], 'GPU')
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        print(device)
except:
    pass

PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')


In [3]:
# EDIT THIS SECTION FOR USER INPUTS
#
name = 'model_0'
in_file = '../data/ts9_test1_in_FP32.wav'
out_file = '../data/ts9_test1_out_FP32.wav'
epochs = 1

train_mode = 0     # 0 = speed training, 
                   # 1 = accuracy training 
                   # 2 = extended training

input_size = 150 
batch_size = 4096 
test_size = 0.2

if not os.path.exists('models/'+name):
    os.makedirs('models/'+name)
else:
    print("A model with the same name already exists. Please choose a new name.")
    exit

A model with the same name already exists. Please choose a new name.


In [4]:
def pre_emphasis_filter(x, coeff=0.95):
    return tf.concat([x, x - coeff * x], 1)
    
def error_to_signal(y_true, y_pred): 
    """
    Error to signal ratio with pre-emphasis filter:
    """
    y_true, y_pred = pre_emphasis_filter(y_true), pre_emphasis_filter(y_pred)
    return K.sum(tf.pow(y_true - y_pred, 2), axis=0) / (K.sum(tf.pow(y_true, 2), axis=0) + 1e-10)

def save_wav(name, data):
    wavfile.write(name, 44100, data.flatten().astype(np.float32))

def normalize(data):
    data_max = max(data)
    data_min = min(data)
    data_norm = max(data_max,abs(data_min))
    return data / data_norm

In [5]:
# Load and Preprocess Data ###########################################
in_rate, in_data = wavfile.read(in_file)
out_rate, out_data = wavfile.read(out_file)

X_all = in_data.astype(np.float32).flatten()  
X_all = normalize(X_all).reshape(len(X_all),1)   
y_all = out_data.astype(np.float32).flatten() 
y_all = normalize(y_all).reshape(len(y_all),1)

# Get the last 20% of the wav data for testing and thee rest for training
y_training, y_testing = np.split(y_all, [int(len(y_all)*.8)])
X_training, X_testing = np.split(X_all, [int(len(X_all)*.8)])
print(f"y_training shape: {y_training.shape}")
print(f"X_training shape: {X_training.shape}")

# The input size defines the number of samples used for each prediction
# Therefore the first output value that we get is at index input_size-1
y_ordered_training = y_training[input_size-1:]
print(f"y_ordered_training shape: {y_ordered_training.shape}")

indices = np.arange(input_size) + np.arange(len(X_training)-input_size+1)[:,np.newaxis]
X_ordered_training = tf.gather(X_training,indices) 
print(f"X_ordered_training shape: {X_ordered_training.shape}")

shuffled_indices = np.random.permutation(len(X_ordered_training)) 
X_random_training = tf.gather(X_ordered_training, shuffled_indices)
y_random_training = tf.gather(y_ordered_training, shuffled_indices)

print(f"The X_random_training data is an array, where each element is an array of input_size samples in time order. Therefore the lenght is smaller than the original X_training array (the first {input_size} samples are grouped).")
print(f"The y_random_training data is an array, where each element is a single sample. This single sample is the target output for the corresponding X_random_training element, which consists of input samples.")

y_training shape: (6587907, 1)
X_training shape: (6587907, 1)
y_ordered_training shape: (6587758, 1)
X_ordered_training shape: (6587758, 150, 1)
The X_random_training data is an array, where each element is an array of input_size samples in time order. Therefore the lenght is smaller than the original X_training array (the first 150 samples are grouped).
The y_random_training data is an array, where each element is a single sample. This single sample is the target output for the corresponding X_random_training element, which consists of input samples.


In [6]:
'''This is a similar Tensorflow/Keras implementation of the LSTM model from the paper:
    "Real-Time Guitar Amplifier Emulation with Deep Learning"
    https://www.mdpi.com/2076-3417/10/3/766/htm

    Uses a stack of two 1-D Convolutional layers, followed by LSTM, followed by 
    a Dense (fully connected) layer. Three preset training modes are available, 
    with further customization by editing the code. A Sequential tf.keras model 
    is implemented here.

    Note: RAM may be a limiting factor for the parameter "input_size". The wav data
      is preprocessed and stored in RAM, which improves training speed but quickly runs out
      if using a large number for "input_size".  Reduce this if you are experiencing
      RAM issues.
'''

if train_mode == 0:         # Speed Training
    learning_rate = 0.01 
    conv1d_strides = 12    
    conv1d_filters = 16
    hidden_units = 36
elif train_mode == 1:       # Accuracy Training (~10x longer than Speed Training)
    learning_rate = 0.01 
    conv1d_strides = 4
    conv1d_filters = 36
    hidden_units= 64
else:                       # Extended Training (~60x longer than Accuracy Training)
    learning_rate = 0.0005 
    conv1d_strides = 3
    conv1d_filters = 36
    hidden_units= 96


# Create Sequential Model ###########################################
clear_session()
model = Sequential()
model.add(ZeroPadding1D(12, input_shape=(input_size,1)))
model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None))
model.add(ZeroPadding1D(12))
model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None))
model.add(LSTM(hidden_units))
model.add(Dense(1, activation=None))
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse', metrics=[error_to_signal])
model.summary()

# Train Model ###################################################
history = model.fit(X_random_training,y_random_training, epochs=epochs, batch_size=batch_size, validation_split=test_size)     
model.save('models/'+name+'/'+name+'.h5')

# Add additional data to the saved model (like input_size)
filename = 'models/'+name+'/'+name+'.h5'
f = h5py.File(filename, 'a')
grp = f.create_group("info")
dset = grp.create_dataset("input_size", (1,), dtype='int16')
dset[0] = input_size
f.close()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 zero_padding1d (ZeroPaddin  (None, 174, 1)            0         
 g1D)                                                            
                                                                 
 conv1d (Conv1D)             (None, 14, 16)            208       
                                                                 
 zero_padding1d_1 (ZeroPadd  (None, 38, 16)            0         
 ing1D)                                                          
                                                                 
 conv1d_1 (Conv1D)           (None, 3, 16)             3088      
                                                                 
 lstm (LSTM)                 (None, 36)                7632      
                                                                 
 dense (Dense)               (None, 1)                 3

  saving_api.save_model(


In [7]:
# Run Prediction #################################################
print("Running prediction..")

indices = np.arange(input_size) + np.arange(len(X_testing)-input_size+1)[:,np.newaxis] 
X_ordered_testing = tf.gather(X_testing,indices) 

prediction = model.predict(X_ordered_testing, batch_size=batch_size)

save_wav('models/'+name+'/y_pred.wav', prediction)
save_wav('models/'+name+'/x_test.wav', X_testing)
save_wav('models/'+name+'/y_test.wav', y_testing)

print("X_testing shape: ", X_testing.shape)
print("y_testing shape: ", y_testing.shape)
print("prediction shape: ", prediction.shape)

print("Note that the prediction shape is smaller than the y_testing shape. This is because the first predicted sample needs input_size samples for prediction.")

Running prediction..
X_testing shape:  (1646977, 1)
y_testing shape:  (1646977, 1)
prediction shape:  (1646828, 1)
Note that the prediction shape is smaller than the y_testing shape. This is because the first predicted sample needs input_size samples for prediction.
