In [1]:
import os

import numpy as np
import scipy as sp
import scipy.io.wavfile  # make sure sp.io.wavfile is loaded regardless of SciPy's lazy-import behaviour
import tensorflow as tf
from tensorflow import keras
from keras import backend as K

import model_utils_RTNeural

## Choose computation device (CPU)

In [2]:
# List every device TensorFlow can see before any restriction is applied.
physical_devices = tf.config.list_physical_devices()
print(f"These are the physical devices available:\n{physical_devices}")

# Hide all GPUs so that the rest of the notebook runs on the CPU only.
try:
    tf.config.set_visible_devices([], 'GPU')
except (RuntimeError, ValueError) as err:
    # TensorFlow refuses to change device visibility once its runtime has been
    # initialized (RuntimeError) or when the arguments are invalid (ValueError).
    # Report the problem instead of silently ignoring it.
    print(f"Could not restrict the visible devices: {err}")

# Always report the devices that are actually in effect, even if the
# restriction above failed.
visible_devices = tf.config.get_visible_devices()
print(f"These are the visible devices:\n{visible_devices}")

These are the physical devices available:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
These are the visible devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


## User inputs

In [3]:
# EDIT THIS SECTION FOR USER INPUTS
#
name = 'model_0'                              # model name; also the output folder under models/
in_file = '../data/ts9_test1_in_FP32.wav'     # input recording
out_file = '../data/ts9_test1_out_FP32.wav'   # target recording
epochs = 1

train_mode = 0     # 0 = speed training,
                   # 1 = accuracy training
                   # 2 = extended training

input_size = 150   # number of consecutive input samples used for each predicted sample
batch_size = 4096
test_size = 0.2    # passed to model.fit as validation_split

# Refuse to continue when the output folder already exists, so that a
# previously trained model is never overwritten by accident.
if os.path.exists('models/'+name):
    raise FileExistsError("A model with the same name already exists. Please choose a new name.")
os.makedirs('models/'+name)

## Define some helper functions

In [4]:
def pre_emphasis_filter(x, coeff=0.95):
    """
    Concatenate `x` with `x - coeff * x` along axis 1.

    NOTE(review): a textbook pre-emphasis filter subtracts the *previous*
    sample (x[n] - coeff * x[n-1]); here the same sample is used, which is a
    plain scaling by (1 - coeff). Confirm this is intended.
    """
    emphasized = x - coeff * x
    return tf.concat([x, emphasized], 1)
    
def error_to_signal(y_true, y_pred):
    """
    Error-to-signal ratio computed on the pre-emphasis-filtered signals.

    Both tensors are passed through `pre_emphasis_filter`; the result is the
    squared error summed over axis 0, divided by the squared filtered target
    summed over axis 0 (plus 1e-10 to avoid a division by zero).
    """
    target = pre_emphasis_filter(y_true)
    estimate = pre_emphasis_filter(y_pred)
    error_energy = K.sum(tf.pow(target - estimate, 2), axis=0)
    signal_energy = K.sum(tf.pow(target, 2), axis=0)
    return error_energy / (signal_energy + 1e-10)

def save_wav(name, data, sample_rate=44100):
    """
    Write `data` to a 32-bit float WAV file.

    Parameters
    ----------
    name : str
        Path of the WAV file to create.
    data : array-like
        Audio samples; flattened to one dimension and cast to float32.
    sample_rate : int, optional
        Sample rate written to the file header, in Hz. Defaults to 44100,
        the value that used to be hard-coded.
    """
    samples = np.asarray(data).flatten().astype(np.float32)
    sp.io.wavfile.write(name, sample_rate, samples)

def normalize(data):
    """
    Scale `data` so that its largest absolute value becomes 1.

    The peak is computed with vectorized NumPy operations instead of the
    Python built-ins `max`/`min`, which iterate element by element and are
    very slow on multi-million-sample audio arrays.

    Parameters
    ----------
    data : array-like
        Samples to normalize.

    Returns
    -------
    numpy.ndarray
        `data` divided by its peak absolute value. An all-zero input is
        returned as zeros rather than NaN.

    Raises
    ------
    ValueError
        If `data` is empty.
    """
    samples = np.asarray(data)
    peak = np.max(np.abs(samples))
    # Silence has no peak to scale by; dividing by 1 keeps it at zero
    # instead of producing 0/0 = NaN.
    scale = peak if peak != 0 else 1
    return samples / scale

## Pre-processing the data

In [5]:
# Load and Preprocess Data ###########################################
in_rate, in_data = sp.io.wavfile.read(in_file)
out_rate, out_data = sp.io.wavfile.read(out_file)

# Input and target are paired sample-for-sample, so both files must use the
# same sample rate; otherwise the pairs below would not line up in time.
if in_rate != out_rate:
    raise ValueError(f"Sample rate mismatch: '{in_file}' is {in_rate} Hz but '{out_file}' is {out_rate} Hz.")

X_all = in_data.astype(np.float32).flatten()
y_all = out_data.astype(np.float32).flatten()

# Trim both signals to their common length so that every input window has a
# matching target sample.
n_samples = min(len(X_all), len(y_all))
if len(X_all) != len(y_all):
    print(f"Warning: input has {len(X_all)} samples but output has {len(y_all)}; using the first {n_samples} of each.")

X_all = normalize(X_all[:n_samples]).reshape(n_samples,1)
y_all = normalize(y_all[:n_samples]).reshape(n_samples,1)

# Get the last 20% of the wav data for testing and the rest for training
split_at = int(n_samples*.8)
X_training, X_testing = np.split(X_all, [split_at])
y_training, y_testing = np.split(y_all, [split_at])
print(f"X_training shape (pre-processing): {X_training.shape}")
print(f"y_training shape (pre-processing): {y_training.shape}")
print(f"X_testing shape (pre-processing): {X_testing.shape}")
print(f"y_testing shape (pre-processing): {y_testing.shape}")

# Create a new array where each element is an array of input_size samples in time order
# Each element of the new array is shifted by one sample from the previous element
indices = np.arange(input_size) + np.arange(len(X_training)-input_size+1)[:,np.newaxis]
X_ordered_training = tf.gather(X_training,indices)
print(f"X_ordered_training shape: {X_ordered_training.shape}")

# The input size defines the number of samples used for each prediction
# Therefore the first output value that we get is at index input_size-1
y_ordered_training = y_training[input_size-1:]
print(f"y_ordered_training shape: {y_ordered_training.shape}")

# Shuffle windows and targets with the same permutation. The generator is
# seeded so that the shuffled order is the same on every run.
shuffle_seed = 42
shuffled_indices = np.random.default_rng(shuffle_seed).permutation(len(X_ordered_training))
X_random_training = tf.gather(X_ordered_training, shuffled_indices)
y_random_training = tf.gather(y_ordered_training, shuffled_indices)
print(f"X_random_training shape (post-processing): {X_random_training.shape}")
print(f"y_random_training shape (post-processing): {y_random_training.shape}")

print(f"The X_random_training data is an array, where each element is an array of input_size samples in time order. Therefore the lenght is smaller than the original X_training array (the first {input_size} samples are grouped).")
print(f"The y_random_training data is an array, where each element is a single sample. This single sample is the target output for the corresponding X_random_training element, which consists of input samples.")

X_training shape (pre-processing): (6587907, 1)
y_training shape (pre-processing): (6587907, 1)
X_testing shape (pre-processing): (1646977, 1)
y_testing shape (pre-processing): (1646977, 1)
X_ordered_training shape: (6587758, 150, 1)
y_ordered_training shape: (6587758, 1)
X_random_training shape (post-processing): (6587758, 150, 1)
y_random_training shape (post-processing): (6587758, 1)
The X_random_training data is an array, where each element is an array of input_size samples in time order. Therefore the lenght is smaller than the original X_training array (the first 150 samples are grouped).
The y_random_training data is an array, where each element is a single sample. This single sample is the target output for the corresponding X_random_training element, which consists of input samples.


## Define the model

In [6]:
'''This is a similar Tensorflow/Keras implementation of the LSTM model from the paper:
    "Real-Time Guitar Amplifier Emulation with Deep Learning"
    https://www.mdpi.com/2076-3417/10/3/766/htm

    Uses a stack of two 1-D Convolutional layers, followed by LSTM, followed by 
    a Dense (fully connected) layer. Three preset training modes are available, 
    with further customization by editing the code. A Functional keras model 
    is implemented here.

    Note: RAM may be a limiting factor for the parameter "input_size". The wav data
      is preprocessed and stored in RAM, which improves training speed but quickly runs out
      if using a large number for "input_size".  Reduce this if you are experiencing
      RAM issues.
'''

# Hyper-parameter presets selected by train_mode.
if train_mode == 0:         # Speed Training
    learning_rate = 0.01
    conv1d_strides = 12
    conv1d_filters = 16
    hidden_units = 36
elif train_mode == 1:       # Accuracy Training (~10x longer than Speed Training)
    learning_rate = 0.01
    conv1d_strides = 4
    conv1d_filters = 36
    hidden_units = 64
else:                       # Extended Training (~60x longer than Accuracy Training)
    learning_rate = 0.0005
    conv1d_strides = 3
    conv1d_filters = 36
    hidden_units = 96

# Stride of the second Conv1D layer. It used to be assigned only in mode 0 and
# was never read; it is now defined for every mode and actually used below.
# Every preset keeps it equal to the first layer's stride.
conv1d_1_strides = conv1d_strides

# Create Functional Model ###########################################
keras.backend.clear_session()

# The batch dimension of the input is fixed to 1.
# NOTE(review): presumably this gives the exported model a static
# single-window input shape - confirm.
inputs = keras.Input(shape=(input_size,1), batch_size=1)
x = keras.layers.ZeroPadding1D(padding=12, batch_size=1)(inputs)
x = keras.layers.Conv1D(filters=conv1d_filters, kernel_size=12, strides=conv1d_strides, activation=None, batch_size=1)(x)
x = keras.layers.ZeroPadding1D(padding=12, batch_size=1)(x)
x = keras.layers.Conv1D(filters=conv1d_filters, kernel_size=12, strides=conv1d_1_strides, activation=None, batch_size=1)(x)
# return_sequences=False: only the LSTM output of the last time step is passed on.
x = keras.layers.LSTM(units=hidden_units, activation=keras.activations.tanh, return_sequences=False, stateful=False, batch_size=1)(x)
outputs = keras.layers.Dense(units=1, activation=None, batch_size=1)(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=[error_to_signal])
model.summary()



Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(1, 150, 1)]             0         
                                                                 
 zero_padding1d (ZeroPaddin  (1, 174, 1)               0         
 g1D)                                                            
                                                                 
 conv1d (Conv1D)             (1, 14, 16)               208       
                                                                 
 zero_padding1d_1 (ZeroPadd  (1, 38, 16)               0         
 ing1D)                                                          
                                                                 
 conv1d_1 (Conv1D)           (1, 3, 16)                3088      
                                                                 
 lstm (LSTM)                 (1, 36)                   7632  

## Train the model

In [7]:
# Train Model ###################################################
# Fit on the shuffled windows; a fraction `test_size` of them is held out by
# Keras for validation. The trained model is then written below models/<name>/.
history = model.fit(
    x=X_random_training,
    y=y_random_training,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=test_size,
)
model.save('models/'+name+'/'+name)

INFO:tensorflow:Assets written to: models/model_0/model_0/assets


INFO:tensorflow:Assets written to: models/model_0/model_0/assets


## Run predictions
### 1. On the test audio data

In [8]:
# Run Prediction #################################################
# Test the model on the testing data #############################

# Build one window of input_size consecutive samples per prediction; row k
# holds the sample indices k .. k + input_size - 1.
window_starts = np.arange(len(X_testing)-input_size+1)[:,np.newaxis]
indices = window_starts + np.arange(input_size)
X_ordered_testing = tf.gather(X_testing, indices)

print("Running prediction..")
prediction = model.predict(X_ordered_testing)

# Write the prediction next to the unprocessed test input and target.
for _wav_name, _wav_data in (
    ('/y_pred.wav', prediction),
    ('/x_test.wav', X_testing),
    ('/y_test.wav', y_testing),
):
    save_wav('models/'+name+_wav_name, _wav_data)

for _label, _value in (
    ("X_testing shape: ", X_testing),
    ("X_ordered_testing shape: ", X_ordered_testing),
    ("y_testing shape: ", y_testing),
    ("prediction shape: ", prediction),
):
    print(_label, _value.shape)

print("Note that the prediction shape is smaller than the y_testing shape. This is because the first predicted sample needs input_size samples for prediction.\n")

Running prediction..
X_testing shape:  (1646977, 1)
X_ordered_testing shape:  (1646828, 150, 1)
y_testing shape:  (1646977, 1)
prediction shape:  (1646828, 1)
Note that the prediction shape is smaller than the y_testing shape. This is because the first predicted sample needs input_size samples for prediction.



### 2. On a number sequence (to cross-check inference results)

In [9]:
# Test the model on a simple number sequence to compare with inference #
# A ramp 0.000, 0.001, 0.002, ... with exactly input_size samples, shaped as
# a single window (batch=1, time=input_size, channels=1). Using input_size
# instead of a literal keeps this cell valid when the user input changes.
X_testing_2 = (np.arange(input_size) * 0.001).reshape(1, input_size, 1)

print("Running prediction..")
prediction_2 = model.predict(X_testing_2)
print(f"prediction {prediction_2}")

print("X_testing_2 shape: ", X_testing_2.shape)
print("prediction_2 shape: ", prediction_2.shape)

Running prediction..
prediction [[-0.16080016]]
X_testing_2 shape:  (1, 150, 1)
prediction_2 shape:  (1, 1)


## Export as tflite model

In [10]:
# Convert the model
# The converter reads the SavedModel directory written by the training cell.
saved_model_path = 'models/'+name+'/'+name
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
tflite_model = converter.convert()

# Save the model.
tflite_path = "models/"+name+"/"+name+".tflite"
with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

# Print the operator/tensor breakdown of the converted model.
tf.lite.experimental.Analyzer.analyze(model_content=tflite_model)

2023-10-07 21:45:15.036022: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.


=== TFLite ModelAnalyzer ===

Your TFLite model has '1' subgraph(s). In the subgraph description below,
T# represents the Tensor numbers. For example, in Subgraph#0, the PAD op takes
tensor #0 and tensor #11 as input and produces tensor #28 as output.

Subgraph#0 main(T#0) -> [T#39]
  Op#0 PAD(T#0, T#11[0, 0, 12, 12, 0, ...]) -> [T#28]
  Op#1 RESHAPE(T#28, T#3[1, 1, 174, 1]) -> [T#29]
  Op#2 CONV_2D(T#29, T#7, T#1) -> [T#30]
  Op#3 RESHAPE(T#30, T#4[1, 14, 16]) -> [T#31]
  Op#4 PAD(T#31, T#11[0, 0, 12, 12, 0, ...]) -> [T#32]
  Op#5 RESHAPE(T#32, T#5[1, 1, 38, 16]) -> [T#33]
  Op#6 CONV_2D(T#33, T#8, T#2) -> [T#34]
  Op#7 RESHAPE(T#34, T#6[1, 3, 16]) -> [T#35]
  Op#8 UNIDIRECTIONAL_SEQUENCE_LSTM(T#35, T#20, T#21, T#22, T#23, T#12, T#13, T#14, T#15, T#-1, T#-1, T#-1, T#16, T#17, T#18, T#19, T#-1, T#-1, T#10, T#36, T#-1, T#-1, T#-1, T#-1) -> [T#37]
  Op#9 STRIDED_SLICE(T#37, T#25[0, -1, 0], T#26[0, 0, 0], T#27[1, 1, 1]) -> [T#38]
  Op#10 FULLY_CONNECTED(T#38, T#24, T#9) -> [T#39]

Tensors

2023-10-07 21:45:15.036036: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2023-10-07 21:45:15.036435: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: models/model_0/model_0
2023-10-07 21:45:15.041238: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2023-10-07 21:45:15.041245: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: models/model_0/model_0
2023-10-07 21:45:15.048939: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
2023-10-07 21:45:15.052952: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2023-10-07 21:45:15.104447: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: models/model_0/model_0
2023-10-07 21:45:15.131022: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status

## Save the model as json

In [11]:
# Save the model as a JSON file (from RTNeural repo) ###################################
# model_utils_RTNeural is imported together with the other dependencies in the
# first cell, so a missing module is detected before training starts.
model_utils_RTNeural.save_model(model, filename="models/"+name+"/"+name+".json")

Skipping layer: <keras.src.engine.input_layer.InputLayer object at 0x16bf95290>
