# Set Up Imports

In [21]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Conv2DTranspose, Reshape, Dense
from keras import Model
import numpy as np
print(f"Using Tensorflow v{tf.__version__}")

Using Tensorflow v2.10.0


# Encoder

In [19]:
# Both convolutions share the same geometry: a (4, 1) kernel with stride
# (1, 2) and 'same' padding, which halves the width axis at every layer
# (256 -> 128 -> 64).
encoder_conv_kwargs = dict(
    kernel_size=(4, 1),
    strides=(1, 2),
    padding='same',
    activation='relu',
)

# Encoder: two strided convolutions, a flatten into one row of features,
# then two linear Dense layers that produce a 16-value latent code.
encoder = Sequential(
    [
        Conv2D(16, input_shape=(1, 256, 1), name="Encoder_Conv2D_1", **encoder_conv_kwargs),
        Conv2D(32, name="Encoder_Conv2D_2", **encoder_conv_kwargs),
        # Collapse the (64, 32) feature map into a single row of 2048 features.
        Reshape((1, -1), name="Encoder_Reshape"),
        Dense(16, name="Encoder_Dense_1"),
        Dense(16, name="Encoder_Dense_2"),
    ],
    name="Encoder",
)

encoder.build()
encoder.summary()

Model: "Encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Encoder_Conv2D_1 (Conv2D)   (None, 1, 128, 16)        80        
                                                                 
 Encoder_Conv2D_2 (Conv2D)   (None, 1, 64, 32)         2080      
                                                                 
 Encoder_Reshape (Reshape)   (None, 1, 2048)           0         
                                                                 
 Encoder_Dense_1 (Dense)     (None, 1, 16)             32784     
                                                                 
 Encoder_Dense_2 (Dense)     (None, 1, 16)             272       
                                                                 
Total params: 35,216
Trainable params: 35,216
Non-trainable params: 0
_________________________________________________________________


# Decoder

In [16]:
# Decoder: expands the latent code back to a (1, 256, 1) wave.
# NOTE(review): the declared input shape is (1, 16, 1) while the encoder
# emits (None, 1, 16) -- confirm the trailing singleton axis is intended.
decoder_layers = [
    # Dense acts on the last axis (size 1), giving 128 features per latent value.
    tf.keras.layers.Dense(128, input_shape=(1, 16, 1), name="Decoder_Dense"),
    # Regroup the 16 * 128 values into a 64-wide map with 32 channels.
    tf.keras.layers.Reshape((1, 64, -1), name="Decoder_Reshape"),
    # Each transposed convolution doubles the width axis (64 -> 128 -> 256).
    tf.keras.layers.Conv2DTranspose(
        16,
        kernel_size=(4, 1),
        strides=(1, 2),
        padding='same',
        activation='relu',
        name="Decoder_Conv2D_T_1",
    ),
    # Sigmoid keeps the reconstruction in [0, 1].
    tf.keras.layers.Conv2DTranspose(
        1,
        kernel_size=(4, 1),
        strides=(1, 2),
        padding='same',
        activation='sigmoid',
        name="Decoder_Conv2D_T_2",
    ),
]
decoder = tf.keras.models.Sequential(decoder_layers, name="Decoder")

decoder.build()
decoder.summary()

Model: "Decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Decoder_Dense (Dense)       (None, 1, 16, 128)        256       
                                                                 
 Decoder_Reshape (Reshape)   (None, 1, 64, 32)         0         
                                                                 
 Decoder_Conv2D_T_1 (Conv2DT  (None, 1, 128, 16)       2064      
 ranspose)                                                       
                                                                 
 Decoder_Conv2D_T_2 (Conv2DT  (None, 1, 256, 1)        65        
 ranspose)                                                       
                                                                 
Total params: 2,385
Trainable params: 2,385
Non-trainable params: 0
_________________________________________________________________


# Autoencoder

In [57]:
class Autoencoder(Model):
    """Keras model that chains an encoder and a decoder and trains them
    jointly on a binary cross-entropy reconstruction loss.

    Args:
        encoder: Model mapping an input batch to its latent code.
        decoder: Model mapping a latent code back to the input space.
        **kwargs: Forwarded unchanged to ``keras.Model``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # Running mean of the reconstruction loss over the train steps seen
        # since the last metric reset.
        self.recon_loss_tracker = keras.metrics.Mean(name="recon_loss")

    @property
    def metrics(self):
        """Metrics Keras should report and reset for this model."""
        return [
            self.recon_loss_tracker
        ]

    def call(self, inputs, training=None):
        """Encode and then decode ``inputs``, returning the reconstruction."""
        latent = self.encoder(inputs, training=training)
        return self.decoder(latent, training=training)

    def train_step(self, data):
        """Run one optimisation step on a batch.

        Args:
            data: Input batch. If a tuple is supplied (e.g. ``(x, y)``),
                only its first element is used; the input is its own target.

        Returns:
            Dict with the running mean reconstruction loss under ``"recon_loss"``.
        """
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            recon_results = self(data, training=True)
            # binary_crossentropy averages over the last axis only, leaving
            # one value per remaining position.
            per_position = keras.losses.binary_crossentropy(data, recon_results)
            # Sum within each sample (every axis except the batch axis), then
            # average across the batch so the loss does not grow with batch size.
            sample_axes = list(range(1, per_position.shape.rank))
            recon_loss = tf.reduce_mean(
                tf.reduce_sum(per_position, axis=sample_axes)
            )
        grads = tape.gradient(recon_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.recon_loss_tracker.update_state(recon_loss)
        return {
            "recon_loss": self.recon_loss_tracker.result()
        }

In [58]:
def normalize(tensor):
    """Min-max scale an array so that its values span [0, 1].

    Args:
        tensor: NumPy array, or anything ``np.asarray`` can convert, holding
            at least one element.

    Returns:
        NumPy array of the same shape, computed as
        ``(tensor - min) / (max - min)``. A constant input (max == min)
        yields all zeros instead of dividing by zero.
    """
    values = np.asarray(tensor)
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant input: the shifted values are already all zero, so divide
        # by one to keep the usual result dtype without producing NaN.
        span = 1
    return (values - lowest) / span

def get_waves(file_name, wave_size=256, max_waves=64):
    """Load a .WAV file and cut its first channel into normalised frames.

    Args:
        file_name: Path of the .WAV file to read.
        wave_size: Number of samples in each frame.
        max_waves: Upper bound on the number of frames read from the start
            of the file.

    Returns:
        Tensor of shape (num_waves, wave_size) holding the min-max
        normalised frames. Frames whose samples are all identical are skipped.

    Raises:
        ValueError: If the file yields no usable frame.
    """
    # Read the raw audio from the .WAV file
    raw_audio = tf.io.read_file(filename=file_name)
    # Convert the raw audio to a waveform; the sample rate is not needed here
    wave_bank, _sample_rate = tf.audio.decode_wav(raw_audio)
    # decode_wav returns (samples, channels); only the first channel is used
    samples = wave_bank.numpy()[:, 0]
    # Only read complete frames so a short file cannot produce an empty or
    # truncated slice
    num_frames = min(max_waves, len(samples) // wave_size)
    waves = []
    for frame_index in range(num_frames):
        start = frame_index * wave_size
        wave_form = samples[start:start + wave_size]
        # A flat frame has no range to normalise over, so leave it out
        if wave_form.max() != wave_form.min():
            waves.append(normalize(wave_form))
    if not waves:
        raise ValueError(
            f"No usable {wave_size}-sample wave found in {file_name!r}"
        )
    return tf.stack(waves)

In [59]:
# Load the normalised wave frames as a (num_waves, wave_size) tensor.
waves = get_waves("./audio_data/ENVELO01.WAV")
display(waves.shape)
# Add two singleton axes after the wave axis and a channel axis at the end,
# so that every waves[i] is a batch of one sample shaped (1, 256, 1).
for new_axis in (1, 1, -1):
    waves = tf.expand_dims(waves, new_axis)
display(waves.shape)

TensorShape([64, 256])

TensorShape([64, 1, 1, 256, 1])

In [60]:
# Combine the two halves into one trainable model. No loss is passed to
# compile() because Autoencoder.train_step computes the loss itself.
adam_optimizer = keras.optimizers.Adam()
autoencoder = Autoencoder(encoder=encoder, decoder=decoder)
autoencoder.compile(optimizer=adam_optimizer)

In [61]:
# Sanity check: encode the first single-wave batch and show its latent code.
first_latent = encoder(waves[0])
display(first_latent)

<tf.Tensor: shape=(1, 1, 16), dtype=float32, numpy=
array([[[-0.01558048,  0.0111496 ,  0.01787113, -0.04636577,
         -0.00938693, -0.01669247, -0.00726766, -0.02033841,
         -0.02910567,  0.03464256, -0.02980057, -0.01850165,
         -0.03295462, -0.05090476,  0.03763466,  0.00575553]]],
      dtype=float32)>

In [62]:
num_epochs = 50

# Train one wave at a time: each element of `waves` is already a batch of one.
for epoch in range(num_epochs):
    # Restart the running-mean loss so the value returned by train_step
    # describes the current epoch only, not every epoch so far.
    autoencoder.reset_metrics()
    # A distinct name keeps the wave from shadowing the epoch counter.
    for wave in waves:
        recon_loss = autoencoder.train_step(wave)