# Set Up Imports

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Conv2DTranspose, Reshape, Dense
from keras import Model
import numpy as np
from tqdm import tqdm
print(f"Using Tensorflow v{tf.__version__}")

Using Tensorflow v2.10.0


# Encoder

In [4]:
encoder = Sequential(name="Encoder")
encoder.add(Conv2D(16, kernel_size=(4,1), strides=(1,2), padding='same', activation='relu', input_shape=(1,256,1), name="Encoder_Conv2D_1"))
encoder.add(Conv2D(32, kernel_size=(4,1), strides=(1,2), padding='same', activation='relu', name="Encoder_Conv2D_2"))
encoder.add(Reshape((1,-1), name="Encoder_Reshape"))
encoder.add(Dense(16, name="Encoder_Dense_1"))
encoder.add(Dense(16, name="Encoder_Dense_2"))

encoder.build()
encoder.summary()

Metal device set to: Apple M2

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Model: "Encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Encoder_Conv2D_1 (Conv2D)   (None, 1, 128, 16)        80        
                                                                 
 Encoder_Conv2D_2 (Conv2D)   (None, 1, 64, 32)         2080      
                                                                 
 Encoder_Reshape (Reshape)   (None, 1, 2048)           0         
                                                                 
 Encoder_Dense_1 (Dense)     (None, 1, 16)             32784     
                                                                 
 Encoder_Dense_2 (Dense)     (None, 1, 16)             272       
                                                                 
Total params: 35,216
Trainable params: 35,216
Non-trainable params: 0
____________________________________________

2023-02-22 13:32:27.286909: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-22 13:32:27.287034: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Decoder

In [5]:
decoder = tf.keras.models.Sequential(name="Decoder")
decoder.add(tf.keras.layers.Dense(128, input_shape=(1,16,1), name="Decoder_Dense"))
decoder.add(tf.keras.layers.Reshape((1,64,-1), name="Decoder_Reshape"))
decoder.add(tf.keras.layers.Conv2DTranspose(16, kernel_size=(4,1),strides=(1,2), padding='same', activation='relu', name="Decoder_Conv2D_T_1"))
decoder.add(tf.keras.layers.Conv2DTranspose(1, kernel_size=(4,1),strides=(1,2), padding='same', activation='sigmoid', name="Decoder_Conv2D_T_2"))

decoder.build()
decoder.summary()

Model: "Decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Decoder_Dense (Dense)       (None, 1, 16, 128)        256       
                                                                 
 Decoder_Reshape (Reshape)   (None, 1, 64, 32)         0         
                                                                 
 Decoder_Conv2D_T_1 (Conv2DT  (None, 1, 128, 16)       2064      
 ranspose)                                                       
                                                                 
 Decoder_Conv2D_T_2 (Conv2DT  (None, 1, 256, 1)        65        
 ranspose)                                                       
                                                                 
Total params: 2,385
Trainable params: 2,385
Non-trainable params: 0
_________________________________________________________________


# Autoencoder

In [6]:
class Autoencoder(Model):
    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.recon_loss = []
        self.encoder = encoder
        self.decoder = decoder
        self.recon_loss_tracker = keras.metrics.Mean(name="recon_loss")
    
    @property
    def metrics(self):
        return [
            self.recon_loss_tracker
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            recon_results = self.decoder(self.encoder(data))
            recon_loss = tf.reduce_mean(
                tf.reduce_mean(
                    keras.losses.binary_crossentropy(data, recon_results)
                )
            )
        grads = tape.gradient(recon_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.recon_loss_tracker.update_state(recon_loss)
        self.recon_loss.append(recon_loss)

        return {
            "recon_loss": self.recon_loss_tracker.result()
        }

In [7]:
def normalize(tensor):
    tensor_minusmin = tensor - tensor.min()
    tensor_maxminusmin = tensor.max() - tensor.min()
    return tensor_minusmin / tensor_maxminusmin

def get_waves(file_name):
    # Read the raw audio from the .WAV file
    raw_audio = tf.io.read_file(filename=file_name)
    # Convert the raw audio to a waveform
    wave_bank, sample_rate = tf.audio.decode_wav(raw_audio)
    # Display the wavebank and sample_rate
    # display(wave_bank, sample_rate)
    wave_size = 256
    num_waves = 0
    waves = []
    wave_forms = np.transpose(wave_bank.numpy())
    for i in range(64):
        wave_form = wave_forms[0, i*wave_size:i*wave_size+wave_size]
        if wave_form.max() != wave_form.min():
            wave_form = normalize(wave_form)
            waves.append(wave_form)
            num_waves += 1    
    waves = tf.stack(waves)
    return waves

In [8]:
waves = get_waves("./audio_data/ENVELO01.WAV")
display(waves.shape)
waves = tf.expand_dims(waves,1)
waves = tf.expand_dims(waves,1)
waves = tf.expand_dims(waves,-1)
display(waves.shape)

TensorShape([64, 256])

TensorShape([64, 1, 1, 256, 1])

In [9]:
autoencoder = Autoencoder(encoder=encoder, decoder=decoder)
autoencoder.compile(optimizer=keras.optimizers.Adam())

In [10]:
a = decoder(encoder(waves[0]))

In [11]:
# display(a[:,:,0], a[:,:,:-1],a[:,:,-1] - a[:,:,0:])
display(tf.reduce_sum(tf.square(a[:,:,-1] - a[:,:,0:])))

<tf.Tensor: shape=(), dtype=float32, numpy=8.4760967e-07>

In [12]:
autoencoder.trainable_weights

[<tf.Variable 'Encoder_Conv2D_1/kernel:0' shape=(4, 1, 1, 16) dtype=float32, numpy=
 array([[[[-0.22445883,  0.13684371, -0.13878809, -0.26143137,
            0.10711834, -0.1259625 ,  0.1894348 , -0.06733131,
           -0.1407482 , -0.06130213,  0.00824285,  0.25258732,
            0.10115069, -0.23866373,  0.14494172, -0.03196549]]],
 
 
        [[[-0.14005706, -0.17090805,  0.21749711, -0.08438003,
            0.2744313 , -0.29555136,  0.2510811 , -0.15910752,
           -0.14773086, -0.07120493, -0.11530842, -0.09326124,
           -0.02963725, -0.16393417,  0.18538532,  0.03228503]]],
 
 
        [[[-0.11497854,  0.05914295,  0.05263075, -0.2347047 ,
            0.27086234, -0.1001002 , -0.0265649 ,  0.04926053,
           -0.20259422,  0.21495801,  0.01954329,  0.18228266,
            0.2399754 ,  0.11414355,  0.06880659,  0.29560614]]],
 
 
        [[[ 0.25683022, -0.04405144,  0.26515812, -0.10593542,
            0.22931892, -0.18088976, -0.18218996,  0.03108504,
            0

In [13]:
num_epochs = 50

for j in tqdm(range(num_epochs)):
    for i in waves:
        recon_loss = autoencoder.train_step(i)

100%|██████████| 50/50 [01:05<00:00,  1.31s/it]
