# Variational autoencoder (VAE) for reconstructing audio from the AudioMNIST dataset

In [None]:
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
import librosa
import glob, os
import numpy as np
import tensorflow as tf
import vae

# Load the recordings from the first `max_files` numbered sub-folders of
# `directory`, resample them to `sr` Hz, and turn them into a 2-D array in
# which every row is exactly `max_length` samples long (zero-padded at the
# end, or cut if a clip is longer).
import warnings

audio_files = []

directory = "C:/your/file/path"
max_files = 5   # number of numbered sub-folders ("01", "02", ...) to read
sr = 11025      # sampling rate requested from librosa.load

for file_number in range(1, max_files + 1):
    # Sub-folders are named with a zero-padded, two-digit index.
    directory_path = f"{directory}/{file_number:02d}"
    # glob returns paths in arbitrary order; sort them so the position-based
    # labels built below line up with the clips on every platform.
    wav_paths = sorted(glob.glob(os.path.join(directory_path, '*.wav')))
    audio_files += [librosa.load(wav_path, sr=sr)[0] for wav_path in wav_paths]

if not audio_files:
    # Fail with a readable message instead of an opaque reduction error below.
    raise FileNotFoundError(
        f"No .wav files found in the numbered sub-folders of {directory!r}; "
        "check `directory` and `max_files`."
    )

# Length (in samples) of the longest loaded clip.
maxl = np.max([audio_file.shape[0] for audio_file in audio_files])
max_length = sr  # fixed clip length: one second at `sr` Hz
if maxl > max_length:
    warnings.warn(
        f"Longest clip has {maxl} samples; clips are truncated to {max_length}."
    )

# Cut over-long clips first so the pad width can never become negative
# (np.pad rejects negative widths), then zero-pad the tail.
audio_files = np.array([
    np.pad(arr[:max_length], (0, max_length - min(len(arr), max_length)), 'constant')
    for arr in audio_files
])

# Position-based labels: 50 consecutive clips per digit 0-9, repeated once per
# sub-folder.
# NOTE(review): this assumes each sub-folder holds exactly 500 clips whose
# sorted file names group by digit - confirm against the dataset layout.
labels = np.tile(np.repeat(np.arange(10), 50), max_files)

audio_files.shape, labels.shape

In [12]:
# Hold out one third of the clips for evaluation; the fixed seed makes the
# shuffled split reproducible. The label halves of the split are discarded.
X_train, X_test, _, _ = train_test_split(
    audio_files,
    labels,
    test_size=1 / 3,
    random_state=44,
    shuffle=True,
)
X_train.shape

(1666, 11025)

In [4]:
# Append a trailing singleton axis: (n_clips, n_samples) -> (n_clips, n_samples, 1).
# NOTE(review): presumably the channel axis expected by vae.VAE - confirm.
data_train = np.expand_dims(X_train, axis=-1)
data_test = np.expand_dims(X_test, axis=-1)

In [5]:
# Latent size handed to the VAE constructor from the local `vae` module.
latent_dim = 16
model = vae.VAE(latent_dim=latent_dim)

In [8]:
# Adam optimizer, reused by the epoch loop further down.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
# A single step on the whole, unbatched training array.
# NOTE(review): presumably a warm-up/smoke test that also builds the model's
# variables - confirm against vae.VAE.train_step.
model.train_step(data_train, optimizer)

In [9]:
# Both splits are shuffled with a buffer as large as the split itself and then
# served in batches of 32.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(data_train)
    .shuffle(data_train.shape[0])
    .batch(32)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices(data_test)
    .shuffle(data_test.shape[0])
    .batch(32)
)

In [10]:
epochs = 15

for epoch in range(1, epochs + 1):
    # Training pass: one optimizer step per batch.
    for train in train_dataset:
        model.train_step(train, optimizer)

    # Evaluation pass: average the per-batch test loss with a fresh metric.
    loss = tf.keras.metrics.Mean()
    for test in test_dataset:
        loss.update_state(model.compute_loss(test))

    # The reported ELBO is the negated mean test loss.
    elbo = -loss.result()
    print(f"Epoch: {epoch}, elbo = {elbo}")


Epoch: 1, elbo = -10713.0205078125
Epoch: 2, elbo = -10638.80078125
Epoch: 3, elbo = -10028.82421875
Epoch: 4, elbo = -9681.9208984375
Epoch: 5, elbo = -9647.96875
Epoch: 6, elbo = -9625.13671875
Epoch: 7, elbo = -9606.4013671875
Epoch: 8, elbo = -9590.5810546875
Epoch: 9, elbo = -9576.76171875
Epoch: 10, elbo = -9563.787109375
Epoch: 11, elbo = -9551.4599609375
Epoch: 12, elbo = -9539.3828125
Epoch: 13, elbo = -9527.6474609375
Epoch: 14, elbo = -9516.015625
Epoch: 15, elbo = -9503.78125


In [13]:
# Show the summary report of the VAE instance created earlier.
# NOTE(review): presumably the Keras layer/parameter table - confirm that
# vae.VAE subclasses tf.keras.Model.
model.summary()