In [None]:
# Small GPU Bugfix and all imports
import tensorflow
# Enable on-demand GPU memory allocation instead of grabbing all memory up front.
# Iterating over the device list (rather than indexing [0]) keeps the notebook
# runnable on CPU-only machines, where the list is empty, and applies the
# setting to every GPU, since TensorFlow expects memory growth to be configured
# consistently across all visible GPUs.
physical_devices = tensorflow.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tensorflow.config.experimental.set_memory_growth(gpu_device, enable=True)

import tensorflow_datasets as tfds
import numpy as np
from tensorflow import  numpy_function
from tensorflow import convert_to_tensor
from tensorflow import float32
import tensorflow.keras as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Convolution1D, Flatten, Dense
from tensorflow.keras.activations import softmax
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras.models import load_model
import librosa
import os

In [None]:
# Loading Dataset
# Train split: loaded and wrapped in a 1024-element shuffle buffer.
dataset = tfds.load(name="speech_commands", split="train").shuffle(1024)
# Validation split: loaded as-is (no shuffling).
dataset_valid = tfds.load(name="speech_commands", split="validation")

In [None]:
# Dataset Prep Functions
def pad_spectrogram(spec, len):
    """Left-pad a 2-D spectrogram with zero columns up to ``len`` columns.

    Args:
        spec: 2-D numpy array; padding is added along axis 1.
        len: Target number of columns. (The name shadows the builtin ``len``;
            it is kept so existing keyword callers keep working.)

    Returns:
        A new array of shape ``(spec.shape[0], len)`` with the zero columns
        placed *before* the original data. The result has the same dtype as
        ``spec``.

    Raises:
        ValueError: If ``spec`` already has more than ``len`` columns.
    """
    missing = len - spec.shape[1]
    if missing < 0:
        raise ValueError(
            "spectrogram has %d columns, which exceeds the target length %d"
            % (spec.shape[1], len)
        )
    # Build the padding in the spectrogram's own dtype: an int64 pad block
    # would make np.concatenate promote float32 data to float64.
    zrs = np.zeros((spec.shape[0], missing), dtype=spec.dtype)
    return np.concatenate([zrs, spec], 1)


def to_spectrogram(x):
    """Convert one dataset example into a (log-mel spectrogram, one-hot label) pair.

    Intended to be passed to ``Dataset.map``. The numpy/librosa work runs
    inside ``numpy_function``, so static shapes are re-attached afterwards.

    Args:
        x: Example mapping providing the entries ``'audio'`` and ``'label'``.

    Returns:
        Tuple ``(audios, labels)`` of float32 tensors with static shapes
        ``(41, 40)`` (frames x mel bands) and ``(12,)`` (one-hot label).
    """
    def mapping_function(audio, label):
        # Pass the signal as `y=`: newer librosa releases make it keyword-only,
        # and older releases accept the keyword as well.
        # NOTE(review): hop_length (400) is larger than n_fft (160), so samples
        # between consecutive frames are never analysed - confirm this is intended.
        S = librosa.feature.melspectrogram(y=audio.astype(np.float32), sr=16000, n_fft=160, hop_length=400,
                                           n_mels=40)
        S_DB = librosa.power_to_db(S, ref=np.max)
        if S_DB.shape[1] != 41:
            # Fewer than 41 frames: left-pad with zeros.
            # NOTE(review): with ref=np.max, 0 dB is the loudest value, so the
            # padding is not "silence" - confirm this is the desired fill value.
            S_DB = pad_spectrogram(S_DB, 41)
        # (mel bands, frames) -> (frames, mel bands); the reshape also fails
        # loudly if the spectrogram does not have the expected size.
        features = S_DB.T.reshape((41, 40)).astype(np.float32)
        one_hot = to_categorical(label, 12).astype(np.float32)
        # numpy_function expects numpy arrays back, matching the declared Tout dtypes.
        return features, one_hot

    audios, labels = numpy_function(mapping_function, [x['audio'], x['label']], [float32, float32])
    # numpy_function drops static shape information; restore it for Keras.
    audios.set_shape((41, 40))
    labels.set_shape((12,))
    return audios, labels


In [None]:
# Apply Operations to Dataset
# Run the spectrogram/one-hot conversion over both splits: `dn` is the converted
# training pipeline, `dataset_valid` is replaced by its converted version.
# Note: because `dataset_valid` is rebound, this cell is not safe to run twice
# without re-running the loading cell first.
dn, dataset_valid = [split.map(to_spectrogram) for split in (dataset, dataset_valid)]


In [None]:
# Visualize Dataset
from librosa.display import specshow
import matplotlib.pyplot as plt

# Plot the spectrogram of a single example from the converted training pipeline.
for x, y in dn.take(1):
    # Samples are stored as (frames, mel bands), but specshow draws rows on the
    # y axis and columns on the x axis, so transpose back to (mel bands, frames)
    # to make the 'time' / 'mel' axis labels match the data.
    specshow(x.numpy().T, sr=16000, hop_length=400, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log-mel spectrogram (one training example)')
    plt.show()

In [None]:
# Train Run prep
# RUN_NAME tags both the TensorBoard log directory and the checkpoint file.
RUN_NAME = 'conv1d'
tbcallback = TensorBoard(log_dir='logs/' + RUN_NAME, profile_batch=4)
# Create the checkpoint directory if needed; exist_ok avoids the separate
# listdir check and the race between checking and creating.
os.makedirs('saved_models', exist_ok=True)
# The model is compiled with metrics=['acc'], so the logged training metric is
# 'acc' ('train-acc' is never produced). With save_best_only=False a checkpoint
# is written every epoch regardless of the monitored value.
checkpointcallback = ModelCheckpoint('saved_models/'+RUN_NAME + '.h5', monitor='acc', save_best_only=False)
# Batch both pipelines. Note: re-running this cell batches the already-batched
# datasets again; re-run the mapping cell first.
dn = dn.batch(64)
dataset_valid = dataset_valid.batch(64)

In [None]:
# Model Definition
# Two 1-D convolutions over the (41, 40) spectrogram, flattened into a small
# dense stack ending in a 12-way softmax.
# NOTE(review): the convolution and hidden dense layers are created without an
# activation argument - confirm that is intended.
model = tf.Sequential([
    Convolution1D(82, 5, input_shape=(41, 40)),
    Convolution1D(20, 3),
    Flatten(),
    Dense(32),
    Dense(128),
    Dense(12, activation=softmax),
])
model.summary()
model.compile(
    optimizer=tf.optimizers.Adam(0.0001),
    loss=tf.losses.categorical_crossentropy,
    metrics=['acc'],
)

In [None]:
# Load Model
# Resume from the checkpoint of this run when one exists. On a first run there
# is no file yet, so skip loading instead of raising, which keeps
# "Restart Kernel -> Run All" working.
checkpoint_path = 'saved_models/'+RUN_NAME + '.h5'
if os.path.exists(checkpoint_path):
    model = load_model(checkpoint_path)
else:
    print('No checkpoint found at ' + checkpoint_path + '; continuing without loading.')

In [None]:
# Model Training
# Fit on the batched training pipeline for 512 epochs, validating on
# `dataset_valid`; TensorBoard logging and checkpointing run as callbacks.
model.fit(
    x=dn,
    validation_data=dataset_valid,
    epochs=512,
    callbacks=[tbcallback, checkpointcallback],
)

In [None]:
# Model Testing
# Confusion matrix over the first 100 batches of `dn`:
# row index = predicted class, column index = true class.
# NOTE(review): `dn` is the training pipeline - confirm whether the validation
# data was meant to be used here.
conf_matrix = np.zeros((12, 12), np.uint64)
for batch_inputs, batch_targets in dn.take(100):
    predicted_classes = np.argmax(model.predict(batch_inputs), axis=-1)
    true_classes = np.argmax(batch_targets, axis=-1)
    for pred_idx, true_idx in zip(predicted_classes, true_classes):
        conf_matrix[pred_idx, true_idx] += 1
print(conf_matrix)

