In [27]:
import argparse
import os
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras

In [28]:
model = ""
mfcc = True
silence = True
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

In [29]:
if silence is True:
    data_dir = os.path.join('.', 'data', 'mini_speech_commands_silence')
else:
    zip_path = tf.keras.utils.get_file(
        origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
        fname='mini_speech_commands.zip',
        extract=True,
        cache_dir='.', cache_subdir='data')

    data_dir = os.path.join('.', 'data', 'mini_speech_commands')

In [30]:
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
filenames = tf.random.shuffle(filenames)
num_samples = len(filenames)
if silence is True:
    total = 9000
else:
    total = 8000

In [31]:
num_samples

80

In [32]:
train_files = filenames[:int(total*0.8)]
val_files = filenames[int(total*0.8): int(total*0.9)]
test_files = filenames[int(total*0.9):]

In [68]:
train_files

<tf.Tensor: shape=(80,), dtype=string, numpy=
array([b'.\\data\\small data size\\right\\0e17f595_nohash_1.wav',
       b'.\\data\\small data size\\up\\0e17f595_nohash_0.wav',
       b'.\\data\\small data size\\no\\0b77ee66_nohash_0.wav',
       b'.\\data\\small data size\\left\\0ff728b5_nohash_4.wav',
       b'.\\data\\small data size\\left\\0d393936_nohash_0.wav',
       b'.\\data\\small data size\\up\\0c5027de_nohash_1.wav',
       b'.\\data\\small data size\\no\\0cd323ec_nohash_1.wav',
       b'.\\data\\small data size\\stop\\0e5193e6_nohash_0.wav',
       b'.\\data\\small data size\\stop\\0b56bcfe_nohash_0.wav',
       b'.\\data\\small data size\\no\\0e5193e6_nohash_0.wav',
       b'.\\data\\small data size\\no\\0bd689d7_nohash_0.wav',
       b'.\\data\\small data size\\right\\0c2ca723_nohash_0.wav',
       b'.\\data\\small data size\\right\\0c40e715_nohash_1.wav',
       b'.\\data\\small data size\\go\\0ab3b47d_nohash_0.wav',
       b'.\\data\\small data size\\go\\0a9f9af7_nohash_

In [33]:
LABELS = np.array(tf.io.gfile.listdir(str(data_dir)))
LABELS = LABELS[LABELS != 'README.md']

In [34]:
LABELS

array(['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes'],
      dtype='<U5')

In [91]:
class SignalGenerator:
    def __init__(self, labels, sampling_rate, frame_length, frame_step,
            num_mel_bins=None, lower_frequency=None, upper_frequency=None,
            num_coefficients=None, mfcc=False):
        self.labels = labels
        self.sampling_rate = sampling_rate
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.num_mel_bins = num_mel_bins
        self.lower_frequency = lower_frequency
        self.upper_frequency = upper_frequency
        self.num_coefficients = num_coefficients
        num_spectrogram_bins = (frame_length) // 2 + 1

        if mfcc is True:
            self.linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
                    self.num_mel_bins, num_spectrogram_bins, self.sampling_rate,
                    self.lower_frequency, self.upper_frequency)
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        parts = tf.strings.split(file_path, os.path.sep)
        label = parts[-2]
        
        label_id = tf.argmax(label == self.labels)
        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)
        audio = tf.squeeze(audio, axis=1)

        return audio, label_id

    def pad(self, audio):
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio, zero_padding], 0)
        audio.set_shape([self.sampling_rate])

        return audio

    def get_spectrogram(self, audio):
        stft = tf.signal.stft(audio, frame_length=self.frame_length,
                frame_step=self.frame_step, fft_length=self.frame_length)
        spectrogram = tf.abs(stft)

        return spectrogram

    def get_mfccs(self, spectrogram):
        mel_spectrogram = tf.tensordot(spectrogram,
                self.linear_to_mel_weight_matrix, 1)
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :self.num_coefficients]

        return mfccs

    def preprocess_with_stft(self, file_path):
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.image.resize(spectrogram, [32, 32])

        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        mfccs = self.get_mfccs(spectrogram)
        mfccs = tf.expand_dims(mfccs, -1)

        return mfccs, label

    def make_dataset(self, files, train):
        ds = tf.data.Dataset.from_tensor_slices(files)
        print(ds.shape)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()
        if train is True:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds


In [92]:
STFT_OPTIONS = {'frame_length': 256, 'frame_step': 128, 'mfcc': False}
MFCC_OPTIONS = {'frame_length': 640, 'frame_step': 320, 'mfcc': True,
        'lower_frequency': 20, 'upper_frequency': 4000, 'num_mel_bins': 40,
        'num_coefficients': 10}

In [93]:
if mfcc is True:
    options = MFCC_OPTIONS
    strides = [2, 1]
else:
    options = STFT_OPTIONS
    strides = [2, 2]

In [94]:
generator = SignalGenerator(LABELS, 16000, **options)
train_ds = generator.make_dataset(train_files, True)
val_ds = generator.make_dataset(val_files, False)
test_ds = generator.make_dataset(test_files, False)

AttributeError: 'TensorSliceDataset' object has no attribute 'shape'

In [96]:
train_files.shape

TensorShape([80])

In [39]:
# if args.silence is True:
#     units = 9
# else:
#     units = 8

In [40]:
MLPmodel = keras.Sequential([
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(8)
])

In [49]:
MLPmodel.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 490)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               125696    
_________________________________________________________________
dense_7 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_8 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_9 (Dense)              (None, 8)                 2056      
Total params: 259,336
Trainable params: 259,336
Non-trainable params: 0
_________________________________________________________________


In [41]:
CNNmodel = keras.Sequential([
    keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides=strides, use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides=[1,1], use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides=[1,1], use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(units=8)
])

In [42]:
Ds_CNNmodel = keras.Sequential([
    keras.layers.Conv2D(filters=256, kernel_size=[3,3], strides=strides, use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.DepthwiseConv2D(kernel_size=[3,3], strides=[1,1],use_bias=False),
    keras.layers.Conv2D(filters=256, kernel_size=[1,1], strides=[1,1], use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.DepthwiseConv2D(kernel_size=[3,3], strides=[1,1],use_bias=False),
    keras.layers.Conv2D(filters=128, kernel_size=[1,1], strides=[1,1], use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(units=8)
])

In [53]:
metrics = [tf.metrics.SparseCategoricalAccuracy()]
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [54]:
MLPmodel.compile(optimizer='adam',
              loss=loss,
              metrics=metrics)

CNNmodel.compile(optimizer='adam',
              loss=loss,
              metrics=metrics)

Ds_CNNmodel.compile(optimizer='adam',
              loss=loss,
              metrics=metrics)

In [55]:
cp_callback = keras.callbacks.ModelCheckpoint(
    #'./callback_test_chkp/chkp_{epoch:02d}',
    './callback_test_chkp/MLP_best',
    monitor='val_accuracy',
    verbose=0, 
    #save_best_only=False,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch'
)
MLPmodel.fit(train_ds, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x27fdd190b48>

In [56]:
cp_callback = keras.callbacks.ModelCheckpoint(
    #'./callback_test_chkp/chkp_{epoch:02d}',
    './callback_test_chkp/CNN_best',
    monitor='val_accuracy',
    verbose=0, 
    #save_best_only=False,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch'
)
CNNmodel.fit(train_ds, epochs=2, callbacks = cp_callback)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x27fdd1901c8>

In [60]:
train_ds

<ShuffleDataset shapes: ((None, 49, 10, 1), (None,)), types: (tf.float32, tf.int64)>

In [57]:
cp_callback = keras.callbacks.ModelCheckpoint(
    #'./callback_test_chkp/chkp_{epoch:02d}',
    './callback_test_chkp/Ds_CNN_best',
    monitor='val_accuracy',
    verbose=0, 
    #save_best_only=False,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch'
)
Ds_CNNmodel.fit(train_ds, epochs=2, callbacks = cp_callback)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x27fdd255ec8>

In [59]:
test_acc = MLPmodel.evaluate(test_ds, verbose=0)
print('\n accuracy for MLP :', test_acc)

# test_acc = CNNmodel.evaluate(test_ds, verbose=0)
# print('\n accuracy for CNN :', test_acc)

# test_acc = Ds_CNNmodel.evaluate(test_ds, verbose=0)
# print('\n accuracy for Ds_CNN :', test_acc)


 accuracy for MLP : [None, None]


In [55]:
# run_model = tf.function(lambda x: MLPmodel(x))
# concrete_func = run_model.get_concrete_function()
# MLPmodel.save('MLP', signatures=concrete_func)

NameError: name 'concrete_func' is not defined