In [1]:
import argparse
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import time as t

from tensorflow import convert_to_tensor, float32, tensordot
from tensorflow import abs as tfabs
from tensorflow.io import serialize_tensor, write_file
from tensorflow.math import log
from tensorflow.signal import linear_to_mel_weight_matrix,mfccs_from_log_mel_spectrograms, stft
dataset_dir = 'data/mini_speech_commands_datasets'

mods_names = ['mlp','cnn','dscnn']
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = keras.metrics.SparseCategoricalAccuracy()


In [2]:

seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
mfccs = True

if not os.path.exists('data/mini_speech_commands'):
    zip_path = tf.keras.utils.get_file(
        origin='http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip',
        fname='mini_speech_commands.zip',
        extract=True,
        cache_dir='.', cache_subdir='data')

data_dir = os.path.join('.','data', 'mini_speech_commands')
#filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
#filenames = tf.random.shuffle(filenames)
#n = len(filenames)



LABELS = np.array(tf.io.gfile.listdir(str(data_dir))) 
LABELS = [label for label in LABELS if label != 'README.md']

class SignalGenerator:
    def __init__(self, labels, sampling_rate=16000, frame_length=1920 , frame_step=960, num_mel_bins=40,
                 lower_freq=20, upper_freq=48000, num_coefficients=10, mfccs=False):
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.mel_inputs =  [num_mel_bins, None, sampling_rate, lower_freq, upper_freq]
        self.mfccs_coeff = num_coefficients
        self.labels=labels
        self.sampling_rate=sampling_rate
        num_spectrogram_bins = (frame_length) // 2 + 1

        if mfccs:
            self.l2mel_matrix = linear_to_mel_weight_matrix(
                    self.num_mel_bins, num_spectrogram_bins, self.sampling_rate,
                    lower_freq, upper_freq)
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        parts = tf.strings.split(file_path, os.path.sep)
        label = parts[-2]
        label_id = tf.argmax(label == self.labels)
        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)
        audio = tf.squeeze(audio, axis=1)
        return audio, label_id

    def pad(self, audio):
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio,zero_padding],0)
        audio.set_shape([self.sampling_rate])
        return audio

    def get_spectrogram(self, audio):
        tfstft = stft(audio, frame_length=self.frame_length, frame_step=self.frame_step,fft_length=self.frame_length)
        spectrogram = tf.abs(tfstft)
        return spectrogram

    def get_mfcc(self, spectrogram):
        mel_spectrogram = tensordot(spectrogram, self.l2mel_matrix, 1)
        log_mel_spectrogram = log(mel_spectrogram + 1e-6)
        mfccs = mfccs_from_log_mel_spectrograms(log_mel_spectrogram)[..., :self.mfccs_coeff]
        return mfccs

    def preprocess_with_stft(self, file_path):
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.image.resize(spectrogram, [32,32])
        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        mfccs = get_mfcc(spectrogram)
        return mfccs, label

    def make_dataset(self, files, train):
        ds = tf.data.Dataset.from_tensor_slices(files)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()

        if train:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds


if not os.path.exists(dataset_dir):
    os.mkdir(dataset_dir)
    train_files = tf.strings.split(tf.io.read_file('./kws_train_split.txt'),sep='\n')[:-1]
    val_files = tf.strings.split(tf.io.read_file('./kws_val_split.txt'),sep='\n')[:-1]
    test_files = tf.strings.split(tf.io.read_file('./kws_test_split.txt'),sep='\n')[:-1]
    generator = SignalGenerator(LABELS)
    train_ds = generator.make_dataset(train_files, True)
    val_ds = generator.make_dataset(val_files, False)
    test_ds = generator.make_dataset(test_files, False)
    tf.data.experimental.save(train_ds, f'{dataset_dir}/th_train')
    tf.data.experimental.save(val_ds, f'{dataset_dir}/th_val')
    tf.data.experimental.save(test_ds, f'{dataset_dir}/th_test')
    

stride = [2,2] if not mfccs else [2,1]

MLP = keras.Sequential([
    keras.layers.Flatten(),
    keras.layers.Dense(units=256,activation=keras.activations.relu),
    keras.layers.Dense(units=256,activation=keras.activations.relu),
    keras.layers.Dense(units=256,activation=keras.activations.relu),
    keras.layers.Dense(units=len(LABELS))
])

CNN = keras.Sequential([
    keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides = stride, use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides = stride, use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides = stride, use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(units=len(LABELS))
])

DSCNN = keras.Sequential([
    keras.layers.Conv2D(filters=256, kernel_size=[3, 3], strides=stride, use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
    keras.layers.Conv2D(filters=256, kernel_size=[1, 1], strides=[1, 1], use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
    keras.layers.Conv2D(filters=256, kernel_size=[1, 1], strides=[1, 1], use_bias=False),
    keras.layers.BatchNormalization(momentum=0.1),
    keras.layers.ReLU(),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(units=len(LABELS))
])

models = [MLP,CNN,DSCNN]

for i,model in enumerate(models):
    model.compile(optimizer='adam',loss=loss, metrics=[metric])
    cp_callback = keras.callbacks.ModelCheckpoint(
        f'./callback_test_chkp/{mods_names[i]}_chkp_best',
        monitor='val_loss',
        verbose=0, 
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        save_freq='epoch'
    )
    model.fit(train_ds, batch_size=32, epochs=20, validation_data=val_ds,callbacks=[cp_callback])
    model.summary()
    start = t.time()
    test_loss, test_acc2 = model.evaluate(test_ds, verbose=2)
    end = t.time() - start
    msize = os.path.getsize(f'./callback_test_chkp/{mods_names[i]}_chkp_best/saved_model.pb')
    print()
    print(f'acc: {test_acc2}, size: {msize} Inference Latency {end}ms')
    print()




Epoch 1/20
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: ./callback_test_chkp/mlp_chkp_best/assets
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               262400    
_________________________________________________________________
dense_1 (Dense)              (None, 256)  

Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 15, 30, 128)       1152      
_________________________________________________________________
batch_normalization (BatchNo (None, 15, 30, 128)       512       
_________________________________________________________________
re_lu (ReLU)                 (None, 15, 30, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 7, 28, 128)        147456    
_________________________________________________________________
batch_normalization_1 (Batch (None, 7, 28, 128)        512       
_________________________________________________________________
re_lu_1 (ReLU)       

Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 15, 30, 256)       2304      
_________________________________________________________________
batch_normalization_3 (Batch (None, 15, 30, 256)       1024      
_________________________________________________________________
re_lu_3 (ReLU)               (None, 15, 30, 256)       0         
_________________________________________________________________
depthwise_conv2d (DepthwiseC (None, 13, 28, 256)       2304      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 28, 256)       65536     
_________________________________________________________________
batch_normalization_4 (Batch (None, 13, 28,

## saves and run tflite model

In [4]:
import tensorflow.lite as tflite
import zlib
#import tensorflow_model_optimization as tfmot
#pruning_params = {'pruning_schedule':tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.30, 
#                                                                          final_sparsity=0.8,
#                                                                          begin_step=len(train_ds)*5,
#                                                                          end_step=len(train_ds)*15)}
#prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude


tensor_specs = (tf.TensorSpec([None,32,32,1],dtype=tf.float32),tf.TensorSpec([None,],dtype=tf.int64))
train_ds = tf.data.experimental.load(f'{dataset_dir}/th_train',tensor_specs)
val_ds = tf.data.experimental.load(f'{dataset_dir}/th_val', tensor_specs)
test_ds = tf.data.experimental.load(f'{dataset_dir}/th_test',tensor_specs)
test_ds = test_ds.unbatch().batch(1)

def convert(img, target_type_min, target_type_max, target_type):
    img = img
    imin = img.min()
    imax = img.max()

    a = (target_type_max - target_type_min) / (imax - imin)
    b = target_type_max - a * imax
    new_img = (a * img + b).astype(target_type)
    return new_img

def representative_dataset_gen():
    for x, _ in train_ds.take(100):
        yield [x]
    
for mod in mods_names:
    #saving
    tflite_dirs = './tflite_models'
    
    if mod != 'dscnn':
        continue
    converter = tf.lite.TFLiteConverter.from_saved_model(f'./callback_test_chkp/{mod}_chkp_best/')
    tflite_model = converter.convert()
    if not os.path.exists(tflite_dirs): 
        os.mkdir(tflite_dirs)
    with open(tflite_dirs+f'/{mod}_basic.tflite', 'wb') as f:
        f.write(tflite_model)
    tflo_size=os.path.getsize(tflite_dirs+f"/{mod}_basic.tflite")
    print(f'size of basic model: {tflo_size/1024}kB')
    
    '''
    tflite_opt_model = converter.convert()
    if not os.path.exists(tflite_dirs): 
        os.mkdir(tflite_dirs)
    with open(tflite_dirs+f'/{mod}_opt.tflite', 'wb') as f:
        f.write(tflite_opt_model)
    tflo_size=os.path.getsize(tflite_dirs+f"/{mod}_opt.tflite")
    print(f'size of optimized model: {tflo_size/1024}kB')
    converter = tf.lite.TFLiteConverter.from_saved_model(tflite_dirs+f'/{mod}_opt.tflite')
    '''
    converter = tf.lite.TFLiteConverter.from_saved_model(f'./callback_test_chkp/{mod}_chkp_best/')
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    #converter.representative_dataset = representative_dataset_gen
    #converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    #converter.inference_input_type = tf.uint8  # or tf.uint8
    #converter.inference_output_type = tf.uint8 
    tflite_quant_model = converter.convert()
    
    
    if not os.path.exists(tflite_dirs): 
        os.mkdir(tflite_dirs)
    with open(tflite_dirs+f'/{mod}.tflite', 'wb') as f:
        f.write(tflite_quant_model)
    tfl_size=os.path.getsize(tflite_dirs+f"/{mod}.tflite")
    with open(tflite_dirs+f"/{mod}_compressed.tflite.zlib", 'wb') as fp:
        tflite_compressed = zlib.compress(tflite_quant_model)
        fp.write(tflite_compressed)
    tflc_size=os.path.getsize(tflite_dirs+f"/{mod}_compressed.tflite.zlib")
    
    print(f'size of optimized model: {tfl_size/1024}kB \ncompressed: {tflc_size/1024}kB')
    
    interpreter = tflite.Interpreter(model_path = tflite_dirs+f"/{mod}.tflite")
    interpreter.allocate_tensors()
    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    print('input: ', input_details[0]['dtype'])
    output_details = interpreter.get_output_details()
    print('output: ', output_details[0]['dtype'])

    input_shape = input_details[0]['shape']
    num_corr = 0
    num = 0
    start = t.time()
    for input_data,label in test_ds:
        #input_data = convert(input_data, 0, 255, np.uint8)
        #label = convert(label, 0, 255, np.uint8)
        #input_data = tf.quantization.quantize(input_data,min(input_data),max(input_data),tf.quint8)
        #label = tf.quantization.quantize(label,min(LABELS),max(LABELS),tf.quint8)
        num += 1
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        output_data = np.argmax(interpreter.get_tensor(output_details[0]['index']))
        y_pred = tf.constant([output_data],dtype=tf.int64)
        if label.numpy()[0] == output_data:
            num_corr+=1
    end = t.time() - start
    print(f'accuracy: {num_corr/num} tflite size: {tfl_size/1024}kB compressed: {tflc_size/1024}kB time: {end}ms')

size of basic model: 555.7421875kB
size of optimized model: 146.3125kB 
compressed: 114.5927734375kB
input:  <class 'numpy.float32'>
output:  <class 'numpy.float32'>
accuracy: 0.70375 tflite size: 146.3125kB compressed: 114.5927734375kB time: 7.691739320755005ms


In [None]:

model = prune_low_magnitude(model, **pruning_params)