In [9]:
import argparse
import os
import numpy as np
import os
import tensorflow as tf
import zlib
# parser = argparse.ArgumentParser()
# parser.add_argument('--model', type=str, required=True, help='model name')
# parser.add_argument('--mfcc', action='store_true', help='use MFCCs')
# parser.add_argument('--silence', action='store_true', help='add silence')
# args = parser.parse_args()

# Feature type used by the rest of the notebook: MFCCs when True,
# resized STFT spectrograms otherwise.
use_mfccs = True

# Seed both TensorFlow and NumPy so runs are repeatable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Download the archive into ./data and extract it there.
zip_path = tf.keras.utils.get_file(
  origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
  fname='mini_speech_commands.zip',
  extract=True,
  cache_dir='.', cache_subdir='data')

data_dir = os.path.join('.', 'data', 'mini_speech_commands')


def _read_split(split_path):
    """Return the lines of the text file `split_path` as a 1-D string tensor."""
    # The context manager closes the handle deterministically; a bare
    # open(...).read() leaves that to the garbage collector.
    with open(split_path, 'r') as split_file:
        return tf.convert_to_tensor(split_file.read().splitlines())


train_files = _read_split('kws_train_split.txt')
val_files = _read_split('kws_val_split.txt')
test_files = _read_split('kws_test_split.txt')

# Every entry of the dataset directory except the README is treated as a label.
# NOTE(review): the order comes straight from listdir, so label ids follow the
# listing order — confirm it is stable across machines before comparing models.
LABELS = np.array(tf.io.gfile.listdir(str(data_dir)))
LABELS = LABELS[LABELS != 'README.md']

In [10]:


class SignalGenerator:
    """Turns WAV file paths into batched (features, label_id) tf.data datasets.

    Features are either MFCCs or a resized STFT magnitude spectrogram,
    selected once in the constructor through ``mfcc``.
    """

    def __init__(self, labels, sampling_rate, frame_length, frame_step,
            num_mel_bins=None, lower_frequency=None, upper_frequency=None,
            num_coefficients=None, mfcc=False):
        """Store the feature-extraction settings and pick the preprocessing path.

        Args:
            labels: collection of label names; a label id is the position at
                which the file's parent-directory name matches.
            sampling_rate: number of samples every clip is padded/cut to; also
                passed as the sample rate of the mel filter bank.
            frame_length: STFT window length (also used as the FFT length).
            frame_step: STFT hop size.
            num_mel_bins, lower_frequency, upper_frequency: mel filter-bank
                settings, only read when ``mfcc`` is True.
            num_coefficients: number of leading MFCCs kept.
            mfcc: must be exactly ``True`` to select the MFCC pipeline; any
                other value selects the STFT pipeline.
        """
        self.labels = labels
        self.sampling_rate = sampling_rate
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.num_mel_bins = num_mel_bins
        self.lower_frequency = lower_frequency
        self.upper_frequency = upper_frequency
        self.num_coefficients = num_coefficients
        num_spectrogram_bins = (frame_length) // 2 + 1

        if mfcc is True:
            # The filter bank is built once here and reused for every clip.
            self.linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
                    self.num_mel_bins, num_spectrogram_bins, self.sampling_rate,
                    self.lower_frequency, self.upper_frequency)
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        """Load one WAV file and return ``(audio, label_id)``.

        The label is the second-to-last path component (the parent directory).
        """
        parts = tf.strings.split(file_path, os.path.sep)
        label = parts[-2]
        label_id = tf.argmax(label == self.labels)
        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)
        # Drops axis 1 of the decoded audio — assumes single-channel files.
        audio = tf.squeeze(audio, axis=1)

        return audio, label_id

    def pad(self, audio):
        """Return ``audio`` as exactly ``sampling_rate`` samples.

        Shorter clips are zero-padded at the end. Longer clips are cut to the
        first ``sampling_rate`` samples; without the cut the padding size
        would be negative and ``tf.zeros`` would fail.
        """
        audio = audio[:self.sampling_rate]
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio, zero_padding], 0)
        # Pin the static shape so downstream ops see a fixed length.
        audio.set_shape([self.sampling_rate])

        return audio

    def get_spectrogram(self, audio):
        """Return the magnitude of the STFT of ``audio``."""
        stft = tf.signal.stft(audio, frame_length=self.frame_length,
                frame_step=self.frame_step, fft_length=self.frame_length)
        spectrogram = tf.abs(stft)

        return spectrogram

    def get_mfccs(self, spectrogram):
        """Project a spectrogram onto the mel bank and return the leading MFCCs."""
        mel_spectrogram = tf.tensordot(spectrogram,
                self.linear_to_mel_weight_matrix, 1)
        # The small offset keeps the logarithm finite where the mel energy is zero.
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :self.num_coefficients]

        return mfccs

    def preprocess_with_stft(self, file_path):
        """Return ``(spectrogram, label)`` with the spectrogram resized to 32x32."""
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        # A trailing axis is appended before the image resize.
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.image.resize(spectrogram, [32, 32])

        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        """Return ``(mfccs, label)`` with a trailing axis appended to the MFCCs."""
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        mfccs = self.get_mfccs(spectrogram)
        mfccs = tf.expand_dims(mfccs, -1)

        return mfccs, label

    def make_dataset(self, files, train):
        """Build the dataset for ``files``: map -> batch(32) -> cache [-> shuffle].

        Shuffling is only added when ``train`` is exactly ``True``. It is applied
        after batching, so it reorders whole batches (buffer of 100 batches).
        """
        ds = tf.data.Dataset.from_tensor_slices(files)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()
        if train is True:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds

# Feature-extraction settings for the two supported front ends.
if use_mfccs:
  OPTIONS = {'frame_length': 640, 'frame_step': 320, 'mfcc': True,
        'lower_frequency': 20, 'upper_frequency': 4000, 'num_mel_bins': 40,
        'num_coefficients': 10}
else:
  OPTIONS = {'frame_length': 256, 'frame_step': 128, 'mfcc': False}

generator = SignalGenerator(LABELS, 16000, **OPTIONS)
train_ds = generator.make_dataset(train_files, True)
val_ds = generator.make_dataset(val_files, False)
test_ds = generator.make_dataset(test_files, False)

# Peek at a single batch to learn the per-example feature shape
# (the leading batch axis is dropped).
for x, y in train_ds:
    input_shape = x.shape.as_list()[1:]
    break

print(f'Input shape: {input_shape}')

# One output unit per label. Derived from LABELS instead of a hard-coded 8 so
# the classifier width always matches the label ids produced by the generator.
output_shape = len(LABELS)

Input shape: [49, 10, 1]


In [11]:
class MyModel:
    """One of three Keras classifiers ('mlp', 'cnn', 'ds-cnn') plus train/export helpers.

    Several methods rely on notebook-level names that are not parameters:
    ``use_mfccs`` (first-layer strides), ``train_ds`` (pruning schedule length),
    ``tfmot`` (pruning), ``compute_size`` and ``representative_dataset_gen``.
    """

    def __init__(self, model_name, alpha, input_shape, output_shape, final_sparsity=None):
        """Build the network and derive the name used for output folders.

        Args:
            model_name: 'mlp', 'cnn' or 'ds-cnn'.
            alpha: width multiplier; scales the filter count of 'ds-cnn' and,
                when different from 1, adds a ``_ws<digits>`` name suffix.
            input_shape: per-example input shape (no batch axis).
            output_shape: number of units of the final Dense layer.
            final_sparsity: when not None, enables magnitude pruning in
                ``compile_model`` and adds a ``_mb<digits>`` name suffix.

        Raises:
            ValueError: if ``model_name`` is not one of the supported names.
        """
        if model_name == 'mlp':
            model = tf.keras.Sequential([
                tf.keras.layers.Flatten(input_shape=input_shape, name='flatten'),
                tf.keras.layers.Dense(256, activation='relu', name='first_dense'),
                tf.keras.layers.Dense(256, activation='relu', name='second_dense'),
                tf.keras.layers.Dense(256, activation='relu', name='third_dense'),
                tf.keras.layers.Dense(output_shape, name='fourth_dense')])

        elif model_name == 'cnn':
            # The first convolution's stride depends on the feature type.
            strides = [2, 1] if use_mfccs else [2, 2]

            model = tf.keras.Sequential([
                tf.keras.layers.Conv2D(input_shape=input_shape, filters=128, kernel_size=[3, 3],
                                       strides=strides, use_bias=False, activation='relu',
                                       name='first_conv1d'),
                tf.keras.layers.BatchNormalization(momentum=0.1),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Conv2D(input_shape=input_shape, filters=128, kernel_size=[3, 3],
                                       strides=[1, 1], use_bias=False, name='second_conv1d'),
                tf.keras.layers.BatchNormalization(momentum=0.1),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Conv2D(input_shape=input_shape, filters=128, kernel_size=[3, 3],
                                       strides=[1, 1], use_bias=False, name='third_conv1d'),
                tf.keras.layers.BatchNormalization(momentum=0.1),
                tf.keras.layers.ReLU(),
                tf.keras.layers.GlobalAveragePooling2D(),
                tf.keras.layers.Dense(output_shape, name='fc')])

        elif model_name == 'ds-cnn':
            strides = [2, 1] if use_mfccs else [2, 2]

            model = tf.keras.Sequential([
                tf.keras.layers.Conv2D(input_shape=input_shape, filters=int(256*alpha),
                                       kernel_size=[3, 3], strides=strides, use_bias=False,
                                       name='first_conv1d'),
                tf.keras.layers.BatchNormalization(momentum=0.1),
                tf.keras.layers.ReLU(),
                tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
                tf.keras.layers.Conv2D(input_shape=input_shape, filters=int(256*alpha),
                                       kernel_size=[1, 1], strides=[1, 1], use_bias=False,
                                       name='second_conv1d'),
                tf.keras.layers.BatchNormalization(momentum=0.1),
                tf.keras.layers.ReLU(),
                tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
                tf.keras.layers.Conv2D(input_shape=input_shape, filters=int(alpha*256),
                                       kernel_size=[1, 1], strides=[1, 1], use_bias=False,
                                       name='third_conv1d'),
                tf.keras.layers.BatchNormalization(momentum=0.1),
                tf.keras.layers.ReLU(),
                tf.keras.layers.GlobalAvgPool2D(),
                tf.keras.layers.Dense(output_shape, name='fc')])

        else:
            # Previously an unknown name fell through to an UnboundLocalError
            # on `model`; fail with an explicit message instead.
            raise ValueError(
                f"Unknown model_name {model_name!r}; expected 'mlp', 'cnn' or 'ds-cnn'")

        model.summary()
        self.model = model
        self.alpha = alpha
        self.final_sparsity = final_sparsity
        # Remembered so save_model/compile_model do not depend on a global.
        self.input_shape = list(input_shape)

        # The name doubles as the output sub-folder: the digits after the
        # decimal point of alpha / final_sparsity are appended as suffixes.
        self.model_name = model_name.lower()
        if alpha != 1:
            self.model_name += '_ws' + str(alpha).split('.')[1]
        if final_sparsity is not None and 'lstm' not in self.model_name:
            self.model_name += '_mb' + str(final_sparsity).split('.')[1]
            self.magnitude_pruning = True
        else:
            self.magnitude_pruning = False

    def compile_model(self, optimizer, loss_function, eval_metric):
        """Compile the model, wrapping it for magnitude pruning first when enabled.

        The pruning schedule runs from epoch 5 to epoch 25, measured in steps of
        the notebook-level ``train_ds``.
        """
        if self.magnitude_pruning:
            steps_per_epoch = len(train_ds)
            pruning_params = {
                'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
                    initial_sparsity=0.30,
                    # Use the sparsity requested in the constructor; it was
                    # previously fixed at 0.9 regardless of that argument.
                    final_sparsity=self.final_sparsity,
                    begin_step=steps_per_epoch*5,
                    end_step=steps_per_epoch*25)
            }

            self.model = tfmot.sparsity.keras.prune_low_magnitude(self.model, **pruning_params)

            # Build with the real input shape (batch axis left open) rather
            # than an unrelated hard-coded one.
            self.model.build([None] + self.input_shape)

        self.model.compile(
            optimizer=optimizer,
            loss=loss_function,
            metrics=eval_metric
        )

    def train_model(self, X_train, X_val, N_EPOCH, callbacks=None):
        """Fit the model and return the Keras history object.

        Args:
            X_train: training data passed to ``fit``.
            X_val: validation data passed to ``fit``.
            N_EPOCH: number of epochs.
            callbacks: optional iterable of callbacks; it is copied, so the
                caller's list is never modified.
        """
        # Work on a private copy: the old mutable default (`[]`) accumulated
        # one UpdatePruningStep per call and the caller's list was mutated.
        callbacks = list(callbacks) if callbacks is not None else []

        if self.magnitude_pruning:
            callbacks.append(tfmot.sparsity.keras.UpdatePruningStep())

        print('\tTraining... ')
        print('\t', end='')

        history = self.model.fit(
            X_train,
            epochs=N_EPOCH,
            validation_data=X_val,
            verbose=1,
            callbacks=callbacks,
        )

        return history

    def evaluate_model(self, X_test):
        """Return whatever ``model.evaluate`` returns for ``X_test``."""
        return self.model.evaluate(X_test)

    def get_model(self):
        """Return the wrapped Keras model."""
        return self.model

    def save_model(self, model_folder):
        """Save the model to ``model_folder`` with a batch-size-1 float32 signature."""
        run_model = tf.function(lambda x: self.model(x))
        # The signature uses the shape given to the constructor, not the
        # notebook-global `input_shape`.
        concrete_func = run_model.get_concrete_function(
            tf.TensorSpec([1] + self.input_shape, tf.float32))
        self.model.save(model_folder, signatures=concrete_func)
        print(f'Model {self.model_name} saved at {model_folder}')

    def prune_model(self, pruned_model_dir, weights_only = True):
        """Strip pruning wrappers, convert to TFLite and write it zlib-compressed.

        The file ``saved_model.tflite`` written here holds zlib-compressed bytes,
        so readers must decompress it before handing it to an interpreter.

        Args:
            pruned_model_dir: output directory (created if missing).
            weights_only: when true, enables ``tf.lite.Optimize.DEFAULT``.
        """
        os.makedirs(pruned_model_dir, exist_ok=True)

        self.model = tfmot.sparsity.keras.strip_pruning(self.model)
        converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
        if weights_only:
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
        tflite_model = converter.convert()
        with open(os.path.join(pruned_model_dir, 'saved_model.tflite'), 'wb') as fp:
            fp.write(zlib.compress(tflite_model))

        size_model = compute_size(pruned_model_dir)
        print(f'Size of the tflite {self.model_name}: {size_model} KB')

    def _write_tflite(self, converter, output_dir):
        """Run ``converter``, write ``saved_model.tflite`` into ``output_dir``, return its size in KB."""
        tflite_model = converter.convert()
        with open(os.path.join(output_dir, 'saved_model.tflite'), 'wb') as fp:
            fp.write(tflite_model)
        return compute_size(output_dir)

    def convert_to(self, model_folder, tflite=True, weights_only=True, weights_activation=True):
        """Export up to three TFLite variants and print the size of each.

        Args:
            model_folder: SavedModel directory; its size is printed and it is the
                source of the plain TFLite conversion.
            tflite: plain conversion into ``./tflite_models/<name>``.
            weights_only: conversion with default optimizations into
                ``./weight_only_PTQ_models/<name>``.
            weights_activation: full-integer conversion into
                ``./weight_activation_PTQ_models/<name>``; skipped for models
                named 'lstm' or 'cnn'.
        """
        size_original_model = compute_size(model_folder)
        print(f'Size of the original {self.model_name}: {size_original_model} KB')

        if tflite:
            tflite_model_dir = os.path.join("./tflite_models", self.model_name)
            os.makedirs(tflite_model_dir, exist_ok=True)

            converter = tf.lite.TFLiteConverter.from_saved_model(model_folder)

            size_tflite_model = self._write_tflite(converter, tflite_model_dir)
            print(f'Size of the tflite {self.model_name}: {size_tflite_model} KB')

        if weights_only:
            qtflite_model_dir = os.path.join("./weight_only_PTQ_models", self.model_name)
            os.makedirs(qtflite_model_dir, exist_ok=True)

            converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
            converter.optimizations = [tf.lite.Optimize.DEFAULT]

            size_qtflite_model = self._write_tflite(converter, qtflite_model_dir)
            print(f'Size of the weight only quantization model {self.model_name}: {size_qtflite_model} KB')

        if weights_activation and self.model_name not in ('lstm', 'cnn'):
            qatflite_model_dir = os.path.join("./weight_activation_PTQ_models", self.model_name)
            os.makedirs(qatflite_model_dir, exist_ok=True)

            converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
            converter.optimizations = [tf.lite.Optimize.DEFAULT]
            # NOTE(review): `representative_dataset_gen` is not defined in the
            # visible part of the notebook — confirm it exists before enabling
            # this branch.
            converter.representative_dataset = representative_dataset_gen

            # Restrict the converter to int8 ops with uint8 inputs and outputs.
            converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
            converter.inference_input_type = tf.uint8
            converter.inference_output_type = tf.uint8

            size_aqtflite_model = self._write_tflite(converter, qatflite_model_dir)
            print(f'Size of the weight and activation quantization model  {self.model_name}: {size_aqtflite_model} KB')

In [12]:
def compute_size(path_to_explore):
    """Return the combined size of every file under ``path_to_explore`` in KB.

    The directory tree is walked recursively; the byte total is divided by
    1024 and rounded to three decimals.
    """
    total_bytes = sum(
        os.path.getsize(os.path.join(dirpath, filename))
        for dirpath, _subdirs, filenames in os.walk(path_to_explore)
        for filename in filenames
    )
    return round(total_bytes / 1024, 3)

In [6]:
!pip install tensorflow-model-optimization
import tensorflow_model_optimization as tfmot

Collecting tensorflow-model-optimization
[?25l  Downloading https://files.pythonhosted.org/packages/55/38/4fd48ea1bfcb0b6e36d949025200426fe9c3a8bfae029f0973d85518fa5a/tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl (172kB)
[K     |██                              | 10kB 26.6MB/s eta 0:00:01[K     |███▉                            | 20kB 20.5MB/s eta 0:00:01[K     |█████▊                          | 30kB 16.8MB/s eta 0:00:01[K     |███████▋                        | 40kB 16.2MB/s eta 0:00:01[K     |█████████▌                      | 51kB 12.5MB/s eta 0:00:01[K     |███████████▍                    | 61kB 13.0MB/s eta 0:00:01[K     |█████████████▎                  | 71kB 12.7MB/s eta 0:00:01[K     |███████████████▏                | 81kB 12.8MB/s eta 0:00:01[K     |█████████████████               | 92kB 13.3MB/s eta 0:00:01[K     |███████████████████             | 102kB 13.7MB/s eta 0:00:01[K     |████████████████████▉           | 112kB 13.7MB/s eta 0:00:01[K  

In [13]:
step = 10


def scheduler(epoch, lr):
    """Learning-rate schedule: scale ``lr`` by 0.1 at epochs 20 and 25.

    At every other epoch the incoming rate is returned unchanged.
    """
    decay_epochs = (20, 25)
    return lr*0.1 if epoch in decay_epochs else lr


In [20]:
# Start from clean output folders so sizes measured later only reflect this run.
!rm -r models
!rm -r pruned_models/

# Training hyper-parameters and the shared optimizer / loss / metric objects.
N_EPOCH = 30
LR = 0.02
optimizer = tf.keras.optimizers.Adam(learning_rate=LR)
# The models end in a Dense layer without activation, hence from_logits=True.
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
eval_metric = tf.keras.metrics.SparseCategoricalAccuracy()

# Train, evaluate, save and export each listed architecture.
for model_name in ['ds-cnn']:
    alpha = 0.3
    # None means MyModel does not enable magnitude pruning for this run.
    final_sparsity = None
    #checkpoints_dir = f'./chekpoints/{model.model_name}'
    # model_name, alpha, input_shape, output_shape, final_sparsity
    model = MyModel(model_name, alpha, input_shape, output_shape, final_sparsity)
    model.compile_model(optimizer, loss_function, eval_metric)
    # `scheduler` scales the learning rate by 0.1 at epochs 20 and 25.
    # NOTE: patience equals N_EPOCH, so early stopping cannot trigger before the last epoch.
    history = model.train_model(train_ds, val_ds, N_EPOCH, callbacks=[tf.keras.callbacks.LearningRateScheduler(schedule=scheduler),
                                                                      tf.keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy',patience=30,restore_best_weights=True)])

    # NOTE: despite its name, `test_mae` holds the sparse categorical accuracy
    # (the only metric the model was compiled with), not a mean absolute error.
    [test_loss, test_mae] = model.evaluate_model(test_ds)
    print(test_mae)
    model_dir = f'./models/{model.model_name}'
    model.save_model(model_dir)
    #model.convert_to(model_dir, tflite=False, weights_only=False, weights_activation=False)

    # magnitude based pruning
    # (with final_sparsity = None above, this step only strips/converts the
    # model and writes the zlib-compressed TFLite file)
    # `pruned_model_dir` is read again by the evaluation and latency cells.
    pruned_model_dir = f'./pruned_models/{model.model_name}'
    model.prune_model(pruned_model_dir)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
first_conv1d (Conv2D)        (None, 24, 8, 76)         684       
_________________________________________________________________
batch_normalization_6 (Batch (None, 24, 8, 76)         304       
_________________________________________________________________
re_lu_6 (ReLU)               (None, 24, 8, 76)         0         
_________________________________________________________________
depthwise_conv2d_4 (Depthwis (None, 22, 6, 76)         684       
_________________________________________________________________
second_conv1d (Conv2D)       (None, 22, 6, 76)         5776      
_________________________________________________________________
batch_normalization_7 (Batch (None, 22, 6, 76)         304       
_________________________________________________________________
re_lu_7 (ReLU)               (None, 22, 6, 76)        

INFO:tensorflow:Assets written to: ./models/ds-cnn_ws3/assets


Model ds-cnn_ws3 saved at ./models/ds-cnn_ws3
INFO:tensorflow:Assets written to: /tmp/tmprb4cs435/assets


INFO:tensorflow:Assets written to: /tmp/tmprb4cs435/assets


Size of the tflite ds-cnn_ws3: 22.632 KB


In [16]:
import tensorflow.lite as tflite

In [23]:
# The exported file is zlib-compressed, so decompress it before loading.
# Only the read needs the file handle; it is released right after.
with open(f'{pruned_model_dir}/saved_model.tflite', 'rb') as fp:
    model_zip = zlib.decompress(fp.read())

interpreter = tflite.Interpreter(model_content=model_zip)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

# Re-batch to single examples: the interpreter is fed one example per invoke.
# (The former unused `enumerate` index is gone, so this loop no longer
# overwrites the notebook-level name `length`.)
for x, y in test_ds.unbatch().batch(1):
    interpreter.set_tensor(input_details[0]['index'], x)
    interpreter.invoke()
    y_pred = interpreter.get_tensor(output_details[0]['index'])
    accuracy.update_state(y, y_pred)

print(accuracy.result().numpy())


0.93875


In [21]:
import argparse
import numpy as np
from subprocess import call
import tensorflow as tf
import time
from scipy import signal

parser = argparse.ArgumentParser()


#call('sudo sh -c "echo performance > /sys/devices/system/cpu/cpufreq/policy0/scaling_governor"',shell=True)

# Front-end settings used to build the benchmark input.
rate = 16000
length = 640
stride = 320
resize = 32
num_mel_bins = 40
num_coefficients = 10

num_frames = (rate - length) // stride + 1
num_spectrogram_bins = length // 2 + 1

linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, rate, 20, 4000)

# The exported file is zlib-compressed, so decompress it before loading.
with open(f'{pruned_model_dir}/saved_model.tflite', 'rb') as fp:
    model_zip = zlib.decompress(fp.read())

interpreter = tflite.Interpreter(model_content=model_zip)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()


# Timings use time.perf_counter(): it is monotonic and has the highest
# available resolution, whereas time.time() can jump with wall-clock
# adjustments and is too coarse for sub-millisecond intervals.
inf_latency = []
tot_latency = []
for i in range(100):
    # Random 48000-sample input; its generation is excluded from the timing.
    sample = np.array(np.random.random_sample(48000), dtype=np.float32)

    start = time.perf_counter()

    # Resampling
    sample = signal.resample_poly(sample, 1, 48000 // rate)

    sample = tf.convert_to_tensor(sample, dtype=tf.float32)

    # STFT
    stft = tf.signal.stft(sample, length, stride,
            fft_length=length)
    spectrogram = tf.abs(stft)

    if use_mfccs is False:
        # Resize (optional)
        spectrogram = tf.reshape(spectrogram, [1, num_frames, num_spectrogram_bins, 1])
        spectrogram = tf.image.resize(spectrogram, [resize, resize])
        input_tensor = spectrogram
    else:
        # MFCC (optional)
        mel_spectrogram = tf.tensordot(spectrogram, linear_to_mel_weight_matrix, 1)
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :num_coefficients]
        mfccs = tf.reshape(mfccs, [1, num_frames, num_coefficients, 1])
        input_tensor = mfccs

    interpreter.set_tensor(input_details[0]['index'], input_tensor)
    # Inference latency covers invoke() plus reading the output tensor.
    start_inf = time.perf_counter()
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])

    end = time.perf_counter()
    tot_latency.append(end - start)

    inf_latency.append(end - start_inf)
    # Pause between iterations; not part of either measurement.
    time.sleep(0.1)

print('Inference Latency {:.2f}ms'.format(np.mean(inf_latency)*1000.))
print('Total Latency {:.2f}ms'.format(np.mean(tot_latency)*1000.))

Inference Latency 0.45ms
Total Latency 8.82ms
