In [6]:
import argparse
import numpy as np
import os
import pandas as pd
import tensorflow as tf
import tensorflow.lite as tflite
from tensorflow import keras
import zlib
from platform import python_version
import tensorflow_model_optimization as tfmot   
import tempfile
# Log the interpreter version so recorded results can be tied to an environment.
interpreter_version = python_version()
print(f"Python version used to excute the code is {interpreter_version}")



Python version used to excute the code is 3.9.7


In [7]:
from classes import read_audios
from classes import SignalGenerator
from classes import make_models
from classes import model_analysis
from classes import latency

In [8]:
# ---- Experiment configuration: the values a reader is expected to tune ----
version = "a"
m = "cnn"        # model name, one of: mlp, cnn, ds_cnn
mfcc = True      # True -> MFCC features, False -> STFT features
alpha = 0.3      # width multiplier used to apply the structured pruning
epochs = 1
units = 8        # number of output classes (8: without silence, 9: with silence)

# ---- Names derived from the configuration above ----
model_version = f"_V_{version}_alpha={alpha}"
mymodel = f"{m}{model_version}"
# File name under which the best model is stored once converted to TFLite.
TFLITE = mymodel + '.tflite'
# Fix the random seeds (NumPy and TensorFlow) so that runs can be reproduced.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [9]:
# Location of the mini speech commands dataset.
# Written as a raw string: the Windows path contains backslash sequences such as
# "\P", "\M", "\C", "\L", "\e", "\d" and "\m" that are invalid escapes in a normal
# string literal (they trigger a warning today and are slated to become an error).
# The resulting value is unchanged.
# NOTE(review): this is a machine-specific absolute path; adjust it to your setup.
DATA_DIR = r'I:\Polito\ML4IOT\CODES TO DO\Lab3\ex2\data\mini_speech_commands'

# Split the dataset into the train / validation / test file lists.
reading_class = read_audios(DATA_DIR)
train_files, val_files, test_files = reading_class.read()

In [None]:
# Candidate front-end parameters to explore.
number_of_bins = [30, 40]
lower_freq = [1000, 2000]
upper_freq = [3000, 4000]
sample_rate = [8000, 16000]

# Column layout of the results table. The last three columns are metrics and
# start at 0 as placeholders.
# NOTE(review): 'acuracy' is kept as spelled because it is a column key that
# other code may rely on.
GRID_COLUMNS = ['number_of_bins', 'lower_freq', 'upper_freq', 'sample_rate',
                'acuracy', 'size', 'latency']

# Build every valid combination in one pass instead of appending row by row.
# A combination is kept only when the band is non-empty (lower < upper) and the
# upper edge does not exceed half the sample rate.
# Each row carries one value per column (seven in total): the four parameters
# followed by three zero placeholders for the metrics.
grid_rows = [
    [bins, low, high, rate, 0, 0, 0]
    for bins in number_of_bins
    for low in lower_freq
    for high in upper_freq
    for rate in sample_rate
    if low < high and high <= rate / 2
]
df = pd.DataFrame(grid_rows, columns=GRID_COLUMNS)

In [None]:
# Train a model on the dataset split, export it to TFLite, then report its
# accuracy, size and latency.

# Class names handed to SignalGenerator.
LABELS = np.array(['stop', 'up', 'yes', 'right', 'left', 'no', 'down', 'go'], dtype=str)

# Front-end settings; the selected dict is expanded into SignalGenerator keyword arguments.
STFT_OPTIONS = {'frame_length': 256, 'frame_step': 128, 'mfcc': False}
MFCC_OPTIONS = {'frame_length': 640, 'frame_step': 320, 'mfcc': True,
        'lower_frequency': 20, 'upper_frequency': 4000, 'num_mel_bins': 40,
        'num_coefficients': 10, 'sampling_rate': 16000}

# Select the front end and the strides that go with it from the `mfcc` flag.
if mfcc:
    options = MFCC_OPTIONS
    strides = [2, 1]
else:
    options = STFT_OPTIONS
    strides = [2, 2]

# Build the three datasets; only the training call passes True as second argument
# (presumably a shuffle flag - confirm in SignalGenerator.make_dataset).
generator = SignalGenerator(LABELS, **options)
train_ds = generator.make_dataset(train_files, True)
val_ds = generator.make_dataset(val_files, False)
test_ds = generator.make_dataset(test_files, False)

model_maker = make_models()

############ Applying Structured-Based Pruning
# `alpha` is the width multiplier from the configuration cell.
# NOTE(review): the meaning of the trailing `False, train_ds` arguments is defined
# in make_models.models - confirm there.
model, model_checkpoint_callback, checkpoint_filepath = model_maker.models(
    alpha, strides, units, model_version, mfcc, mymodel, False, train_ds)

############ Applying Magnitude-Based Pruning
#model, model_checkpoint_callback, checkpoint_filepath = model_maker.models(1, strides, units, model_version, mfcc, mymodel)

history = model.fit(train_ds, epochs=epochs, validation_data=val_ds,
                    callbacks=[model_checkpoint_callback])
model_maker.plot_loss(history, mymodel)

# Convert the checkpointed model to TFLite and evaluate the exported file.
analysis = model_analysis(test_ds, checkpoint_filepath, train_ds)
Compressed, tflite_model_dir = analysis.S_pruning_Model_evaluate_and_compress_to_TFlite(
    tflite_model_dir=TFLITE)

acc, size = analysis.load_and_evaluation(tflite_model_dir, Compressed)

# Measure latency with the same front-end settings used for training: take the
# `mfcc` flag and the MFCC parameters from the configuration above rather than
# repeating literals that can drift out of sync with it.
laten = latency()
inf, tot = laten.calculate(
    model=tflite_model_dir,
    mfcc=mfcc,
    rate=MFCC_OPTIONS['sampling_rate'],
    lower_frequency=MFCC_OPTIONS['lower_frequency'],
    upper_frequency=MFCC_OPTIONS['upper_frequency'],
    num_mel_bins=MFCC_OPTIONS['num_mel_bins'],
)

In [26]:
# Cross-check: run the standalone kws_latency.py script on the exported model,
# passing the same MFCC settings (rate 16000, 20-4000 Hz, 40 bins) that the
# latency().calculate(...) cells in this notebook use.
# NOTE(review): the model path is hard-coded for version "a", alpha=0.3 - update it
# if the configuration cell changes.
%run kws_latency.py --model ./models/cnn_V_a_alpha=0.3.tflite --mfcc --rate 16000 --lower-frequency 20 --upper-frequency 4000 --bins 40

Inference Latency 2.67ms
Total Latency 72.43ms


In [25]:
# Re-measure the latency of the saved TFLite model with the in-notebook helper.
latency_args = dict(
    model='./models/cnn_V_a_alpha=0.3.tflite',
    mfcc=True,
    rate=16000,
    lower_frequency=20,
    upper_frequency=4000,
    num_mel_bins=40,
)
laten = latency()
inf, tot = laten.calculate(**latency_args)

Inference Latency 2.09ms
Total Latency 55.03ms


In [None]:
# Quantize the exported TFLite model with the PQT option only
# (presumably post-training quantization - confirm in model_analysis).
# Note: this rebinds `Compressed`, which an earlier cell also assigns.
Compressed, Quantized = analysis.apply_Quantization(
    TFLITE,
    PQT=True,
    WAPQT=False,
)

the model is saved successfuly to ./models/PQT_cnn_V_a_alpha=0.3.tflite


In [None]:
# Evaluate the PQT-quantized model; the call prints the model size before and
# after compression together with the accuracy.
analysis.load_and_evaluation(Quantized, Compressed)

************************************************** 
 The Size of TF lite model  Before compression is = 32.992 kb
************************************************** 
 The Size of TF lite model  After compression is = 28.237 kb
************************************************** 
 The accuracy of TF lite model is = 69.62 


In [None]:
# Quantize the exported TFLite model with the WAPQT option only
# (presumably weights-and-activations quantization - confirm in model_analysis).
# FIXME: the recorded run of this cell failed with
#   TypeError: 'generator' object is not callable
# The failure is raised inside analysis.apply_Quantization; it presumably hands a
# generator object where a callable is expected - verify in the `classes` module.
WA_Compressed, WA_Quantized = analysis.apply_Quantization(
    TFLITE,
    PQT=False,
    WAPQT=True,
)

TypeError: 'generator' object is not callable

In [None]:
# Evaluate the WAPQT-quantized model; requires the WAPQT quantization cell to
# have completed so that WA_Quantized and WA_Compressed exist.
analysis.load_and_evaluation(WA_Quantized, WA_Compressed)