In [1]:
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import librosa
from IPython import display
import sys
sys.path.append("../")
from DSP_prototype.DSP_algorithms import stft
import tensorflow as tf
from tensorflow import keras
import tensorflow_model_optimization as tfmot
import os
import csv
import tempfile

2025-01-17 23:42:15.728010: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737153735.743143   58901 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737153735.747823   58901 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-17 23:42:15.762900: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)


In [3]:
sample_rate = 16000
label_strings = ["down", "go", "left", "no", "off", "on", "right", "stop", "up", "yes", "silence", "unknown"]
selected_labels = [1, 2, 3, 6, 7, 9, 10, 11]
selected_labels_dict = {x:i for i, x in enumerate(selected_labels)}
NUM_CLASSES = len(selected_labels)
commands = [label_strings[x] for x in selected_labels]
selected_labels_str = [label_strings[x] for x in selected_labels]
prediction_dict = {i: label for i, label in enumerate(selected_labels_str)}
prediction_dict_reversed = {label: i for i, label in enumerate(selected_labels_str)}

In [49]:
dataset, info = tfds.load('speech_commands', with_info=True, as_supervised=True, split="test+validation+train")
print(f"Full dataset length: {len(dataset)}")

exclude_classes = [x for x in range(12) if x not in selected_labels]
dataset = dataset.shuffle(10000, seed=2137)

Full dataset length: 100503


In [6]:
def split_data_and_labels(dataset, length):
    data = []
    labels = []
    i = 0
    unknown_counter = 0
    for feature, label in dataset:
        if i == length:
            break
            
        pcm = feature.numpy()
        pcm_padded = np.pad(pcm, (0, sample_rate - len(pcm)), 'constant', constant_values=0)
        pcm_padded = pcm_padded.astype(np.float32)
        pcm_padded = librosa.resample(pcm_padded, orig_sr=16e3, target_sr=8e3)
        # pcm_padded += np.random.uniform(0, 200, size=(pcm_padded.size))
        
        label = int(label.numpy())
        if label in selected_labels:
            if label == 11 and unknown_counter < 5:
                unknown_counter += 1
                
            if (label == 11 and unknown_counter == 5) or label != 11:
                data.append(pcm_padded)
                labels.append(selected_labels_dict.get(label))
                i += 1
                unknown_counter = 0
    return np.vstack(data), np.vstack(labels)


In [50]:
data, labels = split_data_and_labels(dataset, len(dataset))
NEW_DATASET_LENGTH = len(data)
print(f"Filtered length: {NEW_DATASET_LENGTH}")

2025-01-18 00:24:21.857748: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Filtered length: 30069


In [51]:
TRAIN_PART = 0.7
VAL_PART = 0.1
TEST_PART = 0.2

train_data = data[:int(NEW_DATASET_LENGTH*TRAIN_PART)]
train_labels = labels[:int(NEW_DATASET_LENGTH*TRAIN_PART)]
TRAIN_LEN = len(train_data)

val_data = data[TRAIN_LEN:TRAIN_LEN+int(NEW_DATASET_LENGTH*VAL_PART)]
val_labels = labels[TRAIN_LEN:TRAIN_LEN+int(NEW_DATASET_LENGTH*VAL_PART)]
VAL_LEN = len(val_data)

test_data = data[TRAIN_LEN+VAL_LEN:]
test_labels = labels[TRAIN_LEN+VAL_LEN:]
TEST_LEN = len(test_data)

print(f"Train data length: {TRAIN_LEN}")
print(f"Validation data length: {VAL_LEN}")
print(f"Test data length: {TEST_LEN}")
print(f"Full dataset length: {NEW_DATASET_LENGTH}")

Train data length: 21048
Validation data length: 3006
Test data length: 6015
Full dataset length: 30069


In [52]:
train_data = np.array([np.abs(stft(x, fs=8e3, N=256, hop_size=128)[0]) for x in train_data])
valid_data = np.array([np.abs(stft(x, fs=8e3, N=256, hop_size=128)[0]) for x in val_data])
test_data = np.array([np.abs(stft(x, fs=8e3, N=256, hop_size=128)[0]) for x in test_data])

# Convert to melscale

In [31]:
# mel_filter = librosa.filters.mel(sr=8e3, n_fft=256, n_mels=40)
# train_data = np.array([np.dot(mel_filter, x) for x in train_data])
# valid_data = np.array([np.dot(mel_filter, x) for x in valid_data])
# test_data = np.array([np.dot(mel_filter, x) for x in test_data])


In [53]:
train_data = train_data.reshape(len(train_data), train_data[0].shape[0], train_data[0].shape[1], 1)
valid_data = valid_data.reshape(len(valid_data), valid_data[0].shape[0], valid_data[0].shape[1], 1)
test_data = test_data.reshape(len(test_data), test_data[0].shape[0], test_data[0].shape[1], 1)

In [54]:
train_data = tf.image.resize(train_data, [32, 32])
valid_data = tf.image.resize(valid_data, [32, 32])
test_data = tf.image.resize(test_data, [32, 32])

2025-01-18 00:24:36.437929: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1325013696 exceeds 10% of free system memory.


In [None]:
np.max(train_data)

# Load model for prune-tuning

In [79]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
pruning_params_sparsity_0_5 = {
    'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(target_sparsity=0.5,
                                                              begin_step=0,
                                                              frequency=100)
}

pruning_params_2_by_4 = {
    'sparsity_m_by_n': (2, 4),
}

input_shape = train_data[0].shape  # This will now be (129, 126, 1)
print('Input shape:', input_shape)

model = keras.models.Sequential([
    keras.layers.Input(shape=input_shape),
    keras.layers.BatchNormalization(),
    prune_low_magnitude(keras.layers.Conv2D(16, 3, activation=None), **pruning_params_sparsity_0_5),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    prune_low_magnitude(keras.layers.Conv2D(32, 3, activation=None), **pruning_params_sparsity_0_5),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.MaxPooling2D((2,2)),
    keras.layers.Dropout(0.3),
    prune_low_magnitude(keras.layers.Conv2D(32, 3, activation=None), **pruning_params_sparsity_0_5),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    prune_low_magnitude(keras.layers.Conv2D(64, 3, activation=None), **pruning_params_sparsity_0_5),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    prune_low_magnitude(keras.layers.Conv2D(128, 1, activation=None), **pruning_params_sparsity_0_5),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.GlobalMaxPooling2D(),
    prune_low_magnitude(keras.layers.Dense(128, activation='relu'), **pruning_params_2_by_4),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(NUM_CLASSES, activation='softmax'),
])

model.summary()

Input shape: (32, 32, 1)
Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_77 (Ba  (None, 32, 32, 1)         4         
 tchNormalization)                                               
                                                                 
 prune_low_magnitude_conv2d  (None, 30, 30, 16)        306       
 _87 (PruneLowMagnitude)                                         
                                                                 
 batch_normalization_78 (Ba  (None, 30, 30, 16)        64        
 tchNormalization)                                               
                                                                 
 activation_59 (Activation)  (None, 30, 30, 16)        0         
                                                                 
 prune_low_magnitude_conv2d  (None, 28, 28, 32)        9250      
 _88 (PruneLowMagnitude)    

In [81]:
logdir = tempfile.mkdtemp()
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
    tf.keras.callbacks.EarlyStopping(verbose=1, patience=4)
]

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

train_labels = train_labels.astype(np.float32)
valid_labels = val_labels.astype(np.float32)

class_weights = {
    0: 2.0,
    1: 2.0,
    2: 2.0,
    3: 2.0,
    4: 1.0,
    5: 2.0,
    6: 1.0,
    7: 1.5
}

EPOCHS = 100
history = model.fit(train_data, train_labels,
    validation_data=(valid_data, valid_labels),
    epochs=EPOCHS,
    callbacks=callbacks,
    batch_size=1024,
    class_weight=class_weights
)

Epoch 1/100


E0000 00:00:1737157277.930998   58901 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_17/dropout_35/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer






Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 38: early stopping


In [82]:
model.evaluate(test_data, test_labels)



[0.3947451114654541, 0.8613466620445251]

In [83]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model)

In [84]:
model_for_export.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_77 (Ba  (None, 32, 32, 1)         4         
 tchNormalization)                                               
                                                                 
 conv2d_87 (Conv2D)          (None, 30, 30, 16)        160       
                                                                 
 batch_normalization_78 (Ba  (None, 30, 30, 16)        64        
 tchNormalization)                                               
                                                                 
 activation_59 (Activation)  (None, 30, 30, 16)        0         
                                                                 
 conv2d_88 (Conv2D)          (None, 28, 28, 32)        4640      
                                                                 
 batch_normalization_79 (Ba  (None, 28, 28, 32)      

In [85]:
model_for_export.save("tra_model_for_optimization5.h5")



