# Explore the simple-sr model layers

In [1]:
import os
import logging

import pathlib

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

2024-05-02 02:31:17.266046: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Import the saved model. Set up the paths and labels.

In [2]:
# Directory holding the sample audio clips.
data_dir = pathlib.Path('data')

# Model exported in TensorFlow SavedModel format, loaded from 'saved_v2'.
imported = tf.saved_model.load('saved_v2')

# Class names; later cells index this list with the argmax of the model output.
label_names = [
    'down', 'go', 'left', 'no',
    'right', 'stop', 'up', 'yes',
]

In [3]:
def get_waveform(wavfile, desired_samples=16000):
    """Read a WAV file and return its samples as a waveform tensor.

    Args:
        wavfile: Path of the WAV file to read (converted with ``str``).
        desired_samples: Number of samples requested from
            ``tf.audio.decode_wav``. Defaults to 16000, the value that was
            previously hard-coded.

    Returns:
        The decoded audio with the trailing channel axis squeezed away.
    """
    audio_bytes = tf.io.read_file(str(wavfile))
    # decode_wav also returns the sample rate; it is not needed here.
    waveform, _ = tf.audio.decode_wav(
        audio_bytes, desired_channels=1, desired_samples=desired_samples
    )
    # A single channel was requested, so the last axis has size 1.
    return tf.squeeze(waveform, axis=-1)


def get_spectrogram(waveform, frame_length=255, frame_step=128):
    """Convert a waveform into a magnitude spectrogram with a channel axis.

    Args:
        waveform: Audio samples, e.g. the tensor returned by ``get_waveform``.
        frame_length: STFT window length passed to ``tf.signal.stft``.
            Defaults to 255, the value that was previously hard-coded.
        frame_step: STFT hop size passed to ``tf.signal.stft``.
            Defaults to 128, the value that was previously hard-coded.

    Returns:
        The magnitude of the STFT with one extra trailing axis of size 1.
    """
    # Convert the waveform to a time/frequency representation via a STFT.
    stft = tf.signal.stft(
        waveform, frame_length=frame_length, frame_step=frame_step
    )

    # Keep only the magnitude of the STFT.
    magnitude = tf.abs(stft)

    # Add a 'channels' dimension, so that the spectrogram can be used as an
    # image-like input with convolution layers, which expect shape
    # (batch_size, height, width, channels).
    return magnitude[..., tf.newaxis]

## Get the spectrogram

In [25]:
# Decode the sample clip, then derive the spectrogram used as model input.
wav_path = data_dir / 'yes.wav'
waveform = get_waveform(wav_path)
spectrogram = get_spectrogram(waveform)

## Load the model saved in HDF5 format

In [22]:
# Restore the Keras model from its HDF5 file (via the `models` alias imported
# from tensorflow.keras at the top of the notebook) and print its layer table.
h5_model = models.load_model('simple-sr_v2.h5')
h5_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing_1 (Resizing)       (None, 64, 64, 1)         0         
                                                                 
 normalization_4 (Normalizat  (None, 64, 64, 1)        3         
 ion)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 62, 62, 32)        320       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 31, 31, 32)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 14, 14, 64)      

## Test the HDF5 model

In [56]:
# Prepend a batch axis so the single spectrogram forms a batch of one.
input_data = tf.expand_dims(spectrogram, axis=0)

# Forward pass through the whole model in inference mode.
result = h5_model(input_data, training=False)

# The position of the largest output score selects the predicted label.
best_index = np.argmax(result.numpy())
label = label_names[best_index]
print('Prediction:', label)
result

Prediction: yes


<tf.Tensor: shape=(1, 8), dtype=float32, numpy=
array([[-2.6347573, -2.9110854,  2.067018 ,  1.7640504, -9.374807 ,
        -4.109325 , -7.983361 , 10.585677 ]], dtype=float32)>

## Explore the model layers

In [24]:
# List the model's layers in order; the cells below pick them out by position.
h5_model.layers

[<keras.layers.preprocessing.image_preprocessing.Resizing at 0x1381b5190>,
 <keras.layers.preprocessing.normalization.Normalization at 0x137b2b490>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x138179a10>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x138f03290>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x138ece7d0>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x138edfe10>,
 <keras.layers.convolutional.conv2d.Conv2D at 0x138edc650>,
 <keras.layers.pooling.max_pooling2d.MaxPooling2D at 0x138ee2710>,
 <keras.layers.regularization.dropout.Dropout at 0x138ee38d0>,
 <keras.layers.reshaping.flatten.Flatten at 0x138ebfd90>,
 <keras.layers.core.dense.Dense at 0x138ebb790>,
 <keras.layers.regularization.dropout.Dropout at 0x138f07b90>,
 <keras.layers.core.dense.Dense at 0x138f07510>]

## Input data

In [29]:
# Show the shape of the input batch and, for batch item 0 at index 0 of
# axis 1, the first 32 values along axis 2 (flattened to 1-D for display).
input_data.shape, input_data[0,0,:32,:].numpy().flatten()

(TensorShape([1, 124, 129, 1]),
 array([0.00087561, 0.00134371, 0.00557508, 0.01203688, 0.01582851,
        0.01979508, 0.03313684, 0.05369601, 0.05009932, 0.03737277,
        0.0312091 , 0.00657593, 0.00977934, 0.00414987, 0.01013514,
        0.03394571, 0.03811777, 0.05212038, 0.07652813, 0.09006108,
        0.06934591, 0.09528538, 0.09866577, 0.05166683, 0.03728538,
        0.02647714, 0.03595338, 0.03264239, 0.03675836, 0.05252869,
        0.05268585, 0.03330975], dtype=float32))

## Downsample

In [31]:
# Layer 0 is the Resizing layer; apply it on its own to the input batch.
resize_layer = h5_model.layers[0]
resize_out = resize_layer(input_data)

# Display the output shape and the first 32 values of row 0.
resize_preview = resize_out[0, 0, :32, :].numpy().ravel()
resize_out.shape, resize_preview

(TensorShape([1, 64, 64, 1]),
 array([0.00155563, 0.01450971, 0.01721152, 0.03461045, 0.04996715,
        0.06476526, 0.07763705, 0.04730197, 0.05492217, 0.07322476,
        0.09561469, 0.05896866, 0.04744271, 0.02823398, 0.06757322,
        0.05356693, 0.0716002 , 0.08599924, 0.10384966, 0.08202535,
        0.07202959, 0.06213472, 0.04379842, 0.02111142, 0.02182574,
        0.02863349, 0.05733498, 0.02389398, 0.05709929, 0.02924848,
        0.04101247, 0.05370279], dtype=float32))

## Normalize

In [32]:
# Layer 1 is the Normalization layer. The tensor from the Resizing step is
# passed in directly; Keras layers accept tensors, so converting it to a
# NumPy array first is unnecessary.
norm_layer = h5_model.layers[1]
norm_out = norm_layer(resize_out)

# Display the output shape and the first 16 values of row 0.
norm_out.shape, norm_out[0,0,:16,:].numpy().flatten()

(TensorShape([1, 64, 64, 1]),
 array([-0.16206573, -0.145115  , -0.14157963, -0.1188127 , -0.09871808,
        -0.07935438, -0.06251133, -0.10220551, -0.0922343 , -0.06828492,
        -0.03898715, -0.08693938, -0.10202137, -0.12715647, -0.0756801 ,
        -0.09400767], dtype=float32))

## Conv2D 32 filters

In [34]:
# Layer 2 is the first Conv2D layer (32 output channels). The normalized
# tensor is passed in directly, without a NumPy round-trip.
conv2d_32_layer = h5_model.layers[2]
conv2d_32_out = conv2d_32_layer(norm_out)

# Display the output shape and all 32 channel values at spatial position (0, 0).
conv2d_32_out.shape, conv2d_32_out[0,0,0,:32].numpy()

(TensorShape([1, 62, 62, 32]),
 array([0.00627902, 0.        , 0.02095808, 0.01433451, 0.        ,
        0.00889511, 0.0643165 , 0.03762578, 0.        , 0.02074122,
        0.00472939, 0.        , 0.01017278, 0.00777538, 0.01832406,
        0.        , 0.        , 0.01559877, 0.01055296, 0.00048798,
        0.03145018, 0.01437424, 0.00606427, 0.00207761, 0.01075713,
        0.02302407, 0.00345798, 0.00418103, 0.        , 0.00154825,
        0.03864209, 0.02948506], dtype=float32))

## MaxPooling2D 32 filters

In [36]:
# Layer 3 is the MaxPooling2D layer that follows the 32-channel convolution.
# The convolution output tensor is passed in directly.
mp_32_layer = h5_model.layers[3]
mp_32_out = mp_32_layer(conv2d_32_out)

# Display the output shape and the 32 channel values at spatial position (0, 0).
mp_32_out.shape, mp_32_out[0,0,0,:32].numpy()

(TensorShape([1, 31, 31, 32]),
 array([0.01072034, 0.        , 0.02095808, 0.02108153, 0.        ,
        0.01503734, 0.0643165 , 0.03762578, 0.        , 0.0217897 ,
        0.00606663, 0.        , 0.02001369, 0.00966259, 0.03384773,
        0.        , 0.        , 0.03154362, 0.01652316, 0.01577898,
        0.03238086, 0.03042345, 0.02748973, 0.01532979, 0.0124368 ,
        0.0242172 , 0.01190388, 0.01375844, 0.        , 0.00669544,
        0.0389999 , 0.02948506], dtype=float32))

## Conv2D 64 filters

In [37]:
# Layer 4 is the second Conv2D layer (64 output channels). The pooled tensor
# is passed in directly, without a NumPy round-trip.
conv2d_64_layer = h5_model.layers[4]
conv2d_64_out = conv2d_64_layer(mp_32_out)

# Display the output shape and the first 32 channel values at position (0, 0).
conv2d_64_out.shape, conv2d_64_out[0,0,0,:32].numpy()

(TensorShape([1, 29, 29, 64]),
 array([0.24887821, 0.26164997, 0.        , 0.        , 0.29989332,
        0.2574429 , 0.        , 0.        , 0.        , 0.11881153,
        0.        , 0.15549089, 0.        , 0.        , 0.        ,
        0.        , 0.34623715, 0.26356146, 0.        , 0.32560804,
        0.48643997, 0.        , 0.11214834, 0.        , 0.14620453,
        0.        , 0.        , 0.2733803 , 0.4414402 , 0.37427112,
        0.        , 0.23532963], dtype=float32))

## MaxPooling2D 64 filters

In [38]:
# Layer 5 is the MaxPooling2D layer that follows the 64-channel convolution.
# The convolution output tensor is passed in directly.
mp_64_layer = h5_model.layers[5]
mp_64_out = mp_64_layer(conv2d_64_out)

# Display the output shape and the first 32 channel values at position (0, 0).
mp_64_out.shape, mp_64_out[0,0,0,:32].numpy()

(TensorShape([1, 14, 14, 64]),
 array([0.3395589 , 0.36666068, 0.12057038, 0.17979348, 0.5102749 ,
        0.2574429 , 0.        , 0.        , 0.05522262, 0.11881153,
        0.        , 0.15549089, 0.        , 0.19072533, 0.        ,
        0.        , 0.34623715, 0.26356146, 0.29949617, 0.32560804,
        0.48643997, 0.09932759, 0.2327676 , 0.        , 0.3357129 ,
        0.        , 0.        , 0.2733803 , 0.55575466, 0.37427112,
        0.        , 0.23532963], dtype=float32))

## Conv2D 128 filters

In [39]:
# Layer 6 is the third Conv2D layer (128 output channels). The pooled tensor
# is passed in directly, without a NumPy round-trip.
conv2d_128_layer = h5_model.layers[6]
conv2d_128_out = conv2d_128_layer(mp_64_out)

# Display the output shape and the first 32 channel values at position (0, 0).
conv2d_128_out.shape, conv2d_128_out[0,0,0,:32].numpy()

(TensorShape([1, 12, 12, 128]),
 array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.00907242, 0.        , 0.        ,
        0.        , 0.25049394, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.10954612, 0.        , 0.        ,
        0.        , 0.1922076 , 0.        , 0.65980977, 0.        ,
        0.        , 0.        ], dtype=float32))

## MaxPooling2D 128 filters

In [40]:
# Layer 7 is the MaxPooling2D layer that follows the 128-channel convolution.
# The convolution output tensor is passed in directly.
mp_128_layer = h5_model.layers[7]
mp_128_out = mp_128_layer(conv2d_128_out)

# Display the output shape and the first 32 channel values at position (0, 0).
mp_128_out.shape, mp_128_out[0,0,0,:32].numpy()

(TensorShape([1, 6, 6, 128]),
 array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.05285494,
        0.        , 0.        , 0.02493029, 0.09165753, 0.        ,
        0.        , 0.25049394, 0.15503892, 0.05853331, 0.        ,
        0.11574493, 0.        , 0.10954612, 0.        , 0.        ,
        0.        , 0.23547164, 0.        , 0.65980977, 0.        ,
        0.        , 0.        ], dtype=float32))

## Dropout (after the last MaxPooling2D)

In [46]:
# Layer 8 is the first Dropout layer. Dropout only alters values in training
# mode, so training=False is passed explicitly to make the inference-mode
# pass-through independent of any default. The pooled tensor is passed in
# directly, without a NumPy round-trip.
dropout_layer = h5_model.layers[8]
dropout_out = dropout_layer(mp_128_out, training=False)

# Display the output shape and the first 32 channel values at position (0, 0).
dropout_out.shape, dropout_out[0,0,0,:32].numpy()

(TensorShape([1, 6, 6, 128]),
 array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.05285494,
        0.        , 0.        , 0.02493029, 0.09165753, 0.        ,
        0.        , 0.25049394, 0.15503892, 0.05853331, 0.        ,
        0.11574493, 0.        , 0.10954612, 0.        , 0.        ,
        0.        , 0.23547164, 0.        , 0.65980977, 0.        ,
        0.        , 0.        ], dtype=float32))

## Flatten

In [49]:
# Layer 9 is the Flatten layer, which collapses each batch item to 1-D.
# The dropout output tensor is passed in directly.
flatten_layer = h5_model.layers[9]
flatten_out = flatten_layer(dropout_out)

# Display the output shape and the first 32 flattened values.
flatten_out.shape, flatten_out[0,:32].numpy()

(TensorShape([1, 4608]),
 array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.05285494,
        0.        , 0.        , 0.02493029, 0.09165753, 0.        ,
        0.        , 0.25049394, 0.15503892, 0.05853331, 0.        ,
        0.11574493, 0.        , 0.10954612, 0.        , 0.        ,
        0.        , 0.23547164, 0.        , 0.65980977, 0.        ,
        0.        , 0.        ], dtype=float32))

## Dense 128

In [50]:
# Layer 10 is the first Dense layer (128 outputs). The flattened tensor is
# passed in directly, without a NumPy round-trip.
d_128_layer = h5_model.layers[10]
d_128_out = d_128_layer(flatten_out)

# Display the output shape and the first 32 activations.
d_128_out.shape, d_128_out[0,:32].numpy()

(TensorShape([1, 128]),
 array([0.        , 3.2486835 , 0.        , 4.235691  , 3.8311493 ,
        3.3287628 , 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.2558393 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 1.1135519 , 0.        , 0.        ,
        0.        , 0.        , 0.03024962, 0.846521  , 0.        ,
        1.3403792 , 0.289591  , 2.0219226 , 0.        , 0.2902855 ,
        0.        , 0.11955655], dtype=float32))

## Dropout (after Dense 128)

In [51]:
# Layer 11 is the second Dropout layer. As with any Dropout layer, values are
# only altered in training mode, so training=False is passed explicitly.
# The Dense output tensor is passed in directly, without a NumPy round-trip.
# NOTE: this rebinds `dropout_layer` and `dropout_out`, which an earlier cell
# assigned for layer 8; re-running the Flatten cell after this one would
# therefore pick up this 2-D tensor instead of the layer-8 output.
dropout_layer = h5_model.layers[11]
dropout_out = dropout_layer(d_128_out, training=False)

# Display the output shape and the first 32 activations.
dropout_out.shape, dropout_out[0,:32].numpy()

(TensorShape([1, 128]),
 array([0.        , 3.2486835 , 0.        , 4.235691  , 3.8311493 ,
        3.3287628 , 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.2558393 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 1.1135519 , 0.        , 0.        ,
        0.        , 0.        , 0.03024962, 0.846521  , 0.        ,
        1.3403792 , 0.289591  , 2.0219226 , 0.        , 0.2902855 ,
        0.        , 0.11955655], dtype=float32))

## Dense 8

In [54]:
# Layer 12 is the final Dense layer (8 outputs, one per label). The dropout
# output tensor is passed in directly, without a NumPy round-trip.
d_8_layer = h5_model.layers[12]
d_8_out = d_8_layer(dropout_out)

# Display the output shape and all eight output scores.
d_8_out.shape, d_8_out.numpy()

(TensorShape([1, 8]),
 array([[-2.6347573, -2.9110854,  2.067018 ,  1.7640504, -9.374807 ,
         -4.109325 , -7.983361 , 10.585677 ]], dtype=float32))

In [55]:
# Map the largest score from the manual layer-by-layer pass back to its label.
predicted_index = d_8_out.numpy().argmax()
label = label_names[predicted_index]
print('Prediction:', label)

Prediction: yes
