# Implement Post Conv2D Layers

In [1]:
import pathlib
import numpy as np
import math
import h5py
from datetime import datetime

import tensorflow as tf 
from tensorflow.keras import layers

2024-04-28 11:06:16.552647: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Folder holding the sample audio clip(s); a relative path, so it is resolved
# against the notebook's working directory.
data_dir = pathlib.Path('data')

In [3]:
def get_waveform(wavfile):
    """Read a WAV file and return its samples without the channel axis.

    The file is decoded with ``desired_channels=1`` and
    ``desired_samples=16000``; the sample rate reported by the decoder is
    discarded.

    Args:
        wavfile: Path (or path-like object) of the WAV file to read.

    Returns:
        The decoded audio tensor with its last (channel) axis squeezed away.
    """
    raw_bytes = tf.io.read_file(str(wavfile))
    audio, _ = tf.audio.decode_wav(raw_bytes, desired_channels=1, desired_samples=16000)
    return tf.squeeze(audio, axis=-1)


def get_spectrogram(waveform):
    """Convert a waveform into a magnitude spectrogram with a channel axis.

    Args:
        waveform: Audio samples to transform.

    Returns:
        The magnitude of the short-time Fourier transform (frame length 255,
        frame step 128) with one extra trailing axis of size 1.
    """
    # Short-time Fourier transform of the signal (complex-valued).
    stft = tf.signal.stft(waveform, frame_length=255, frame_step=128)

    # Keep only the magnitude, then append a 'channels' axis so the result
    # can be fed to convolution layers like an image, i.e. with shape
    # (batch_size, height, width, channels) once a batch axis is added.
    return tf.abs(stft)[..., tf.newaxis]

def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Uses ``np.maximum`` instead of the ``.clip`` method so that, besides NumPy
    arrays and NumPy scalars, plain Python numbers and sequences are accepted
    as well (those have no ``.clip`` attribute).

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar or array in which every negative entry is replaced by 0.
    """
    return np.maximum(x, 0.0)

## Get the input data

In [4]:
# Load the sample clip and turn it into a spectrogram.
waveform = get_waveform(data_dir/'yes.wav')
spec = get_spectrogram(waveform)
# Prepend a batch axis of size 1 in front of the spectrogram.
input_data = spec[tf.newaxis,...]
# Display the resulting shape and the first 10 entries along axis 2 at index [0, 0].
input_data.shape, input_data[0,0,:10,:]

(TensorShape([1, 124, 129, 1]),
 <tf.Tensor: shape=(10, 1), dtype=float32, numpy=
 array([[0.00087561],
        [0.00134371],
        [0.00557508],
        [0.01203688],
        [0.01582851],
        [0.01979508],
        [0.03313684],
        [0.05369601],
        [0.05009932],
        [0.03737277]], dtype=float32)>)

## Load the model

In [5]:
# Load the saved Keras model from its .h5 file and print an overview of its layers.
h5_model = tf.keras.models.load_model('simple-sr.h5')
h5_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 32, 32, 1)         0         
                                                                 
 normalization (Normalizatio  (None, 32, 32, 1)        3         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0

## Run through the Conv2D layers

In [6]:
# Feed the input through the first four model layers one at a time, keeping
# each layer object (lN) and its output (lN_out) for later inspection.

# Layer 0 - Resizing: downsample the input (32x32 per the model summary)
l1 = h5_model.layers[0]
l1_out = l1(input_data)

# Layer 1 - Normalization
l2 = h5_model.layers[1]
l2_out = l2(l1_out.numpy())

# Layer 2 - first Conv2D (conv2d)
l3 = h5_model.layers[2]
l3_out = l3(l2_out.numpy())

# Layer 3 - second Conv2D (conv2d_1)
l4 = h5_model.layers[3]
l4_out = l4(l3_out.numpy())

In [7]:
# Inspect the second Conv2D output: its shape, then every channel at index [0, 0, 0].
print(l4_out.shape)
l4_out[0,0,0,:]

(1, 28, 28, 64)


<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([0.        , 0.13606277, 0.1669277 , 0.        , 0.02371086,
       0.        , 0.        , 0.02051658, 0.        , 0.        ,
       0.        , 0.01705355, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.01502133, 0.        , 0.2531605 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.185608  , 0.        , 0.        , 0.        , 0.14853501,
       0.19258422, 0.19711518, 0.2783924 , 0.        , 0.        ,
       0.        , 0.06133013, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.1239159 , 0.00247141, 0.31450033, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.1799986 ], dtype=float32)>

## MaxPooling2D

### TF model output

In [8]:
# MaxPooling2D (model layer index 4): run the model's own pooling layer on the
# Conv2D output to obtain the reference result.
l5 = h5_model.layers[4]
l5_out = l5(l4_out.numpy())

In [9]:
# Inspect the reference pooling output: its shape, then every channel at index [0, 0, 0].
print(l5_out.shape)
l5_out[0,0,0,:]

(1, 14, 14, 64)


<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([0.        , 0.13606277, 0.19537322, 0.        , 0.02371086,
       0.        , 0.        , 0.04346997, 0.        , 0.        ,
       0.0165835 , 0.01705355, 0.        , 0.        , 0.02385177,
       0.01397846, 0.18305242, 0.        , 0.        , 0.        ,
       0.        , 0.11963765, 0.        , 0.2531605 , 0.        ,
       0.17020133, 0.        , 0.        , 0.02231221, 0.        ,
       0.        , 0.        , 0.0161083 , 0.        , 0.17712165,
       0.3021493 , 0.        , 0.        , 0.        , 0.14853501,
       0.19258422, 0.19711518, 0.2783924 , 0.        , 0.00836945,
       0.        , 0.06133013, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.08210312, 0.        , 0.        ,
       0.1239159 , 0.16630794, 0.31450033, 0.        , 0.        ,
       0.        , 0.11881111, 0.        , 0.1799986 ], dtype=float32)>

### Implement MaxPooling2D

In [29]:
def max_pooling_2d(input, pool_size=2):
    """Max-pool a 4-D array over non-overlapping windows of its two middle axes.

    The window moves in steps equal to its own size and no padding is added,
    so trailing rows/columns that do not fill a complete window are dropped.

    Args:
        input: Array indexed as [group, row, col, channel].
        pool_size: Window size. Either a single int used for both rows and
            columns, or a (rows, cols) pair. The default of 2 reproduces the
            previously hard-coded 2x2 window.

    Returns:
        A float64 array of shape
        (groups, rows // pool_rows, cols // pool_cols, channels) holding the
        maximum of every window. A NaN inside a window propagates to the
        corresponding output value.

    Raises:
        ValueError: If a window dimension is smaller than 1, or if ``input``
            does not have exactly four axes.
    """
    if np.isscalar(pool_size):
        pool_rows = pool_cols = int(pool_size)
    else:
        pool_rows, pool_cols = (int(p) for p in pool_size)
    if pool_rows < 1 or pool_cols < 1:
        raise ValueError('pool_size entries must be >= 1, got %r' % (pool_size,))

    input = np.asarray(input)
    groups, rows, cols, channels = input.shape
    out_rows = rows // pool_rows
    out_cols = cols // pool_cols

    # np.zeros defaults to float64, so the result is float64 for any input dtype.
    output = np.zeros([groups, out_rows, out_cols, channels])

    for g in range(groups):
        for out_r in range(out_rows):
            in_r = out_r * pool_rows
            for out_c in range(out_cols):
                in_c = out_c * pool_cols
                # Take one window across all channels at once and reduce over
                # its row and column axes, leaving one maximum per channel.
                window = input[g, in_r:in_r + pool_rows, in_c:in_c + pool_cols, :]
                output[g, out_r, out_c, :] = window.max(axis=(0, 1))

    return output

# verify values
def verify_arrays(a1, a2, tolerance=1e-5):
    """Print whether two arrays agree in shape and, element-wise, in value.

    Two lines are printed: ``Shapes match: <bool>`` and, only when the shapes
    are equal, ``Values match: <bool>``. Values match when every absolute
    difference is strictly smaller than ``tolerance``.

    Args:
        a1: First array (anything ``np.asarray`` can convert).
        a2: Second array (anything ``np.asarray`` can convert).
        tolerance: Maximum allowed absolute difference per element (exclusive).

    Returns:
        None. The result is reported on standard output only.
    """
    a1 = np.asarray(a1)
    a2 = np.asarray(a2)

    shapes_equal = a1.shape == a2.shape
    print('Shapes match: %s' % str(shapes_equal))
    if not shapes_equal:
        return

    # Compare the magnitude of the difference. A signed difference would let
    # arbitrarily large negative errors (a1 << a2) pass as a match.
    values_equal = bool(np.all(np.abs(a1 - a2) < tolerance))
    print('Values match: %s' % str(values_equal))

In [30]:
# Run the hand-written pooling on the same Conv2D output, then display its
# values at index [0, 0, 0] next to the reference layer's values.
mp_out = max_pooling_2d(l4_out.numpy())
mp_out[0,0,0,:],l5_out[0,0,0,:]

(array([0.        , 0.13606277, 0.19537322, 0.        , 0.02371086,
        0.        , 0.        , 0.04346997, 0.        , 0.        ,
        0.0165835 , 0.01705355, 0.        , 0.        , 0.02385177,
        0.01397846, 0.18305242, 0.        , 0.        , 0.        ,
        0.        , 0.11963765, 0.        , 0.25316051, 0.        ,
        0.17020133, 0.        , 0.        , 0.02231221, 0.        ,
        0.        , 0.        , 0.0161083 , 0.        , 0.17712165,
        0.3021493 , 0.        , 0.        , 0.        , 0.14853501,
        0.19258422, 0.19711518, 0.2783924 , 0.        , 0.00836945,
        0.        , 0.06133013, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.08210312, 0.        , 0.        ,
        0.1239159 , 0.16630794, 0.31450033, 0.        , 0.        ,
        0.        , 0.11881111, 0.        , 0.17999861]),
 <tf.Tensor: shape=(64,), dtype=float32, numpy=
 array([0.        , 0.13606277, 0.19537322, 0.        , 0.02371086,
        0.

In [31]:
# Check the hand-written pooling result against the model layer's output.
verify_arrays(mp_out, l5_out)

Shapes match: True
Values match: True


## Dropout 0.25
### Not applied at inference time; dropout is only active during training

In [14]:
# Dropout (model layer index 5): the values shown below are the same as the
# max-pooling output, i.e. the layer passes its input through here.
l6 = h5_model.layers[5]
l6_out = l6(l5_out.numpy())
l6_out[0,0,0,:]

<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([0.        , 0.13606277, 0.19537322, 0.        , 0.02371086,
       0.        , 0.        , 0.04346997, 0.        , 0.        ,
       0.0165835 , 0.01705355, 0.        , 0.        , 0.02385177,
       0.01397846, 0.18305242, 0.        , 0.        , 0.        ,
       0.        , 0.11963765, 0.        , 0.2531605 , 0.        ,
       0.17020133, 0.        , 0.        , 0.02231221, 0.        ,
       0.        , 0.        , 0.0161083 , 0.        , 0.17712165,
       0.3021493 , 0.        , 0.        , 0.        , 0.14853501,
       0.19258422, 0.19711518, 0.2783924 , 0.        , 0.00836945,
       0.        , 0.06133013, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.08210312, 0.        , 0.        ,
       0.1239159 , 0.16630794, 0.31450033, 0.        , 0.        ,
       0.        , 0.11881111, 0.        , 0.1799986 ], dtype=float32)>

## Flatten

In [15]:
# Flatten (model layer index 6): turns each sample's feature map into a single vector.
l7 = h5_model.layers[6]
l7_out = l7(l6_out.numpy())
print(l7_out.shape)
# Show the first 32 flattened values of the first sample.
l7_out.numpy()[0,:32]

(1, 12544)


array([0.        , 0.13606277, 0.19537322, 0.        , 0.02371086,
       0.        , 0.        , 0.04346997, 0.        , 0.        ,
       0.0165835 , 0.01705355, 0.        , 0.        , 0.02385177,
       0.01397846, 0.18305242, 0.        , 0.        , 0.        ,
       0.        , 0.11963765, 0.        , 0.2531605 , 0.        ,
       0.17020133, 0.        , 0.        , 0.02231221, 0.        ,
       0.        , 0.        ], dtype=float32)

## Dense 128

### TF model output

In [16]:
# Dense 128 (model layer index 7): reference output from the model's own layer.
l8 = h5_model.layers[7]
l8_out = l8(l7_out.numpy())
print(l8_out.shape)
# Show the first 32 of the 128 output values.
l8_out.numpy()[0,:32]

(1, 128)


array([0.        , 0.        , 2.0636077 , 0.8027674 , 0.        ,
       0.        , 2.144656  , 0.4767597 , 0.8077969 , 0.        ,
       1.1524628 , 0.        , 0.        , 0.        , 0.92185646,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       1.5352435 , 0.8283296 , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.5560782 ], dtype=float32)

### Implement Dense

In [17]:
def dense(input, weights, biases, activation=None):
    """Fully connected layer: ``activation(input @ weights + biases)``.

    Every row of ``input`` is processed, so any batch size is supported (the
    earlier version only evaluated row 0 and always returned one row).

    Args:
        input: 2-D array of shape (batch, n_inputs).
        weights: 2-D array of shape (n_inputs, n_outputs).
        biases: Array holding n_outputs bias values.
        activation: Optional callable applied to each output value
            individually (it is called with one scalar at a time). When
            omitted or falsy, the raw weighted sums are returned.

    Returns:
        A float64 array of shape (batch, n_outputs).

    Raises:
        ValueError: If the array shapes are inconsistent with each other.
    """
    input = np.asarray(input)
    weights = np.asarray(weights)
    biases = np.asarray(biases)

    if input.ndim != 2 or weights.ndim != 2:
        raise ValueError('input and weights must both be 2-D, got shapes %r and %r'
                         % (input.shape, weights.shape))

    n_rows, n_inputs = input.shape
    n_outputs = weights.shape[1]

    # Fail loudly instead of letting NumPy broadcast mismatched operands.
    if weights.shape[0] != n_inputs:
        raise ValueError('weights has %d input rows but input has %d features'
                         % (weights.shape[0], n_inputs))
    if biases.size != n_outputs:
        raise ValueError('expected %d biases, got %d' % (n_outputs, biases.size))
    biases = biases.reshape(n_outputs)

    output = np.zeros([n_rows, n_outputs])

    for row in range(n_rows):
        for out_i in range(n_outputs):
            # Weighted sum of all inputs for this output unit, plus its bias.
            total = np.sum(input[row, :] * weights[:, out_i]) + biases[out_i]
            if activation:
                output[row, out_i] = activation(total)
            else:
                output[row, out_i] = total

    return output

In [18]:
# Apply the hand-written dense layer to the flattened features, passing the
# model layer's first weight tensor as the weight matrix and its second as the
# biases, with ReLU as the activation.
d_out = dense(l7_out.numpy(), l8.weights[0].numpy(), l8.weights[1].numpy(), activation=relu)
# Show the first 32 values for comparison with the reference output.
d_out[0,:32]

array([0.        , 0.        , 2.06360865, 0.80276829, 0.        ,
       0.        , 2.14465523, 0.47675997, 0.80779701, 0.        ,
       1.15246248, 0.        , 0.        , 0.        , 0.92185634,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       1.53524339, 0.82832944, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.55607867])

In [19]:
# Check the hand-written Dense 128 result against the model layer's output.
verify_arrays(d_out, l8_out.numpy())

Shapes match: True
Values match: True


## Dense 8

### TF model output

In [20]:
# Dropout 0.5 (model layer index 8).
# Not applied here, dropout is only used for training: the values printed
# below are the same as the Dense 128 output.
l9 = h5_model.layers[8]
l9_out = l9(l8_out.numpy())
print(l9_out.shape)
l9_out.numpy()[0,:32]

(1, 128)


array([0.        , 0.        , 2.0636077 , 0.8027674 , 0.        ,
       0.        , 2.144656  , 0.4767597 , 0.8077969 , 0.        ,
       1.1524628 , 0.        , 0.        , 0.        , 0.92185646,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       1.5352435 , 0.8283296 , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.5560782 ], dtype=float32)

In [21]:
# Dense 8 (model layer index 9): reference output from the model's own layer.
l10 = h5_model.layers[9]
l10_out = l10(l9_out.numpy())
print(l10_out.shape)
# The layer has only 8 outputs, so the :32 slice simply shows all of them.
l10_out.numpy()[0,:32]

(1, 8)


array([-3.16535  , -2.9853044,  1.9716071, -1.7369767, -4.632102 ,
       -1.3354614, -4.763162 ,  6.664624 ], dtype=float32)

### Apply the implemented dense

In [22]:
# Apply the hand-written dense layer with the final layer's weights and
# biases; no activation is passed, so the raw weighted sums are returned.
l_out = dense(l9_out.numpy(), l10.weights[0].numpy(), l10.weights[1].numpy())
l_out[0,:32]

array([-3.16534996, -2.98530436,  1.97160685, -1.73697674, -4.63210201,
       -1.3354615 , -4.76316118,  6.66462421])

In [23]:
# Check the hand-written Dense 8 result against the model layer's output.
verify_arrays(l_out, l10_out.numpy())

Shapes match: True
Values match: True


## Get the output label

In [24]:
# Class names; a name's position in this list is used as the index of the
# matching model output.
# NOTE(review): presumably the same label order as used in training - confirm.
label_names = ['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes']
label_names

['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes']

In [25]:
# The predicted label is the one whose output value is the largest.
label_names[l10_out.numpy().argmax()]

'yes'