# Implement Post Conv2D Layers

In [106]:
import pathlib
import numpy as np
import math
import h5py
from datetime import datetime

import tensorflow as tf 
from tensorflow.keras import layers

In [107]:
# Root folder of the audio samples, relative to the notebook's working directory
data_dir = pathlib.Path('data')

In [108]:
def get_waveform(wavfile):
    """Read a WAV file and return its samples without the channel axis.

    Args:
        wavfile: Path (or path-like object) of the WAV file; it is converted
            to ``str`` before being handed to TensorFlow.

    Returns:
        The decoded audio tensor with its last (channel) axis squeezed away.
        The decoder is asked for a single channel and 16000 samples.
    """
    raw_bytes = tf.io.read_file(str(wavfile))
    # decode_wav also reports the sample rate; it is not needed here.
    audio, _sample_rate = tf.audio.decode_wav(
        raw_bytes, desired_channels=1, desired_samples=16000
    )
    return tf.squeeze(audio, axis=-1)


def get_spectrogram(waveform):
    """Turn a waveform into a magnitude spectrogram with a channel axis.

    Args:
        waveform: Audio samples, as returned by ``get_waveform``.

    Returns:
        The magnitude of the short-time Fourier transform of ``waveform``
        (frame length 255, frame step 128) with one extra trailing axis.
    """
    # Short-time Fourier transform of the signal (complex valued).
    stft = tf.signal.stft(waveform, frame_length=255, frame_step=128)

    # Keep only the magnitude; the phase is discarded.
    magnitude = tf.abs(stft)

    # Append a 'channels' axis so the result can be fed to convolution
    # layers like an image: (batch_size, height, width, channels).
    return magnitude[..., tf.newaxis]

def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Args:
        x: A NumPy array, a NumPy scalar, or a plain Python number.

    Returns:
        ``x`` with every negative value replaced by zero. Arrays keep their
        shape.
    """
    # np.maximum accepts plain Python numbers as well as NumPy objects,
    # whereas the ``.clip`` method only exists on NumPy arrays and scalars.
    return np.maximum(x, 0.0)

## Get the input data

In [109]:
# Load one test clip and convert it to a spectrogram
waveform = get_waveform(data_dir/'test/h_yes.wav')
spec = get_spectrogram(waveform)
# Prepend a batch axis so the spectrogram can be passed to the model layers
input_data = spec[tf.newaxis,...]
# Show the batched shape and the first 10 values of the first row
input_data.shape, input_data[0,0,:10,:]

(TensorShape([1, 124, 129, 1]),
 <tf.Tensor: shape=(10, 1), dtype=float32, numpy=
 array([[0.00021577],
        [0.00242205],
        [0.00827248],
        [0.01238501],
        [0.01051275],
        [0.01144113],
        [0.01872324],
        [0.02427843],
        [0.04116756],
        [0.09470174]], dtype=float32)>)

## Load the model

In [110]:
# Load the saved Keras model from the HDF5 file and list its layers
h5_model = tf.keras.models.load_model('simple_audio.h5')
h5_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 32, 32, 1)         0         
                                                                 
 normalization (Normalizatio  (None, 32, 32, 1)        3         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0

## Run through the Conv2D layers

In [111]:
# Layer 0 (Resizing): downsample the input spectrogram
l1 = h5_model.layers[0]
l1_out = l1(input_data)

# Layer 1 (Normalization); each layer is fed the previous output as a NumPy array
l2 = h5_model.layers[1]
l2_out = l2(l1_out.numpy())

# Layer 2 (Conv2D)
l3 = h5_model.layers[2]
l3_out = l3(l2_out.numpy())

# Layer 3 (Conv2D 1)
l4 = h5_model.layers[3]
l4_out = l4(l3_out.numpy())

In [112]:
# Output of the second Conv2D layer: shape, then all channels at position (0, 0)
print(l4_out.shape)
l4_out[0,0,0,:]

(1, 28, 28, 64)


<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([0.        , 0.        , 0.        , 0.        , 0.49769944,
       0.        , 0.15165518, 0.        , 0.31784678, 0.42159253,
       0.        , 0.3958354 , 0.        , 0.        , 0.09659711,
       0.        , 0.        , 0.        , 0.        , 0.10127787,
       0.2358518 , 0.        , 0.13675866, 0.15558577, 0.18190861,
       0.        , 0.09301445, 0.08088657, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.25600177, 0.        , 0.4070488 , 0.        ,
       0.        , 0.        , 0.26512337, 0.        , 0.        ,
       0.        , 0.15896116, 0.        , 0.19394155, 0.25666392,
       0.        , 0.06679156, 0.        , 0.14209348, 0.27046356,
       0.        , 0.        , 0.02366456, 0.13287333, 0.        ,
       0.        , 0.        , 0.        , 0.00255454], dtype=float32)>

## MaxPooling2D

### TF model output

In [113]:
# Layer 4 (MaxPooling2D): reference output from the TF model
l5 = h5_model.layers[4]
l5_out = l5(l4_out.numpy())

In [114]:
# Pooled output: shape, then all channels at position (0, 0)
print(l5_out.shape)
l5_out[0,0,0,:]

(1, 14, 14, 64)


<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([0.19207872, 0.        , 0.        , 0.        , 0.49769944,
       0.39563945, 0.15165518, 0.21508202, 0.31784678, 0.42159253,
       0.        , 0.3958354 , 0.        , 0.04460957, 0.3934646 ,
       0.03404821, 0.        , 0.0322025 , 0.00269348, 0.12552783,
       0.2566957 , 0.        , 0.20957533, 0.30595195, 0.25312927,
       0.        , 0.09301445, 0.08088657, 0.        , 0.05764907,
       0.05090094, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.25771794, 0.28490397, 0.50668806, 0.        ,
       0.        , 0.        , 0.26512337, 0.05124837, 0.01706564,
       0.        , 0.15896116, 0.        , 0.33742458, 0.33704227,
       0.        , 0.37610823, 0.18524006, 0.14209348, 0.36792484,
       0.        , 0.        , 0.07485493, 0.13287333, 0.        ,
       0.        , 0.27662134, 0.        , 0.00255454], dtype=float32)>

### Implement MaxPooling2D

In [115]:
def max_pooling_2d(input):
    """2x2 max pooling with stride 2 over a channels-last 4-D array.

    Args:
        input: Array of shape (groups, rows, cols, channels). Anything
            accepted by ``np.asarray`` with that shape works.

    Returns:
        A float64 array of shape (groups, rows // 2, cols // 2, channels)
        where each element is the maximum of the matching 2x2 input window.
        If ``rows`` or ``cols`` is odd, the last row/column is ignored.

    Raises:
        ValueError: If ``input`` does not have exactly four dimensions.
    """
    data = np.asarray(input)
    groups, rows, cols, channels = data.shape
    out_rows = rows // 2
    out_cols = cols // 2

    # Drop a trailing odd row/column so the spatial axes split evenly
    # into 2x2 windows.
    cropped = data[:, :out_rows * 2, :out_cols * 2, :]

    # Give each window its own pair of axes (2 and 4) and reduce over them.
    # Taking the true maximum, rather than comparing against a zero-filled
    # output, keeps the result correct for windows that are entirely
    # negative, i.e. when the previous layer is not a ReLU.
    windows = cropped.reshape(groups, out_rows, 2, out_cols, 2, channels)
    return windows.max(axis=(2, 4)).astype(np.float64)

# verify values
def verify_arrays(a1, a2, tolerance=1e-5):
    """Print whether two arrays have the same shape and nearly equal values.

    Args:
        a1: First array (must expose ``.shape``).
        a2: Second array (must expose ``.shape``).
        tolerance: Largest absolute element-wise difference that still counts
            as a match (exclusive).

    Returns:
        None. The result is reported on stdout; the value comparison is
        skipped when the shapes differ.
    """
    shapes_equal = a1.shape == a2.shape
    print('Shapes match: %s' % str(shapes_equal))
    if not shapes_equal:
        return

    # Compare the magnitude of the difference: a signed comparison would
    # accept any element where a1 is far *below* a2.
    abs_diff = np.abs(np.asarray(a1) - np.asarray(a2))
    print('Values match: %s' % str(bool(np.all(abs_diff < tolerance))))

In [116]:
# Run the hand-written pooling on the same Conv2D output and show it next to the TF result
mp_out = max_pooling_2d(l4_out.numpy())
mp_out[0,0,0,:],l5_out[0,0,0,:]

(array([0.19207872, 0.        , 0.        , 0.        , 0.49769944,
        0.39563945, 0.15165518, 0.21508202, 0.31784678, 0.42159253,
        0.        , 0.3958354 , 0.        , 0.04460957, 0.3934646 ,
        0.03404821, 0.        , 0.0322025 , 0.00269348, 0.12552783,
        0.25669569, 0.        , 0.20957533, 0.30595195, 0.25312927,
        0.        , 0.09301445, 0.08088657, 0.        , 0.05764907,
        0.05090094, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.25771794, 0.28490397, 0.50668806, 0.        ,
        0.        , 0.        , 0.26512337, 0.05124837, 0.01706564,
        0.        , 0.15896116, 0.        , 0.33742458, 0.33704227,
        0.        , 0.37610823, 0.18524006, 0.14209348, 0.36792484,
        0.        , 0.        , 0.07485493, 0.13287333, 0.        ,
        0.        , 0.27662134, 0.        , 0.00255454]),
 <tf.Tensor: shape=(64,), dtype=float32, numpy=
 array([0.19207872, 0.        , 0.        , 0.        , 0.49769944,
        0.

In [117]:
# Check the hand-written pooling output against the TF layer output
verify_arrays(mp_out, l5_out)

Shapes match: True
Values match: True


## Dropout 0.25
### Dropout is only active during training; at inference the layer returns its input unchanged

In [118]:
# Layer 5 (Dropout): called here outside of training, so the values are expected to pass through unchanged
l6 = h5_model.layers[5]
l6_out = l6(l5_out.numpy())
l6_out[0,0,0,:]

<tf.Tensor: shape=(64,), dtype=float32, numpy=
array([0.19207872, 0.        , 0.        , 0.        , 0.49769944,
       0.39563945, 0.15165518, 0.21508202, 0.31784678, 0.42159253,
       0.        , 0.3958354 , 0.        , 0.04460957, 0.3934646 ,
       0.03404821, 0.        , 0.0322025 , 0.00269348, 0.12552783,
       0.2566957 , 0.        , 0.20957533, 0.30595195, 0.25312927,
       0.        , 0.09301445, 0.08088657, 0.        , 0.05764907,
       0.05090094, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.25771794, 0.28490397, 0.50668806, 0.        ,
       0.        , 0.        , 0.26512337, 0.05124837, 0.01706564,
       0.        , 0.15896116, 0.        , 0.33742458, 0.33704227,
       0.        , 0.37610823, 0.18524006, 0.14209348, 0.36792484,
       0.        , 0.        , 0.07485493, 0.13287333, 0.        ,
       0.        , 0.27662134, 0.        , 0.00255454], dtype=float32)>

## Flatten

In [119]:
# Layer 6 (the Flatten layer, per the section heading): collapse the feature maps into one vector per sample
l7 = h5_model.layers[6]
l7_out = l7(l6_out.numpy())
print(l7_out.shape)
# Show the first 32 flattened values
l7_out.numpy()[0,:32]

(1, 12544)


array([0.19207872, 0.        , 0.        , 0.        , 0.49769944,
       0.39563945, 0.15165518, 0.21508202, 0.31784678, 0.42159253,
       0.        , 0.3958354 , 0.        , 0.04460957, 0.3934646 ,
       0.03404821, 0.        , 0.0322025 , 0.00269348, 0.12552783,
       0.2566957 , 0.        , 0.20957533, 0.30595195, 0.25312927,
       0.        , 0.09301445, 0.08088657, 0.        , 0.05764907,
       0.05090094, 0.        ], dtype=float32)

## Dense 128

### TF model output

In [120]:
# Layer 7 (Dense, 128 outputs): reference output from the TF model
l8 = h5_model.layers[7]
l8_out = l8(l7_out.numpy())
print(l8_out.shape)
# Show the first 32 activations
l8_out.numpy()[0,:32]

(1, 128)


array([0.       , 0.       , 0.       , 0.       , 2.490368 , 0.       ,
       0.       , 0.       , 2.080639 , 0.       , 0.       , 0.       ,
       2.874687 , 1.19669  , 0.       , 0.       , 2.545947 , 0.       ,
       0.       , 1.5560516, 0.       , 0.       , 0.9972665, 0.       ,
       0.       , 0.4105076, 0.       , 0.       , 0.3358364, 2.905886 ,
       0.       , 0.       ], dtype=float32)

### Implement Dense

In [142]:
def dense(input, weights, biases, activation=None):
    """Fully connected layer: ``activation(input @ weights + biases)``.

    Args:
        input: 2-D array of shape (batch, n_inputs). Every row is processed,
            not just the first one.
        weights: 2-D array of shape (n_inputs, n_outputs).
        biases: 1-D array of shape (n_outputs,).
        activation: Optional callable applied to each output value
            individually (it receives one scalar at a time). ``None`` leaves
            the weighted sums untouched.

    Returns:
        A float64 array of shape (batch, n_outputs).

    Raises:
        ValueError: If ``input`` is not 2-D or the shapes are incompatible.
    """
    x = np.asarray(input, dtype=np.float64)
    w = np.asarray(weights, dtype=np.float64)
    b = np.asarray(biases, dtype=np.float64)

    n_batch, _ = x.shape
    n_outputs = w.shape[1]

    # One matrix product computes the weighted sum for every sample and
    # every output unit; the bias broadcasts across the batch axis.
    output = x @ w + b

    if activation is not None:
        # Apply the activation one scalar at a time, so callables that only
        # handle scalars keep working.
        for row in range(n_batch):
            for col in range(n_outputs):
                output[row, col] = activation(output[row, col])

    return output

In [143]:
# Re-compute the Dense-128 layer by hand: l8.weights[0] is passed as the weight matrix,
# l8.weights[1] as the biases, with ReLU as the activation
d_out = dense(l7_out.numpy(), l8.weights[0].numpy(), l8.weights[1].numpy(), activation=relu)
d_out[0,:32]

array([0.        , 0.        , 0.        , 0.        , 2.49036813,
       0.        , 0.        , 0.        , 2.08063984, 0.        ,
       0.        , 0.        , 2.87468958, 1.19668984, 0.        ,
       0.        , 2.54594612, 0.        , 0.        , 1.55605149,
       0.        , 0.        , 0.99726647, 0.        , 0.        ,
       0.41050842, 0.        , 0.        , 0.33583611, 2.90588427,
       0.        , 0.        ])

In [144]:
# Check the hand-written Dense-128 output against the TF layer output
verify_arrays(d_out, l8_out.numpy())

Shapes match: True
Values match: True


## Dense 8

### TF model output

In [50]:
# Layer 8 (Dropout 0.5): only active during training, so the values are
# expected to match the Dense-128 output above
l9 = h5_model.layers[8]
l9_out = l9(l8_out.numpy())
print(l9_out.shape)
l9_out.numpy()[0,:32]

(1, 128)


array([0.       , 0.       , 0.       , 0.       , 2.490368 , 0.       ,
       0.       , 0.       , 2.080639 , 0.       , 0.       , 0.       ,
       2.874687 , 1.19669  , 0.       , 0.       , 2.545947 , 0.       ,
       0.       , 1.5560516, 0.       , 0.       , 0.9972665, 0.       ,
       0.       , 0.4105076, 0.       , 0.       , 0.3358364, 2.905886 ,
       0.       , 0.       ], dtype=float32)

In [51]:
# Layer 9 (Dense, 8 outputs): reference output from the TF model
l10 = h5_model.layers[9]
l10_out = l10(l9_out.numpy())
print(l10_out.shape)
# The slice asks for up to 32 values; only 8 exist
l10_out.numpy()[0,:32]

(1, 8)


array([-1.4015254 , -0.47578493, -0.17762837, -1.3689332 ,  0.02886982,
       -4.9320793 , -4.903058  ,  4.8657074 ], dtype=float32)

### Apply the implemented dense

In [145]:
# Re-compute the Dense-8 layer by hand; no activation is passed, so the raw weighted sums are returned
l_out = dense(l9_out.numpy(), l10.weights[0].numpy(), l10.weights[1].numpy())
l_out[0,:32]

array([-1.40152538, -0.47578511, -0.17762849, -1.3689332 ,  0.02887006,
       -4.93207932, -4.90305758,  4.86570692])

In [146]:
# Check the hand-written Dense-8 output against the TF layer output
verify_arrays(l_out, l10_out.numpy())

Shapes match: True
Values match: True


## Get the output label

In [52]:
# Class names indexed by model output position
# NOTE(review): presumably the same order as used during training — confirm
label_names = ['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes']
label_names

['down', 'go', 'left', 'no', 'right', 'stop', 'up', 'yes']

In [55]:
# The predicted label is the class whose output value is largest
label_names[l10_out.numpy().argmax()]

'yes'