In [1]:
import os
import pathlib
import wave

import glob
import random

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

# Set the seed value for experiment reproducibility.
# Three independent generators are used in this notebook and each one has to
# be seeded on its own: Python's `random` (file shuffling and padding
# offsets), NumPy, and TensorFlow.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [2]:
# Collect the recordings and shuffle them, so that the positional
# train/validation/test split made later is not ordered by file name.

data_dir = "recordings"

# glob returns paths in arbitrary, OS-dependent order; sorting first makes the
# shuffle start from the same sequence on every machine. Restricting the
# pattern to *.wav keeps stray non-audio files out of the decoder.
filenames = sorted(glob.glob(str(data_dir) + "/*.wav"))
random.shuffle(filenames)
num_samples = len(filenames)

print(num_samples)
print(filenames[:5])

3000
['recordings/8_yweweler_19.wav', 'recordings/9_george_10.wav', 'recordings/8_nicolas_21.wav', 'recordings/8_yweweler_8.wav', 'recordings/4_george_1.wav']


In [14]:
# https://www.tensorflow.org/tutorials/audio/simple_audio

def decode_audio(file_path):
    """Load a 16-bit PCM WAV file as a float32 array scaled to [-1.0, 1.0).

    Args:
        file_path: Path (string) of the WAV file to read.

    Returns:
        1-D ``np.float32`` array with one entry per stored 16-bit sample.
        Samples of a multi-channel file stay interleaved, exactly as they
        are stored in the file.

    Raises:
        ValueError: If the file does not use 2-byte (16-bit) samples; such
            data would otherwise be silently misread as int16.
        wave.Error: If the file is not a WAV file the ``wave`` module can read.
    """
    # The context manager closes the file handle even when reading fails.
    with wave.open(file_path, "rb") as wav_file:
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            raise ValueError(
                f"{file_path}: expected 16-bit samples, "
                f"got {8 * sample_width}-bit")
        raw_bytes = wav_file.readframes(wav_file.getnframes())

    # WAV stores PCM samples little-endian; "<i2" decodes them correctly
    # regardless of the byte order of the host machine.
    pcm = np.frombuffer(raw_bytes, dtype="<i2")

    # Normalise so that values lie between -1.0 and +1.0.
    max_int16 = 2**15
    return pcm.astype(np.float32) / max_int16

def get_label(file_path):
    """Return the integer label encoded at the start of a recording's file name.

    The label is the text before the first underscore of the file name,
    e.g. ``"recordings/8_yweweler_19.wav"`` -> ``8``.

    Args:
        file_path: Path to the recording; any directory depth is accepted.

    Returns:
        The label as an ``int``.

    Raises:
        ValueError: If the file name does not start with an integer.
    """
    # Use the base name instead of a fixed path component so the result does
    # not depend on how many directories precede the file.
    file_name = os.path.basename(file_path)
    return int(file_name.split("_")[0])

def get_waveform_and_label(file_path):
    """Return the ``(waveform, label)`` pair for one recording.

    The label is parsed from the path by ``get_label`` and the samples are
    loaded by ``decode_audio``.
    """
    digit = get_label(file_path)
    samples = decode_audio(file_path)
    return samples, digit

In [26]:
# Decode every recording once, keeping (waveform, label) pairs.
labeled = [get_waveform_and_label(file_path) for file_path in filenames]
if not labeled:
    # Without this guard the statistics below are NaN and the int() conversion
    # fails with an unrelated-looking message.
    raise ValueError("no recordings found; check data_dir")

# Build the length array in one step; calling np.append inside the loop
# re-copied the whole array on every iteration.
lengths = np.array(
    [waveform.shape[0] for waveform, _ in labeled], dtype=np.float64)

# Target length for padding: mean + 3 standard deviations of the clip length.
# Longer clips are treated as outliers and dropped by the padding step.
max_length = int(np.mean(lengths) + 3 * np.std(lengths))
print(np.mean(lengths))
print(np.std(lengths))
print(max_length)

3499.4746666666665
1180.9471707171701
7042


In [50]:
# padding function from
# https://towardsdatascience.com/audio-deep-learning-made-simple-sound-classification-step-by-step-cebc936bbe5

# Bring every clip to exactly max_length samples by surrounding it with zeros;
# where the clip sits inside that window is drawn at random. Clips longer than
# max_length are left out and tallied per label in `numbers`.
numbers = [0] * 10
padded_clips = []
kept_labels = []

for clip, digit in labeled:
    spare = max_length - clip.shape[0]
    if spare < 0:
        # Too long to fit: count it against its label and move on.
        numbers[digit] += 1
        continue

    lead = random.randint(0, spare)
    padded_clips.append(
        np.pad(clip, (lead, spare - lead),
               mode='constant', constant_values=(0, 0)))
    kept_labels.append(digit)

X_full = np.array(padded_clips)
y_full = np.array(kept_labels)

num_samples = X_full.shape[0]
print(num_samples)

2965


In [51]:
import pandas as pd

# Tabulate, per label 0-9, how many clips the padding step discarded.
quantities = {
    "y": [digit for digit in range(10)],
    "quantities": numbers,
}
df = pd.DataFrame(quantities)
print(df)

   y  quantities
0  0           4
1  1           3
2  2           5
3  3           3
4  4           1
5  5           3
6  6           2
7  7           7
8  8           3
9  9           4


In [60]:
# Positional 80/10/10 split of the (already shuffled) data. The test set
# takes whatever is left after the first nine tenths, so it absorbs the
# rounding remainder.
tenth = int(num_samples * 0.1)
eightyth = tenth * 8
val_stop = eightyth + tenth

X_train, y_train = X_full[:eightyth], y_full[:eightyth]
X_val, y_val = X_full[eightyth:val_stop], y_full[eightyth:val_stop]
X_test, y_test = X_full[val_stop:], y_full[val_stop:]

for title, subset in (
        ('Training set size', X_train),
        ('Validation set size', X_val),
        ('Test set size', X_test)):
    print(title, len(subset))

Training set size 2368
Validation set size 296
Test set size 301


In [61]:
# Fully connected classifier applied directly to the padded waveform.
# The input width comes from max_length rather than a hard-coded number, so
# the model stays in sync with the padding step if the dataset changes.
model = models.Sequential([
    layers.Dense(300, activation="relu", input_shape=(max_length,)),
    layers.Dense(100, activation="relu"),
    layers.Dense(10, activation="softmax"),  # one output per digit 0-9
])

# Declaring input_shape on the first layer already builds the model, so no
# explicit model.build() call is needed before summary().
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 300)               2112900   
                                                                 
 dense_13 (Dense)            (None, 100)               30100     
                                                                 
 dense_14 (Dense)            (None, 10)                1010      
                                                                 
Total params: 2,144,010
Trainable params: 2,144,010
Non-trainable params: 0
_________________________________________________________________


In [62]:
# Integer class labels (not one-hot), hence the sparse cross-entropy loss.
model.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [64]:
# Train for 30 epochs, reporting validation metrics after each one.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [65]:
# Score the model on the held-out test split; the result is [loss, accuracy].
# NOTE(review): the recorded output (loss ~2.3026 = ln 10, accuracy ~0.10) is
# what uniform guessing over 10 classes yields, i.e. the network has not
# learned anything from the raw waveforms.
model.evaluate(x=X_test, y=y_test)



[2.3025319576263428, 0.09966777265071869]