In [1]:
import numpy as np
import tensorflow as tf
import os

In [2]:
# Class names, ordered so that a name's position equals the numeric class ID
# parsed from the audio file names (see get_label).
classes = np.array([
    "air_conditioner",
    "car_horn",
    "children_playing",
    "dog_bark",
    "drilling",
    "engine_idling",
    "gun_shot",
    "jackhammer",
    "siren",
    "street_music",
])
# Tensor copy of the names so they can be indexed from TensorFlow code.
classes_tensor = tf.convert_to_tensor(classes)

FULL DATASET ACQUISITION

In [None]:
# Root folder of the audio clips; every sub-directory is scanned for files.
dataset_path = "/mnt/3206BFFF191E7F85/UrbanSound8K/audio/"
folders = os.listdir(dataset_path)

# os.listdir() returns entries in arbitrary order, so slicing the listing is not
# a reliable way to skip stray non-directory entries: it can drop real data
# folders instead. Select the sub-directories explicitly, and sort them so the
# resulting file order is the same on every run.
fold_dirs = sorted(
    name for name in folders
    if os.path.isdir(os.path.join(dataset_path, name))
)

# Full path of every file found in the selected sub-directories.
train_files = []
for fold in fold_dirs:
    fold_path = os.path.join(dataset_path, fold)
    train_files.extend(
        os.path.join(fold_path, name) for name in sorted(os.listdir(fold_path))
    )

In [5]:
def decode_audio(audio_binary):
  """Decode WAV bytes into a waveform tensor without a channel axis.

  The audio is decoded requesting one channel and 128000 samples, then the
  trailing (channel) axis is squeezed away.
  """
  decoded = tf.audio.decode_wav(
      audio_binary,
      desired_channels=1,
      desired_samples=128000,
  )
  # decode_wav yields (audio, sample_rate); only the audio is used here.
  return tf.squeeze(decoded[0], axis=-1)

DATASET CLEANING: REMOVING CORRUPTED WAV FILES

In [6]:
# Try to decode every file once and remember the positions of those that
# TensorFlow rejects with InvalidArgumentError.
waste = []
for index, file in enumerate(train_files):
    audio_binary = tf.io.read_file(file)
    try:
        waveform = decode_audio(audio_binary)
    except tf.errors.InvalidArgumentError:
        waste.append(index)

# Number of files that could not be decoded.
errors = len(waste)

# Delete from the highest index down so the remaining indices stay valid.
for corrupted in reversed(waste):
    del train_files[corrupted]

In [7]:
def get_label(file_path):
  """Return the class name encoded in an audio file path.

  The file name (last ``os.path.sep``-separated component) is split on "-";
  its second field is parsed as an int32 class ID, which is then used to index
  ``classes_tensor``.
  """
  # Index into the split results rather than tuple-unpacking them so this
  # also works inside a TensorFlow graph.
  filename = tf.strings.split(file_path, os.path.sep)[-1]
  fields = tf.strings.split(filename, "-")
  class_id = tf.strings.to_number(fields[1], tf.int32)
  return classes_tensor[class_id]

In [8]:
def get_waveform_and_label(file_path):
  """Read one audio file and return ``(decoded waveform, class name)``."""
  raw_bytes = tf.io.read_file(file_path)
  return decode_audio(raw_bytes), get_label(file_path)

In [10]:
def float_wf(waveform):
    """Return ``waveform`` cast to ``tf.float32``."""
    return tf.cast(waveform, tf.float32)
def float_wf_and_label(audio, label):
  """Turn ``(waveform, class name)`` into ``(float32 waveform, class index)``.

  The index is the argmax of the element-wise comparison between ``label``
  and the ``classes`` array, i.e. the position where the name matches.
  """
  name_matches = label == classes
  return float_wf(audio), tf.argmax(name_matches)

In [11]:
def preprocess_dataset(filelist):
    """Build a ``tf.data`` pipeline from a list of audio file paths.

    Each path is mapped through ``get_waveform_and_label`` and then
    ``float_wf_and_label``, so the dataset yields the pairs produced by the
    latter (float32 waveform, class index).
    """
    parallelism = -1  # -1 is the tf.data AUTOTUNE sentinel
    return (
        tf.data.Dataset.from_tensor_slices(filelist)
        .map(get_waveform_and_label, num_parallel_calls=parallelism)
        .map(float_wf_and_label, num_parallel_calls=parallelism)
    )

In [23]:
import random
def divide_by_class(fileslist):
    """Group file paths into ten lists, one per class ID.

    The class ID is the second "-"-separated field of the file name (the last
    ``os.path.sep``-separated component of the path), e.g. ``3`` for
    ``"100032-3-0-0.wav"``.

    Returns:
        A list of 10 lists; entry ``k`` holds the paths whose class ID is
        ``k``, in the order they appear in ``fileslist``.
    """
    per_class = [[] for _ in range(10)]
    for path in fileslist:
        filename = path.rsplit(os.path.sep, 1)[-1]
        class_id = int(filename.split("-")[1])
        per_class[class_id].append(path)
    return per_class

def split(ds, train_percentage=0.70, val_percentage=0.10, test_percentage=0.2):
    """Cut a sequence into consecutive train / validation / test slices.

    The train slice holds the first ``int(len(ds) * train_percentage)`` items,
    the validation slice the next ``int(len(ds) * val_percentage)`` items, and
    the test slice everything that remains. ``test_percentage`` is accepted
    but not used by the computation.

    Returns:
        ``(train, val, test)`` slices of ``ds``.
    """
    n_items = len(ds)
    train_stop = int(n_items*train_percentage)
    val_stop = train_stop + int(n_items*val_percentage)
    return ds[:train_stop], ds[train_stop:val_stop], ds[val_stop:]

def split_dataset(fileslist, seed=None):
    """Shuffled, per-class train / validation / test split of a list of paths.

    The files are shuffled, grouped by class with ``divide_by_class``, each
    class group is cut with ``split`` (using its default proportions), and the
    merged train / val / test lists are shuffled once more.

    Args:
        fileslist: Paths of the audio files. The list itself is left
            untouched; a shuffled copy is used internally.
        seed: Optional seed for a private random generator, which makes the
            split reproducible. With ``None`` (the default) the module-level
            ``random`` state is used.

    Returns:
        ``(train, val, test)`` lists of file paths.
    """
    rng = random if seed is None else random.Random(seed)

    # Shuffle a copy so the caller's list is not reordered as a side effect.
    shuffled = list(fileslist)
    rng.shuffle(shuffled)

    train, val, test = [], [], []
    for class_files in divide_by_class(shuffled):
        train_part, val_part, test_part = split(class_files)
        train.extend(train_part)
        val.extend(val_part)
        test.extend(test_part)

    # Mix the classes again: the lists were filled one class at a time.
    for subset in (train, val, test):
        rng.shuffle(subset)
    return train, val, test

# Split the cleaned file list, class by class, into train / validation / test paths.
trainFiles, valFiles, testFiles = split_dataset(train_files)

In [31]:
# Build one tf.data pipeline per subset from its list of file paths.
train_set, val_set, test_set = (
    preprocess_dataset(paths) for paths in (trainFiles, valFiles, testFiles)
)

MODEL AND PARAMETERS

In [26]:
from tensorflow.keras import layers
from tensorflow.keras import models
# Fully connected classifier on the raw 128000-sample waveform. The last layer
# has no activation, so it outputs one unnormalised score (logit) per class.
layer_stack = [
    layers.Input(shape=(128000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(128, activation='relu'),
    # NOTE(review): two identical Dropout layers in a row, followed by a
    # Flatten applied to an already flat (None, 128) tensor — confirm that
    # both are intended.
    layers.Dropout(0.25),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10),
]
model = models.Sequential(layer_stack)

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                8192064   
_________________________________________________________________
dense_5 (Dense)              (None, 128)               8320      
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               16512     
_________________________________________________________________
dropout_5 (Dropout)          (None, 128)              

In [27]:
# Adam with default settings. The loss is configured with from_logits=True,
# matching the activation-free Dense(10) output layer of the model.
adam = tf.keras.optimizers.Adam()
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=adam, loss=sparse_ce, metrics=['accuracy'])

DATASET PREPARATION

In [25]:
# Intentionally empty: deleting `model` at this point would remove the model
# that was built and compiled above, so the `model.fit(...)` call below would
# raise NameError when the notebook is run from top to bottom.

In [32]:
batch_size = 64
AUTOTUNE = -1

# Each subset is batched, the batches are cached, and prefetching is enabled.
train_set = train_set.batch(batch_size).cache().prefetch(AUTOTUNE)
val_set = val_set.batch(batch_size).cache().prefetch(AUTOTUNE)
test_set = test_set.batch(batch_size).cache().prefetch(AUTOTUNE)

In [33]:
EPOCHS = 100
# Fit on the training batches, using the validation batches as validation data.
history = model.fit(train_set, validation_data=val_set, epochs=EPOCHS)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

KeyboardInterrupt: 