# Imports and Deterministic Behaviour

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import re
import numpy as np
import matplotlib.pyplot as plt

import data_operations as data_ops

import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D,  MaxPooling1D, BatchNormalization, GlobalMaxPooling1D

import pandas as pd

# Fix the seed of every random number generator the experiment may touch so
# that runs are repeatable. `set_random_seed` seeds Python's `random`, NumPy
# and TensorFlow in a single call (previously only NumPy and TensorFlow were
# seeded, leaving Python's `random` module unseeded).
seed = 42
tf.keras.utils.set_random_seed(seed)

In [2]:
# Directory that holds the audio files (relative to the notebook).
DATA_DIR = "./data/SAVEE"

# Number of files assigned to each split (300 + 100 + 80 = 480 in total).
TRAIN_SIZE = 300
VAL_SIZE = 100
TEST_SIZE = 80

# Samples per second; multiplied by a duration to get a clip length in samples.
# NOTE(review): assumed to match the recordings' actual sample rate - confirm.
SAMPLE_RATE_HZ = 44100

# Load audio and Get Label functions

In [3]:
def get_label(file_path):
  """Return the label token embedded in an audio file path.

  Two kinds of text are stripped from `file_path`:
    * everything up to and including the last underscore, and
    * any run of digits immediately followed by a literal ``.wav``.

  What remains is the label, e.g. ``'./data/SAVEE/DC_n14.wav' -> 'n'``.

  Args:
    file_path: Path (or bare file name) as a string.

  Returns:
    The remaining string after both substitutions.
  """
  # Raw string avoids the invalid `\_` escape, and `\.` makes the dot literal
  # so that only a real ".wav" extension is removed (a bare `.` matched any
  # character, e.g. "14xwav").
  return re.sub(r'.+_|[0-9]+\.wav', '', file_path)

In [4]:
# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted first; otherwise the seeded shuffle below would produce a
# different split on a different machine or file system.
# Only ".wav" entries are kept so that stray files cannot turn into labels.
filenames = sorted(
    f'{DATA_DIR}/{p}' for p in os.listdir(DATA_DIR) if p.endswith('.wav')
)
np.random.shuffle(filenames)

# One-hot encode the label of every file, in the same (shuffled) order as
# `filenames`. The dtype is pinned because the default of get_dummies differs
# between pandas versions (uint8 vs bool).
labels = pd.get_dummies(list(map(get_label, filenames)), dtype=np.uint8).to_numpy()

In [5]:
# Split the shuffled files/labels into three consecutive, non-overlapping
# ranges: [0, train_end) -> train, [train_end, val_end) -> validation,
# [val_end, test_end) -> test.
train_end = TRAIN_SIZE
val_end = train_end + VAL_SIZE
test_end = val_end + TEST_SIZE

# The test split used to be taken from the tail of the list, which silently
# overlapped with the validation/training files whenever fewer than
# TRAIN_SIZE + VAL_SIZE + TEST_SIZE files were available. Fail loudly instead.
assert len(filenames) >= test_end, (
    f'need at least {test_end} files for the requested split, found {len(filenames)}'
)

train_files, train_labels = (filenames[:train_end], labels[:train_end])
val_files, val_labels = (filenames[train_end:val_end], labels[train_end:val_end])
test_files, test_labels = (filenames[val_end:test_end], labels[val_end:test_end])

print('Training set size', len(train_files))
print('Validation set size', len(val_files))
print('Test set size', len(test_files))
(train_files[0], train_labels[0])

Training set size 300
Validation set size 100
Test set size 80


('./data/SAVEE/DC_n14.wav', array([0, 0, 0, 0, 1, 0, 0], dtype=uint8))

In [6]:
# Fixed clip length, in samples, passed to the crop / pad / reshape steps.
clip_samples = SAMPLE_RATE_HZ * 8

# Per-element preprocessing steps, applied in this order through Dataset.map.
# NOTE(review): the steps live in the project-local `data_ops` module; judging
# by their names they read and decode the file, then crop / zero-pad the
# waveform to `clip_samples` and reshape it to (clip_samples, 1) - confirm
# against data_operations.py.
operations = [
    data_ops.ReadFile(),
    data_ops.DecodeWav(),
    data_ops.Squeeze(),
    data_ops.Crop(clip_samples),
    data_ops.ZeroPad(clip_samples),
    data_ops.CastToFloat(),
    data_ops.Reshape((clip_samples, 1))
]

# Datasets of (file path, one-hot label) pairs. `from_tensor_slices` is a
# static method of tf.data.Dataset; calling it through
# tfio.audio.AudioIODataset only reached the same inherited method and wrongly
# suggested that tensorflow-io was doing the loading.
train_ds = tf.data.Dataset.from_tensor_slices((train_files, train_labels))
val_ds = tf.data.Dataset.from_tensor_slices((val_files, val_labels))

In [7]:
# Chain every preprocessing step onto the training dataset, in order.
# NOTE: this reassigns `train_ds`, so re-running this cell without first
# re-running the cell that creates `train_ds` applies the steps a second time.
for op in operations:
    train_ds = train_ds.map(op, num_parallel_calls=tf.data.AUTOTUNE)

# Preview a single element. Only the first element is pulled through the
# pipeline; materialising the whole dataset into a list just to index [0]
# decoded every file and held all waveforms in memory at once.
first_el = next(iter(train_ds.take(1).as_numpy_iterator()))
first_el

(array([[1275.],
        [1279.],
        [1282.],
        ...,
        [   0.],
        [   0.],
        [   0.]], dtype=float32),
 array([0, 0, 0, 0, 1, 0, 0], dtype=uint8))

In [8]:
# Chain every preprocessing step onto the validation dataset, in order.
# NOTE: this reassigns `val_ds`, so re-running this cell without first
# re-running the cell that creates `val_ds` applies the steps a second time.
for op in operations:
    val_ds = val_ds.map(op, num_parallel_calls=tf.data.AUTOTUNE)

# Preview a single element. Only the first element is pulled through the
# pipeline instead of materialising the whole dataset into a list.
first_el = next(iter(val_ds.take(1).as_numpy_iterator()))
first_el

(array([[1297.],
        [1293.],
        [1290.],
        ...,
        [   0.],
        [   0.],
        [   0.]], dtype=float32),
 array([0, 0, 0, 0, 0, 1, 0], dtype=uint8))

In [9]:
# Disabled: plots the first rows * cols training waveforms in a grid.
# NOTE(review): this snippet predates the one-hot labels. `label` is now a
# numeric array (see the `labels` cell), so the `.decode('utf-8')` call below
# would fail; the title would need to be derived from the one-hot vector
# before this cell can be re-enabled.
# rows = 3
# cols = 3
# n = rows * cols
# fig, axes = plt.subplots(rows, cols, figsize=(12, 15))

# for i, (audio, label) in enumerate(train_ds.take(n)):
#   r = i // cols
#   c = i % cols
#   ax = axes[r][c]
#   ax.plot(audio.numpy())
#   print(audio.numpy().shape)
#   label = label.numpy().decode('utf-8')
#   ax.set_title(label)

# plt.show()

## CNN Architecture
 * 6x Conv1D + batch normalization + max pooling
 * 1x Conv1D + batch normalization + global max pooling
 * 2x dense layers (the last one is a 7-way softmax output)

In [10]:
# Settings shared by every convolutional block.
activation = 'relu'
pool_size = 2

# Each convolution is described by a (number of filters, kernel size) pair.
first_filter_size = (32, 21)

# Middle blocks: filter counts grow while kernel sizes shrink.
filters = [64, 128, 256, 512, 1024]
sizes = [19, 17, 15, 13, 11]
middle_filters_size = [(n_filters, kernel) for n_filters, kernel in zip(filters, sizes)]

# Final convolution, the one followed by global max pooling.
last_filter_size = (1024, 9)

In [11]:
# Clip length in samples: the value passed to Crop / ZeroPad / Reshape above
# and used as the first dimension of the model's input shape below.
SAMPLE_RATE_HZ * 8

352800

In [12]:
# Every example is a fixed-length, single-channel sequence.
input_shape = (SAMPLE_RATE_HZ * 8, 1)

model = Sequential()

# Pooled convolutional blocks: Conv1D -> BatchNormalization -> MaxPooling1D.
# The first block is the only one that declares the input shape.
pooled_conv_specs = [first_filter_size, *middle_filters_size]
for block_idx, (n_filters, kernel_width) in enumerate(pooled_conv_specs):
    extra_kwargs = {'input_shape': input_shape} if block_idx == 0 else {}
    model.add(Conv1D(n_filters, kernel_width, activation=activation, **extra_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=pool_size))

# Final convolutional block: global max pooling collapses the time axis, so
# the dense head receives one value per filter.
final_filters, final_kernel = last_filter_size
model.add(Conv1D(final_filters, final_kernel, activation=activation))
model.add(BatchNormalization())
model.add(GlobalMaxPooling1D())

# Classification head: hidden dense layer followed by a 7-way softmax.
model.add(Dense(128, activation=activation))
model.add(Dense(7, activation='softmax'))

model.summary()

# Adam optimizer; categorical cross-entropy matches the one-hot labels.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 352780, 32)        704       
                                                                 
 batch_normalization (BatchN  (None, 352780, 32)       128       
 ormalization)                                                   
                                                                 
 max_pooling1d (MaxPooling1D  (None, 176390, 32)       0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 176372, 64)        38976     
                                                                 
 batch_normalization_1 (Batc  (None, 176372, 64)       256       
 hNormalization)                                                 
                                                        

In [13]:
# Group the preprocessed examples into mini-batches; the same batch size is
# used for training and validation.
# NOTE: this reassigns both datasets, so re-running the cell batches them again.
batch_size = 32
train_ds, val_ds = (ds.batch(batch_size) for ds in (train_ds, val_ds))

In [14]:
# Display the batched training dataset to check its element spec
# (shapes and dtypes of the audio and label tensors).
train_ds

<BatchDataset element_spec=(TensorSpec(shape=(None, 352800, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 7), dtype=tf.uint8, name=None))>

In [None]:
# Train for 10 epochs on the batched training dataset, passing the batched
# validation dataset as `validation_data`.
model.fit(train_ds, epochs=10, validation_data=val_ds)

Epoch 1/10


Exception ignored in: <function ScopedTFGraph.__del__ at 0x00000184B9EDE700>
Traceback (most recent call last):
  File "d:\Programmi\venvs\tf-gpu\lib\site-packages\tensorflow\python\framework\c_api_util.py", line 54, in __del__
    self.deleter(self.graph)
KeyboardInterrupt: 


: 

: 