In [18]:
import random
import tensorflow as tf
import numpy as np
import matplotlib as plt
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import json

tf.keras.backend.clear_session()
%precision 4

L2_WEIGHT_DECAY = 0.01
L1_WEIGHT_DECAY = 0.003

In [19]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

In [20]:
def load_json_data(path):
    with open(path, "r") as fp:
        data = json.load(fp)
    
    a = np.array(data["mfcc"])
    label = np.array(data["label"])

    mfcc = a[..., np.newaxis]
    print(mfcc.shape)

    return mfcc, label 

In [21]:
train_path = "../parse_dataset_labels/parse_sound_files/tess_ravdess_train.json"
validate_path = "../parse_dataset_labels/parse_sound_files/tess_ravdess_validation.json"
test_path = "../parse_dataset_labels/parse_sound_files/tess_ravdess_test.json"

train_mfcc_b, train_label_b = load_json_data(train_path)
validate_mfcc_b, validate_label_b = load_json_data(validate_path)
test_mfcc_b, test_label_b = load_json_data(test_path)

x = list(zip(train_mfcc_b, train_label_b))
random.shuffle(x)
train_mfcc_tuple, train_label_tuple = zip(*x)
train_mfcc = np.array(train_mfcc_tuple)
train_label = np.array(train_label_tuple)

x = list(zip(validate_mfcc_b, validate_label_b))
random.shuffle(x)
validate_mfcc_tuple, validate_label_tuple = zip(*x)
validate_mfcc_tuple = np.array(validate_mfcc_tuple)
validate_label_tuple = np.array(validate_label_tuple)

x = list(zip(test_mfcc_b, test_label_b))
random.shuffle(x)
test_mfcc_tuple, test_label_tuple = zip(*x)
test_mfcc = np.array(test_mfcc_tuple)
test_label = np.array(test_label_tuple)

(9642, 87, 13, 1)
(1212, 87, 13, 1)
(1187, 87, 13, 1)


In [27]:
# abc = tf.keras.utils.to_categorical(train_label)
# print(abc)
train_label = tf.keras.utils.to_categorical(train_label)
test_label = tf.keras.utils.to_categorical(test_label)
validate_label = tf.keras.utils.to_categorical(validate_label)

In [28]:
# using the hop length and fft params we have 87 time steps with 13 values for each
input_layer = tf.keras.layers.Input(shape=(87, 13, 1))
x = tf.keras.layers.Conv2D(96, (11,11), strides=4, padding="same", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(input_layer)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.MaxPool2D((3,3), strides=2, padding="same")(x)

x = tf.keras.layers.Conv2D(256, (5,5), padding="same", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.MaxPool2D((3,3), strides=2, padding="same")(x)

x = tf.keras.layers.Conv2D(384, (5,5), padding="same", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(x)
x = tf.keras.layers.Activation("relu")(x)

x = tf.keras.layers.Conv2D(384, (5,5), padding="same", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(x)
x = tf.keras.layers.Activation("relu")(x)

x = tf.keras.layers.Conv2D(256, (5,5), padding="same", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.MaxPool2D((3,3), strides=2, padding="same")(x)

x = tf.keras.layers.Flatten()(x)

x = tf.keras.layers.Dense(4096, activation="relu", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(x)
x = tf.keras.layers.Dense(4096, activation="relu", kernel_initializer='he_normal', bias_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY))(x)
x = tf.keras.layers.Dense(5, activation="softmax", kernel_initializer='he_normal', bias_initializer="he_normal")(x)

model = Model(input_layer, x, name='alexNet')

model.summary()

Model: "alexNet"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 87, 13, 1)]       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 22, 4, 96)         11712     
_________________________________________________________________
activation_5 (Activation)    (None, 22, 4, 96)         0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 11, 2, 96)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 11, 2, 256)        614656    
_________________________________________________________________
activation_6 (Activation)    (None, 11, 2, 256)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 6, 1, 256)         0   

In [29]:
# input_layer = tf.keras.layers.Input(shape=(train_mfcc.shape[1], train_mfcc.shape[2], train_mfcc.shape[3]))
# x = tf.keras.layers.Conv2D(96, (3,3), activation="relu", kernel_initializer='he_normal', bias_initializer="he_normal")(input_layer)
# x = tf.keras.layers.MaxPool2D((3,3), strides=2, padding="same")(x)
# # Batch normalization standardizes the activations of the current layer and what activations get sent to the next layer. this helps
# # the model converge a lot faster because it has normalized values flowing through the model
# x = tf.keras.layers.BatchNormalization()(x)

# x = tf.keras.layers.Conv2D(256, (3,3), activation="relu", kernel_initializer='he_normal', bias_initializer="he_normal")(x)
# x = tf.keras.layers.MaxPool2D((3,3), strides=2, padding="same")(x)
# x = tf.keras.layers.BatchNormalization()(x)

# x = tf.keras.layers.Conv2D(512, (3,3), activation="relu", kernel_initializer='he_normal', bias_initializer="he_normal")(x)
# x = tf.keras.layers.MaxPool2D((3,3), strides=2, padding="same")(x)
# x = tf.keras.layers.BatchNormalization()(x)

# x = tf.keras.layers.Flatten()(x)

In [30]:
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, decay=0.0001), loss='categorical_crossentropy', metrics=['acc'])

In [31]:
check_points = "checkpoint/checkpoint_sound.hb/"
check_point_dir = os.path.dirname(check_points)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=check_point_dir, verbose=1, monitor="val_acc", save_best_only=True)

In [26]:
model.fit(train_mfcc, train_label, 
        validation_data=(validate_mfcc, validate_label), 
        verbose=1, 
        batch_size=32, 
        epochs=5)

Epoch 1/5


ValueError: in user code:

    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:749 train_step
        y, y_pred, sample_weight, regularization_losses=self.losses)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__
        losses = ag_call(y_true, y_pred)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\losses.py:253 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\losses.py:1535 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\keras\backend.py:4687 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    d:\Reluu\record_video\venv\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 5) are incompatible
