In [103]:
import json
import numpy as np
import tensorflow as tf

In [104]:
DATA_PATH = "data.json"

def load_data(data_path = DATA_PATH):
  """Read the feature JSON file and return its contents as NumPy arrays.

  Args:
    data_path: Path of the JSON file to read; defaults to DATA_PATH.

  Returns:
    A tuple (mfcc, delta_mfcc, labels) built with np.array from the
    "mfcc", "delta mfcc" and "labels" entries of the file, in that order.
  """
  with open(data_path, "r") as handle:
    payload = json.load(handle)

  # The delta entry is stored under a key containing a space ("delta mfcc").
  wanted_keys = ("mfcc", "delta mfcc", "labels")
  return tuple(np.array(payload[key]) for key in wanted_keys)

In [105]:
from sklearn.model_selection import train_test_split

def split_data(features, labels, test_size = 0.30, val_size = 0.15, random_state = 42):
  """Split features and labels into training, testing and validation sets.

  The data is first split into a training pool and a test set. The
  validation set is then taken out of that training pool, so val_size is
  applied to the data left after the test split, not to the full dataset.

  Args:
    features: Samples handed to train_test_split.
    labels: Targets handed to train_test_split alongside features.
    test_size: Value passed as test_size to the first train_test_split call.
    val_size: Value passed as test_size to the second train_test_split call.
    random_state: Seed passed to both train_test_split calls.

  Returns:
    X_train, X_test, X_val, y_train, y_test, y_val. Note that the test
    split comes before the validation split in the returned tuple.
  """
  # First cut: hold out the test set from everything else.
  X_pool, X_test, y_pool, y_test = train_test_split(
      features, labels, test_size = test_size, random_state = random_state)
  # Second cut: carve the validation set out of the remaining pool.
  X_train, X_val, y_train, y_val = train_test_split(
      X_pool, y_pool, test_size = val_size, random_state = random_state)
  return X_train, X_test, X_val, y_train, y_test, y_val

In [116]:
import tensorflow.keras as keras

def generate_cnn_model(input_shape, num_classes = 10):
  """Build (without compiling) the convolutional classifier used in this notebook.

  The network consists of three Conv2D -> MaxPool2D -> BatchNormalization
  stages (32, 64 and 128 filters), followed by Flatten, a 64-unit ReLU Dense
  layer, Dropout(0.3) and a softmax Dense output layer.

  Args:
    input_shape: Shape of one input sample, declared once through keras.Input.
    num_classes: Number of units in the softmax output layer (default 10).

  Returns:
    The assembled keras.Sequential model; the caller is expected to compile it.
  """
  model = keras.Sequential()

  # Declare the input shape exactly once. Previously input_shape was also
  # passed to the second and third Conv2D layers, where it does not describe
  # the layer's real input.
  model.add(keras.Input(shape = input_shape))

  # (filters, convolution kernel, pooling window) for each convolutional stage.
  conv_stages = (
    (32, (3, 3), (3, 3)),
    (64, (3, 3), (3, 3)),
    (128, (2, 2), (2, 2)),
  )
  for filters, kernel, pool in conv_stages:
    model.add(keras.layers.Conv2D(filters, kernel, activation = 'relu'))
    model.add(keras.layers.MaxPool2D(pool, strides = (2, 2), padding = 'same'))
    model.add(keras.layers.BatchNormalization())

  # Classification head.
  model.add(keras.layers.Flatten())
  model.add(keras.layers.Dense(64, activation = 'relu'))
  model.add(keras.layers.Dropout(0.3))

  model.add(keras.layers.Dense(num_classes, activation = 'softmax'))
  print("CNN model loaded.")
  return model

In [107]:
# Read the MFCC features, their deltas and the labels from the JSON file
mfcc, delta_mfcc, labels = load_data(data_path = DATA_PATH)
# Show the dimensions of the MFCC array as a quick sanity check
print(np.shape(mfcc))

(9989, 132, 13)


<h3> CNN trained on MFCC: </h3>

In [108]:
# Partition the MFCC features into training, testing and validation subsets
X_train, X_test, X_validation, y_train, y_test, y_validation = split_data(mfcc, labels)

# Append a trailing length-1 axis to every feature subset, so that each sample
# has the three-entry shape that is handed to the CNN as input_shape below
X_train, X_test, X_validation = (
    np.expand_dims(subset, axis = -1) for subset in (X_train, X_test, X_validation)
)

# The full MFCC array itself is not modified by the steps above
print(mfcc.shape)

# Per-sample shape (axes 1 to 3 of the training array) is the CNN input dimension
input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
print(input_shape)

(9989, 132, 13)
(132, 13, 1)


In [109]:
# Build the CNN for the MFCC-only input shape
model_mfcc = generate_cnn_model(input_shape = input_shape)

# Adam optimizer with a learning rate of 1e-4
optimizer = keras.optimizers.Adam(learning_rate = 1e-4)

# Compile with sparse categorical cross-entropy loss, tracking accuracy
model_mfcc.compile(
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],
    optimizer = optimizer,
)

# Print the layer-by-layer overview of the model
model_mfcc.summary()

loaded


In [110]:
# Fit on the training split for 20 epochs in batches of 32; the validation
# split is supplied so validation metrics are reported after every epoch
model_mfcc.fit(
    X_train,
    y_train,
    batch_size = 32,
    epochs = 20,
    validation_data = (X_validation, y_validation),
)

Epoch 1/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.2681 - loss: 2.4198 - val_accuracy: 0.4252 - val_loss: 1.6392
Epoch 2/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.4574 - loss: 1.5718 - val_accuracy: 0.5291 - val_loss: 1.3569
Epoch 3/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.5505 - loss: 1.3179 - val_accuracy: 0.5691 - val_loss: 1.2159
Epoch 4/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.5822 - loss: 1.1836 - val_accuracy: 0.5920 - val_loss: 1.1250
Epoch 5/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 32ms/step - accuracy: 0.6309 - loss: 1.0616 - val_accuracy: 0.6101 - val_loss: 1.0882
Epoch 6/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.6650 - loss: 0.9544 - val_accuracy: 0.6215 - val_loss: 1.1070
Epoch 7/20
[1m186/186

<keras.src.callbacks.history.History at 0x1762a3340>

In [111]:
# Evaluating the MFCC model with the testing set.
# The loss is bound to a descriptive name instead of `_`: in IPython/Jupyter
# `_` holds the previous cell output, and assigning to it overrides that
# shortcut for the rest of the session.
test_loss, accuracy = model_mfcc.evaluate(X_test, y_test)
print(f"Model accuracy: {round(100 * accuracy, 3)}%.")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7282 - loss: 0.8299
Model accuracy: 73.44%.


<h3> CNN trained on MFCC and delta MFCC: </h3>

In [112]:
# Stack MFCC and delta MFCC along a new trailing axis, so that every position
# of the original arrays now holds the pair [mfcc value, delta mfcc value]
complex_feature = np.stack((mfcc, delta_mfcc), axis = -1)
print(np.shape(complex_feature))

# Partition the stacked features into training, testing and validation subsets
X_train, X_test, X_validation, y_train, y_test, y_validation = split_data(complex_feature, labels)

# Per-sample shape (axes 1 to 3 of the training array) is the CNN input dimension
input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
print(input_shape)

(9989, 132, 13, 2)
(132, 13, 2)


In [113]:
# Build the CNN for the stacked MFCC + delta MFCC input shape
model_delta = generate_cnn_model(input_shape = input_shape)

# Adam optimizer with a learning rate of 1e-4
optimizer = keras.optimizers.Adam(learning_rate = 1e-4)

# Compile with sparse categorical cross-entropy loss, tracking accuracy
model_delta.compile(
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],
    optimizer = optimizer,
)

# Print the layer-by-layer overview of the model
model_delta.summary()

loaded


In [114]:
# Fit on the training split for 20 epochs in batches of 32; the validation
# split is supplied so validation metrics are reported after every epoch
model_delta.fit(
    X_train,
    y_train,
    batch_size = 32,
    epochs = 20,
    validation_data = (X_validation, y_validation),
)

Epoch 1/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.2722 - loss: 2.2729 - val_accuracy: 0.4471 - val_loss: 1.6385
Epoch 2/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 36ms/step - accuracy: 0.4700 - loss: 1.5203 - val_accuracy: 0.5586 - val_loss: 1.2935
Epoch 3/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.5366 - loss: 1.3341 - val_accuracy: 0.5949 - val_loss: 1.1748
Epoch 4/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 36ms/step - accuracy: 0.5965 - loss: 1.1680 - val_accuracy: 0.6053 - val_loss: 1.1139
Epoch 5/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 39ms/step - accuracy: 0.6433 - loss: 1.0405 - val_accuracy: 0.6235 - val_loss: 1.0706
Epoch 6/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 42ms/step - accuracy: 0.6759 - loss: 0.9298 - val_accuracy: 0.6244 - val_loss: 1.0355
Epoch 7/20
[1m186/18

<keras.src.callbacks.history.History at 0x17a5768b0>

In [115]:
# Evaluating the MFCC + delta MFCC model with the testing set.
# The loss is bound to a descriptive name instead of `_`: in IPython/Jupyter
# `_` holds the previous cell output, and assigning to it overrides that
# shortcut for the rest of the session.
test_loss, accuracy = model_delta.evaluate(X_test, y_test)
print(f"Model accuracy: {round(100 * accuracy, 3)}%.")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.7350 - loss: 0.8140
Model accuracy: 74.174%.
