In [1]:
import json
import numpy as np
import tensorflow as tf



In [66]:
# Default location of the JSON file holding the extracted features
DATA_PATH = "data.json"

def load_data(data_path = DATA_PATH):
  """Read the feature dataset from a JSON file.

  Args:
    data_path: Path of the JSON file to read. Defaults to DATA_PATH.

  Returns:
    A tuple (mfcc, delta_mfcc, labels) of NumPy arrays built from the
    "mfcc", "delta mfcc" and "labels" entries of the file, in that order.

  Raises:
    KeyError: If one of the three entries is missing from the file.
  """
  with open(data_path, "r") as json_file:
    dataset = json.load(json_file)

  # Convert each JSON list to an array, keeping the (mfcc, delta mfcc, labels) order
  wanted_keys = ("mfcc", "delta mfcc", "labels")
  return tuple(np.array(dataset[key]) for key in wanted_keys)

In [67]:
from sklearn.model_selection import train_test_split

def split_data(features, labels, test_size = 0.30, validation_size = 0.15, random_state = 42):
  """Split features and labels into training, testing, and validation sets.

  The split is done in two passes: the testing set is taken from the full
  dataset first, then the validation set is taken from what remains. Note
  that validation_size is therefore a fraction of the post-test remainder,
  not of the full dataset.

  Args:
    features: Array of samples, indexed by sample along the first axis.
    labels: Array of targets aligned with features.
    test_size: Fraction of the full dataset held out for testing.
    validation_size: Fraction of the remaining training data held out for
      validation.
    random_state: Seed passed to both splits so the partition is reproducible.

  Returns:
    A tuple (X_train, X_test, X_val, y_train, y_test, y_val).
  """
  # Pass 1: hold out the testing set from the full dataset
  X_train, X_test, y_train, y_test = train_test_split(
      features, labels, test_size = test_size, random_state = random_state)

  # Pass 2: hold out the validation set from the remaining training data
  X_train, X_val, y_train, y_val = train_test_split(
      X_train, y_train, test_size = validation_size, random_state = random_state)

  return X_train, X_test, X_val, y_train, y_test, y_val

In [68]:
import tensorflow.keras as keras

def generate_cnn_model(input_shape, num_classes = 10):
  """Build an uncompiled sequential CNN classifier.

  The network is three Conv2D -> MaxPool2D -> BatchNormalization stages
  (32, 64 and 128 filters), followed by Flatten, a 64-unit ReLU dense layer
  with a dropout rate of 0.3, and a softmax output layer.

  Args:
    input_shape: Shape of a single input sample, excluding the batch axis.
    num_classes: Number of units in the softmax output layer. Defaults to 10.

  Returns:
    The assembled keras.Sequential model; the caller is expected to compile it.
  """
  # (filters, kernel size, pool size) of each convolutional stage, in order
  conv_stages = (
      (32, (3, 3), (3, 3)),
      (64, (3, 3), (3, 3)),
      (128, (2, 2), (2, 2)),
  )

  model = keras.Sequential()

  # Declare the input once, up front. Only the first layer of a Sequential
  # model defines the input, so the conv layers take no input_shape argument.
  model.add(keras.Input(shape = input_shape))

  for filters, kernel_size, pool_size in conv_stages:
    model.add(keras.layers.Conv2D(filters, kernel_size, activation = 'relu'))
    model.add(keras.layers.MaxPool2D(pool_size, strides = (2, 2), padding = 'same'))
    model.add(keras.layers.BatchNormalization())

  # Classifier head: flatten the feature maps, one hidden layer, then dropout
  model.add(keras.layers.Flatten())
  model.add(keras.layers.Dense(64, activation = 'relu'))
  model.add(keras.layers.Dropout(0.3))

  # One softmax unit per class
  model.add(keras.layers.Dense(num_classes, activation = 'softmax'))

  # Signal on stdout that the model has been assembled
  print("loaded")
  return model

In [69]:
# Read the MFCC features, the delta MFCC features, and the labels from the dataset file
mfcc, delta_mfcc, labels = load_data(DATA_PATH)
print(mfcc.shape)

# Pair the two feature arrays along a new trailing axis
complex_feature = np.stack((mfcc, delta_mfcc), axis = -1)
print(complex_feature.shape)

(9989, 132, 13)
(9989, 132, 13, 2)


<h3> CNN trained on MFCC: </h3>

In [79]:
# Splitting dataset into training, testing, and validation set
X_train, X_test, X_validation, y_train, y_test, y_validation = split_data(mfcc, labels)

# Adding a trailing channel axis: Conv2D expects each sample shaped (height, width, channels)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
X_validation = X_validation[..., np.newaxis]

# Shape of the training set after the split and the added channel axis
print(X_train.shape)

# Input dimension to the CNN: the shape of one sample, i.e. everything but the batch axis
input_shape = X_train.shape[1:]
print(input_shape)

(5943, 132, 13, 1)
(132, 13, 1)


In [81]:
import tensorflow.keras as keras

# Build the CNN for the input dimension derived from the MFCC training set
model_mfcc = generate_cnn_model(input_shape)

# Adam optimizer with a learning rate of 1e-4
optimizer = keras.optimizers.Adam(learning_rate = 1e-4)

# Compile with sparse categorical cross-entropy (takes integer class labels
# rather than one-hot vectors) and track accuracy during training
model_mfcc.compile(
    optimizer = optimizer,
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'],
)

# Print the layer-by-layer overview of the compiled model
model_mfcc.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


loaded


In [83]:
# Fit on the training set for 20 epochs in batches of 32; the validation set
# is scored at the end of every epoch to track validation accuracy
model_mfcc.fit(
    X_train,
    y_train,
    validation_data = (X_validation, y_validation),
    batch_size = 32,
    epochs = 20,
)

Epoch 1/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.2628 - loss: 2.3500 - val_accuracy: 0.4090 - val_loss: 1.6518
Epoch 2/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.4748 - loss: 1.5058 - val_accuracy: 0.5434 - val_loss: 1.2742
Epoch 3/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - accuracy: 0.5657 - loss: 1.2538 - val_accuracy: 0.5815 - val_loss: 1.1757
Epoch 4/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.5949 - loss: 1.1317 - val_accuracy: 0.6101 - val_loss: 1.1142
Epoch 5/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.6262 - loss: 1.0332 - val_accuracy: 0.6292 - val_loss: 1.0673
Epoch 6/20
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 32ms/step - accuracy: 0.6563 - loss: 0.9627 - val_accuracy: 0.6530 - val_loss: 1.0215
Epoch 7/20
[1m186/186

<keras.src.callbacks.history.History at 0x331c24a00>

In [86]:
# Score the trained model on the testing set; with a single 'accuracy' metric
# compiled in, evaluate returns the loss followed by the accuracy
_, accuracy = model_mfcc.evaluate(X_test, y_test)
print(f"Model accuracy: {round(100 * accuracy, 3)}%.")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7087 - loss: 0.9104
Model accuracy: 72.272%.
