In [15]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras

In [3]:
# Path of the JSON dataset that prepare_datasets() hands to load_data();
# load_data() reads its "mfcc" and "labels" entries.
DATA_PATH = "audio_data.json"

<br>

## Load data from a JSON file

In [8]:
def load_data(data_path):
    """Read the features and labels stored in a JSON file.

    :param data_path (str): Path to a JSON file containing "mfcc" and "labels" entries
    :return X (ndarray): Array built from the "mfcc" entry
    :return y (ndarray): Array built from the "labels" entry
    """
    with open(data_path, "r") as json_file:
        dataset = json.load(json_file)

    # convert the nested lists into numpy arrays: features first, then labels
    features, targets = (np.array(dataset[key]) for key in ("mfcc", "labels"))
    return features, targets

<br>

## Prepare dataset

In [21]:
def prepare_datasets(test_size, validation_size, random_state=None):
    """Load the dataset and split it into train, validation and test sets for a CNN.

    :param test_size: Forwarded to train_test_split as test_size for the first split
        (whole dataset -> train + test)
    :param validation_size: Forwarded to train_test_split as test_size for the second split
        (remaining train data -> train + validation)
    :param random_state: Seed forwarded to both splits so they can be reproduced.
        The default (None) keeps the original unseeded behaviour.
    :return: X_train, X_validation, X_test, y_train, y_validation, y_test
    """
    # load data
    X, y = load_data(DATA_PATH)

    # create train & test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # create train & validation sets (carved out of the training portion only)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # A Conv2D layer expects every sample to carry a trailing channel axis,
    # so append one axis of size 1 to each feature array.
    # e.g. (num_samples, 130, 13) -> (num_samples, 130, 13, 1), assuming the
    # dataset holds 130 time bins x 13 MFCC coefficients per sample -- TODO confirm
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test
     

<br>

## Build CNN model

In [22]:
def build_model(input_shape, num_classes=10):
    """Build an (uncompiled) CNN classifier made of three convolution blocks.

    Each block is Conv2D -> MaxPool2D -> BatchNormalization; the blocks are followed by
    a 64-unit dense layer with dropout and a softmax output layer.

    :param input_shape (tuple): Shape of one sample, forwarded to the first Conv2D layer
    :param num_classes (int): Number of units in the softmax output layer (default 10,
        the value that was previously hard-coded)
    :return model: The keras Sequential model
    """
    # create model
    model = keras.Sequential()

    # 1st convolution layer
    # Conv2D(number of kernels, kernel grid size, activation, input_shape)
    # Only the first layer of a Sequential model is given the input shape;
    # the following layers infer theirs from the previous layer's output.
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 2nd convolution layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 3rd convolution layer (smaller kernel and pooling window)
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # flatten the output and feed it into a dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))  # 64 = number of neurons
    model.add(keras.layers.Dropout(0.3))  # drop 30% of the dense activations during training

    # output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model
    

<br>

## Define a method for prediction

In [26]:
def predict(model, X, y):
    """Predict the class of a single sample and print it next to the expected label.

    :param model: Trained model exposing predict(batch), which returns per-class scores
    :param X (ndarray): One sample, without the leading batch axis
    :param y: Expected label of the sample; only used in the printed message
    :return predicted_index (ndarray): One-element array holding the index of the highest score
    """
    # the model works on batches, so wrap the sample in a batch of size 1
    X = X[np.newaxis, ...]

    # Do NOT forward y here: the second positional argument of Model.predict is
    # batch_size, so the label would silently be used as the batch size.
    # prediction = 2D array => [[0.1, 0.2, ...]]
    prediction = model.predict(X)

    # extract index with max value
    predicted_index = np.argmax(prediction, axis=1)  # e.g. [4]
    print("\n\nExpected index: {}, Predicted index: {}".format(y, predicted_index))

    return predicted_index
    

<br>

## Train, evaluate, and use the CNN model

In [27]:
if __name__ == "__main__":
    # create train & validation and test sets
    # (test_size=0.25 of all data, then validation_size=0.2 of the remaining train data)
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # build the CNN network
    # drop the leading sample axis: 4D dataset shape => 3D per-sample shape
    input_shape = X_train.shape[1:]
    model = build_model(input_shape)

    # compile the network
    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # train the CNN
    model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

    # evaluate the CNN on the test set
    # note: evaluate() returns the loss first, so test_error holds the test loss
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("\n\nAccuracy on test set is: {}".format(test_accuracy))

    # make prediction on a sample
    # use sample 100 when it exists, otherwise fall back to the last test sample
    # instead of failing with an IndexError on a small test split
    sample_index = min(100, len(X_test) - 1)
    X = X_test[sample_index]
    y = y_test[sample_index]
    predict(model, X, y)  # In here, model = trained model

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


Accuracy on test set is: 0.7108530402183533


Expected index: 7, Predicted index: [7]
