In [4]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt

# Location of the dataset JSON file (an object with "mfcc" and "labels" entries) loaded by the functions below
data_path = "genres/data.json"


def load_data(dataset_path):
    """
    Loads training dataset from json file
    :param dataset_path (str): Path to json file containing data. The file
        must hold a JSON object with an "mfcc" entry (inputs) and a
        "labels" entry (targets)
    :return X (ndarray): Inputs
    :return y (ndarray): Targets
    :raises KeyError: If "mfcc" or "labels" is missing from the file
    """

    #read from the path given by the caller, not from the module-level default
    with open(dataset_path, "r") as fp:
        data = json.load(fp)

    #convert lists into numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    return X, y


def prepare_datasets(test_size, validation_size, random_state=None):
    """
    Loads the dataset from data_path and splits it into train, validation
    and test sets, adding a trailing channel axis to every input array
    :param test_size: test_size handed to train_test_split for the first
        split, which separates the test set from the whole dataset
    :param validation_size: test_size handed to train_test_split for the
        second split, which separates the validation set from what is left
        after the test set was removed
    :param random_state: Forwarded to both train_test_split calls so the
        splits can be reproduced; the default None keeps the splits unseeded
    :return: X_train, X_validation, X_test, y_train, y_validation, y_test
    """

    #load data
    X, y = load_data(data_path)

    #create train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    #create train/validation split (taken from the remaining training data)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state)

    #the CNN expects every sample to carry a channel axis, so append one:
    #(num_samples, ...) -> (num_samples, ..., 1)
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape, num_classes=10):
    """
    Builds the CNN: three Conv2D -> MaxPool2D -> BatchNormalization stages
    followed by a dense layer with dropout and a softmax output layer
    :param input_shape (tuple): Shape of a single input sample, without the
        batch axis
    :param num_classes (int): Number of units in the softmax output layer
    :return model: The assembled (not yet compiled) keras.Sequential model
    """

    #create model
    model = keras.Sequential()

    #1st conv layer -> the only layer that has to be told the input shape;
    #every later layer takes its input from the layer before it
    model.add(
        keras.layers.Conv2D(32, (3, 3),
                            activation='relu',
                            input_shape=input_shape))
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    #2nd conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    #3rd conv layer
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    #flatten the output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    #output layer uses softmax -> one score per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model


def predict(model, X, y):
    """
    Runs the model on a single sample and prints the expected and the
    predicted class index
    :param model: Model object exposing predict(batch)
    :param X (ndarray): A single input sample, without the batch axis
    :param y: Expected class index of the sample (only printed)
    """

    #the model works on batches, so wrap the sample in a batch of one:
    #(...) -> (1, ...)
    X = X[np.newaxis, ...]

    #prediction is a 2D array = [[0.1, 0.2, ...]] with one score per class
    prediction = model.predict(X)

    #index of the highest score; unwrap the one-element array so the printed
    #value is a plain integer like the expected index
    predicted_index = int(np.argmax(prediction, axis=1)[0])
    print("Expected index: {}, Predicted index: {}".format(y, predicted_index))


if __name__ == "__main__":

    # Split the data: test_size=0.25 for the first split, then 0.2 of the
    # remaining training data goes to validation.
    (X_train, X_validation, X_test,
     y_train, y_validation, y_test) = prepare_datasets(0.25, 0.2)

    # The network input is one sample, i.e. axes 1..3 of the training array
    # (axis 0 is the sample axis).
    input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
    model = build_model(input_shape)

    # Compile with Adam; labels are integer class indices, hence the sparse
    # categorical cross-entropy loss.
    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    model.summary()

    # Train, monitoring the validation split after every epoch.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        batch_size=32,
        epochs=30,
    )

    # Score the trained network on the held-out test split.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is {}".format(test_accuracy))

    # Spot-check a single test sample.
    X, y = X_test[100], y_test[100]
    predict(model, X, y)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 128, 11, 32)       320       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 64, 6, 32)         0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 64, 6, 32)         128       
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 62, 4, 32)         9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 31, 2, 32)         0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 31, 2, 32)         128       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 30, 1, 32)        

In [8]:
    # Spot-check another test sample; relies on X_test, y_test and model
    # still being in the kernel from the training cell above
    X = X_test[2214]
    y = y_test[2214]

    predict(model, X, y)

Expected index: 7, Predicted index[4]


In [6]:
# Show the dimensions of the test inputs (X_test comes from the training cell above)
X_test.shape

(2499, 130, 13, 1)