In [1]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras

# path to json file that stores MFCCs and genre labels for each processed segment
DATA_PATH = "mfcc_data_20s.json"

def load_data(data_path):
    """Loads training dataset from json file.

        :param data_path (str): Path to json file containing data
        :return X (ndarray): Inputs
        :return y (ndarray): Targets
    """

    with open(data_path, "r") as fp:
        data = json.load(fp)

    # convert lists to numpy arrays
    X = np.array(data["mfccs"])
    y = np.array(data["labels"])
    genres = data["genres"]

    print("Data succesfully loaded!")

    return  X, y, genres

In [2]:
# load data
X, y, genres = load_data(DATA_PATH)

Data succesfully loaded!


In [3]:
# create train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [4]:
# build network topology
model = keras.Sequential([

    # input layer
    keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])),

    # 1st dense layer
    keras.layers.Dense(512, activation='relu'),

    # 2nd dense layer
    keras.layers.Dense(256, activation='relu'),

    # 3rd dense layer
    keras.layers.Dense(64, activation='relu'),

    # output layer
    keras.layers.Dense(len(genres), activation='softmax')
])

# compile model
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimiser,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 17240)             0         
                                                                 
 dense (Dense)               (None, 512)               8827392   
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 64)                16448     
                                                                 
 dense_3 (Dense)             (None, 10)                650       
                                                                 
Total params: 8,975,818
Trainable params: 8,975,818
Non-trainable params: 0
_________________________________________________________________


In [5]:
# train model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [6]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=64)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [5.880698204040527, 0.4866666793823242]


In [7]:
# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print("Generate predictions for tests")
predictions = model.predict(X_test)
correct = 0
for i in range(predictions.shape[0]):
    max_confidence = max(predictions[i])
    pred = np.argmax(predictions[i])
    actual = y_test[i]
    if pred == actual:
        correct += 1
    print('Prediction:', genres[pred], '\tActual:', genres[actual], '\nConfidence:', max(predictions[i]),'\n')

Generate predictions for tests
Prediction: rock 	Actual: disco 
Confidence: 0.9990683 

Prediction: blues 	Actual: country 
Confidence: 0.8473773 

Prediction: jazz 	Actual: jazz 
Confidence: 0.99993503 

Prediction: rock 	Actual: rock 
Confidence: 0.8048592 

Prediction: reggae 	Actual: reggae 
Confidence: 0.9739715 

Prediction: metal 	Actual: hiphop 
Confidence: 0.73423684 

Prediction: country 	Actual: rock 
Confidence: 0.97374356 

Prediction: classical 	Actual: classical 
Confidence: 0.99998736 

Prediction: jazz 	Actual: pop 
Confidence: 0.99876934 

Prediction: classical 	Actual: classical 
Confidence: 1.0 

Prediction: pop 	Actual: pop 
Confidence: 0.99999523 

Prediction: pop 	Actual: pop 
Confidence: 1.0 

Prediction: jazz 	Actual: reggae 
Confidence: 0.99831796 

Prediction: classical 	Actual: classical 
Confidence: 1.0 

Prediction: blues 	Actual: country 
Confidence: 0.78811103 

Prediction: disco 	Actual: disco 
Confidence: 0.85710347 

Prediction: pop 	Actual: hiphop 
C

In [8]:
print(correct / predictions.shape[0] * 100,'% correctly predicted')

48.66666666666667 % correctly predicted
