In [1]:
import numpy as np
import json
import tensorflow.keras as keras

In [2]:
DATA_PATH = 'data.json'

# Read the previously extracted features from the JSON file.
with open(DATA_PATH, "r") as fp:
    data = json.load(fp)

chroma = data["chroma_vector"]
labels = data["labels"]

# The per-sample arrays differ in length along their first axis, so each one
# is zero-padded at the end of that axis up to the longest length found.
# The second axis is left untouched. This makes all samples the same shape
# so they can be packed into a single ndarray.
max_length = max(map(len, chroma))
chroma = np.array([
    np.pad(
        sample,
        ((0, max_length - len(sample)), (0, 0)),
        mode='constant',
        constant_values=0,
    )
    for sample in chroma
])
labels = np.array(labels)

print(chroma.shape)
print(labels.shape)

In [4]:
from sklearn.model_selection import train_test_split

def split_data(features, labels, test_size=0.30, val_size=0.15, random_state=42):
    """
    Split (features, labels) into training, validation and testing sets.

    The split is performed with two successive calls to train_test_split:
    first `test_size` of ALL samples is held out for testing, then `val_size`
    of the REMAINING samples is held out for validation. With the default
    values this yields approximately a 60-10-30 split (more precisely about
    59.5% train / 10.5% validation / 30% test, since 0.15 * 0.70 = 0.105).

    Parameters:
      features     -- array-like of input samples.
      labels       -- array-like of targets, same length as `features`.
      test_size    -- fraction of all samples reserved for the testing set.
      val_size     -- fraction of the non-test samples reserved for validation.
      random_state -- seed forwarded to both train_test_split calls so the
                      split is reproducible.

    Returns:
      (X_train, X_test, X_val, y_train, y_test, y_val)
      Note the order: the test set comes BEFORE the validation set.
    """
    # First pass: carve the testing set out of the full data.
    X_rest, X_test, y_rest, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )
    # Second pass: carve the validation set out of what is left.
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_size, random_state=random_state
    )
    return X_train, X_test, X_val, y_train, y_test, y_val

In [5]:
# split_data returns the test set before the validation set.
X_train, X_test, X_validation, y_train, y_test, y_validation = split_data(chroma, labels)

# Append a trailing singleton axis to every feature array so that each sample
# is 3-D, matching the three-element input_shape handed to the Conv2D model.
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
X_validation = np.expand_dims(X_validation, axis=-1)

print(chroma.shape)

# Per-sample input shape for the CNN: everything except the leading sample axis.
input_shape = X_train.shape[1:]
print(input_shape)

(1996, 290, 12)
(290, 12, 1)


In [6]:
# Sequential CNN: three Conv2D -> MaxPool2D -> BatchNormalization stages,
# followed by a small dense classifier head.
model = keras.Sequential()

# Declare the input shape once with an explicit Input object instead of
# passing `input_shape=` to the layers. Only the first layer of a Sequential
# model needs the shape, and passing it as a layer kwarg makes Keras emit a
# UserWarning recommending an Input object instead.
model.add(keras.Input(shape = input_shape))

# First Convolution Layer
model.add(keras.layers.Conv2D(32, (3, 3), activation = 'relu'))
model.add(keras.layers.MaxPool2D((3, 3), strides = (2, 2), padding = 'same'))
model.add(keras.layers.BatchNormalization())

# Second Convolution Layer
model.add(keras.layers.Conv2D(64, (3, 3), activation = 'relu'))
model.add(keras.layers.MaxPool2D((3, 3), strides = (2, 2), padding = 'same'))
model.add(keras.layers.BatchNormalization())

# Third Convolution Layer (smaller 2x2 kernel and pooling window)
model.add(keras.layers.Conv2D(128, (2, 2), activation = 'relu'))
model.add(keras.layers.MaxPool2D((2, 2), strides = (2, 2), padding = 'same'))
model.add(keras.layers.BatchNormalization())

# Converting the neurons into a 1D layer
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(64, activation = 'relu'))
# Randomly drop 30% of the dense activations during training
model.add(keras.layers.Dropout(0.3))

# Condensing neuron layer into 10 nodes, each corresponding to a chord label
model.add(keras.layers.Dense(10, activation = 'softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Adam optimizer with a learning rate of 1e-4.
optimizer = keras.optimizers.Adam(learning_rate=1e-4)

# The sparse categorical cross-entropy loss takes the labels as given in
# y_train (no one-hot encoding step is applied anywhere in this notebook);
# accuracy is tracked as the reporting metric.
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [8]:
# Train for 30 epochs in mini-batches of 32, evaluating on the validation
# split after every epoch; the returned History object is kept in base_history.
base_history = model.fit(
    X_train,
    y_train,
    validation_data=(X_validation, y_validation),
    epochs=30,
    batch_size=32,
)

Epoch 1/30
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.1534 - loss: 2.6333 - val_accuracy: 0.1143 - val_loss: 2.2934
Epoch 2/30
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.4500 - loss: 1.6175 - val_accuracy: 0.1238 - val_loss: 2.2853
Epoch 3/30
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.6183 - loss: 1.0911 - val_accuracy: 0.1143 - val_loss: 2.2645
Epoch 4/30
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.7147 - loss: 0.8392 - val_accuracy: 0.1333 - val_loss: 2.2307
Epoch 5/30
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.8077 - loss: 0.5694 - val_accuracy: 0.1571 - val_loss: 2.1814
Epoch 6/30
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.8354 - loss: 0.4955 - val_accuracy: 0.2381 - val_loss: 2.0406
Epoch 7/30
[1m38/38[0m [32m━━━━

In [9]:
# Evaluate on the held-out test set; only the accuracy metric is reported.
test_loss, accuracy = model.evaluate(X_test, y_test)
print(f"Model accuracy: {round(100 * accuracy, 3)}%.")

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9627 - loss: 0.1251
Model accuracy: 95.159%.
