### imports

In [1]:
# imports
import pandas as pd
import numpy as np
from numpy import load
from numpy import asarray
from numpy import save
from numpy import mean
from numpy import std

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
from matplotlib.image import imread
%matplotlib inline
import seaborn as sns
sns.set(style='ticks')

In [2]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
path_images = "images_2/"

In [45]:
data = pd.read_csv("labels_2/chords_seq.csv", index_col=0)

In [48]:
data.head()

Unnamed: 0_level_0,chord_sequ,len_sequ
track,Unnamed: 1_level_1,Unnamed: 2_level_1
00_BN1-129-Eb_comp_hex_cln.wav,"D#:maj, D#:maj, D#:maj, D#:maj, D#:maj, D#:maj...",45
00_BN1-147-Gb_comp_hex_cln.wav,"F#:maj, F#:maj, F#:maj, F#:maj, F#:maj, F#:maj...",40
00_BN2-131-B_comp_hex_cln.wav,"A:7, A:7, D:maj, D:maj, D:maj, G:maj, G:maj, G...",59
00_BN2-166-Ab_comp_hex_cln.wav,"B:maj, B:maj, B:maj, E:maj, E:maj, E:maj, A#:h...",47
00_BN3-119-G_comp_hex_cln.wav,"D:maj, D:maj, D:maj, E:min, E:min, E:min, E:mi...",65


### Prepare data for modeling
We'll be having sequences of 30 (the minimum sequence in the data)\
**Steps**\
Shuffle dataframe
Split data between train and test (keep whole songs as testing data\
Create a images list containing only names of images we're interested in (30 per track)\
Create labels list containing only the first 30 labels for every song\
Prep X data: for each item in the names list\
-- transform into array and reshape\
Prep y data: label encore the chords and for each item in the labels list:\
-- one hot encode chord and reshape

In [53]:
# shuffle dataframe
data_shuffled = data.sample(frac=1)

In [54]:
data_shuffled.head()

Unnamed: 0_level_0,chord_sequ,len_sequ
track,Unnamed: 1_level_1,Unnamed: 2_level_1
05_SS3-84-Bb_comp_hex_cln.wav,"F:maj, F:maj, F:maj, F:maj, F:maj, F:maj, G:mi...",92
05_SS1-100-C#_comp_hex_cln.wav,"C#:maj, C#:maj, C#:maj, C#:maj, C#:maj, C#:maj...",59
02_Jazz3-137-Eb_comp_hex_cln.wav,"A#:maj, A#:maj, C:min, C:min, C:min, G:min, G:...",57
04_Jazz3-150-C_comp_hex_cln.wav,"G:maj, A:min, A:min, A:min, E:min, E:min, E:mi...",52
01_SS2-107-Ab_comp_hex_cln.wav,"F#:7, F#:7, F#:7, B:maj, B:maj, B:maj, B:maj, ...",72


In [129]:
# this song has only 24 chords. drop it
for chord in data_shuffled[data_shuffled.index == '03_Jazz1-200-B_comp_hex_cln.wav']['chord_sequ']:
    print(chord)

B:maj, B:maj, B:maj, B:maj, E:maj, E:maj, E:maj, E:maj, E:maj, B:maj, B:maj, B:maj, B:maj, B:maj, F#:maj, F#:maj, E:maj, E:maj, E:maj, B:maj, B:maj, B:maj, B:maj, B:maj


In [135]:
# drop this row
print(len(data_shuffled))
data_shuffled = data_shuffled.drop('03_Jazz1-200-B_comp_hex_cln.wav', axis=0)
print(len(data_shuffled))

180
179


In [149]:
# drop other rows that have less than 30 chords in the sequence
data_shuffled = data_shuffled.drop('01_Jazz1-200-B_comp_hex_cln.wav', axis=0)
data_shuffled = data_shuffled.drop('02_Jazz1-200-B_comp_hex_cln.wav', axis=0)
data_shuffled = data_shuffled.drop('00_Jazz1-200-B_comp_hex_cln.wav', axis=0)

In [150]:
# split between train and test
test_portion = 0.2
data_train = data_shuffled.iloc[:round(0.8*len(data_shuffled))]
data_test = data_shuffled.iloc[round(0.8*len(data_shuffled)):]

# make sure everything is in either track_train or track_test
len(data_train) + len(data_test)

176

In [151]:
len(data_train)

141

In [152]:
# make sure we only have unique tracks
print(len(set(list(data_train.index))))

141


In [172]:
# create images_list and labels_list first 30 items. 
# make the labels list a flat list

images_list = []
labels_list = []

for index, row in data_train.iterrows():
    track_name = index[:-12]
    sequ_30 = row['chord_sequ'].split(',')[:30]
    
    # append chord
    for chord in sequ_30:
        labels_list.append(chord)

    # append image name
    for i in range(1,31):
        images_list.append(track_name + '_' + str(i))

In [154]:
len(images_list)/30

141.0

In [155]:
len(labels_list)/30

141.0

#### prep X data

In [156]:
# load photos and transform into arrays

photos = list()
for image in images_list:
    # load image
    photo = load_img(path_images + image + '.png', color_mode = "grayscale", target_size=(64,64))
    # convert to numpy array
    photo = img_to_array(photo)
    photos.append(photo)

In [165]:
# convert photos to np arrays
X_train = asarray(photos)
X_train.shape

(4230, 64, 64, 1)

In [160]:
# save array
save('photos2.npy', X_train)

#### prep y data

In [161]:
# label encode chords
le2 = LabelEncoder()
labels_list = le2.fit_transform(labels_list)
labels_list[:5]

array([57, 33, 33, 33, 33])

In [162]:
# save label encoder
import pickle
filename = 'labelencoder2.sav'
pickle.dump(le2, open(filename, 'wb'))

In [163]:
# convert labels to np arrays
labels = asarray(labels_list)
labels.shape

(4230,)

In [178]:
# print unique chords
print(sorted(set(labels_list)))
print(f'number of classes: {len(set(labels_list))}')

[' A#:7', ' A#:hdim7', ' A#:maj', ' A#:min', ' A:7', ' A:hdim7', ' A:maj', ' A:min', ' B:maj', ' B:min', ' C#:7', ' C#:hdim7', ' C#:maj', ' C#:min', ' C:7', ' C:hdim7', ' C:maj', ' C:min', ' D#:7', ' D#:maj', ' D#:min', ' D:7', ' D:maj', ' D:min', ' E:7', ' E:hdim7', ' E:maj', ' E:min', ' F#:7', ' F#:maj', ' F#:min', ' F:7', ' F:hdim7', ' F:maj', ' F:min', ' G#:7', ' G#:hdim7', ' G#:maj', ' G#:min', ' G:hdim7', ' G:maj', ' G:min', 'A#:maj', 'A:7', 'A:maj', 'B:maj', 'C#:7', 'C#:maj', 'C:7', 'C:maj', 'D#:7', 'D#:maj', 'D:maj', 'E:maj', 'F#:7', 'F#:maj', 'F:7', 'F:maj', 'G#:7', 'G#:maj', 'G:maj']
number of classes: 61


In [164]:
# save array
save('labels2.npy', labels)

In [166]:
# one hot encode target values
print(labels[:5])
y_train = to_categorical(labels)
print(y_train[:5])

[57 33 33 33 33]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [168]:
# make sure length of vector is 61
len(y_train[0])

61

In [179]:
print(X_train.shape)
print(y_train.shape)

(4230, 64, 64, 1)
(4230, 61)


### `load_data` function

In [182]:
def load_dataset():
    # load arrays
    X_train = load('photos2.npy')
    labels = load('labels2.npy')
    
    # one hot encode target values
    y_train = to_categorical(labels)
    return X_train, y_train

In [183]:
X_train, y_train = load_dataset()
print(X_train.shape)
print(y_train.shape)

(4230, 64, 64, 1)
(4230, 61)


### `prep_pixels` function

In [185]:
def prep_pixels(data):
    # normalize to range 0-1
    data_norm = data/255.0
    # return normalized images
    return data_norm

In [186]:
print(X_train[0])
X_train = prep_pixels(X_train)
print(X_train[0])

[[[255.]
  [255.]
  [255.]
  ...
  [255.]
  [255.]
  [255.]]

 [[255.]
  [255.]
  [255.]
  ...
  [255.]
  [255.]
  [255.]]

 [[255.]
  [255.]
  [255.]
  ...
  [255.]
  [255.]
  [255.]]

 ...

 [[255.]
  [255.]
  [255.]
  ...
  [255.]
  [255.]
  [255.]]

 [[255.]
  [255.]
  [255.]
  ...
  [255.]
  [255.]
  [255.]]

 [[255.]
  [255.]
  [255.]
  ...
  [255.]
  [255.]
  [255.]]]
[[[1.]
  [1.]
  [1.]
  ...
  [1.]
  [1.]
  [1.]]

 [[1.]
  [1.]
  [1.]
  ...
  [1.]
  [1.]
  [1.]]

 [[1.]
  [1.]
  [1.]
  ...
  [1.]
  [1.]
  [1.]]

 ...

 [[1.]
  [1.]
  [1.]
  ...
  [1.]
  [1.]
  [1.]]

 [[1.]
  [1.]
  [1.]
  ...
  [1.]
  [1.]
  [1.]]

 [[1.]
  [1.]
  [1.]
  ...
  [1.]
  [1.]
  [1.]]]


### Try reshaping input data into shape (141, 30, 64, 64, 1)

In [219]:
X_train_reshaped = X_train.reshape(141,30,64,64,1)
X_train_reshaped.shape

(141, 30, 64, 64, 1)

#### define model: function `define_model`

In [233]:
def define_model():
    model = models.Sequential()
    model.add(layers.TimeDistributed(layers.Conv2D(32, (3,3), activation='relu',  kernel_initializer='he_uniform'), batch_input_shape=(30, 141, 64, 64, 1)))
    model.add(layers.TimeDistributed(layers.MaxPooling2D((2,2))))
    model.add(layers.TimeDistributed(layers.Conv2D(64, (3,3), activation='relu',  kernel_initializer='he_uniform')))
    model.add(layers.TimeDistributed(layers.MaxPooling2D((2,2))))
    model.add(layers.TimeDistributed(layers.Conv2D(64, (3,3), activation='relu',  kernel_initializer='he_uniform')))
    model.add(layers.TimeDistributed(layers.MaxPooling2D((2,2))))
    model.add(layers.TimeDistributed(layers.Flatten())) # sound vector
    model.add(layers.LSTM(50, return_sequences=True, batch_input_shape=(30, 141, 64, 64, 1)))
    model.add(layers.Dense(100, activation='relu', kernel_initializer='he_uniform')) # hidden layer
    model.add(layers.Dense(61, activation='softmax')) # output, 61 classes
    # compile model
    opt = optimizers.Adam(lr=0.01) # gradient descent with momentum optimizer
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [234]:
model = define_model()
print(model.summary())

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_71 (TimeDis (30, 141, 62, 62, 32)     320       
_________________________________________________________________
time_distributed_72 (TimeDis (30, 141, 31, 31, 32)     0         
_________________________________________________________________
time_distributed_73 (TimeDis (30, 141, 29, 29, 64)     18496     
_________________________________________________________________
time_distributed_74 (TimeDis (30, 141, 14, 14, 64)     0         
_________________________________________________________________
time_distributed_75 (TimeDis (30, 141, 12, 12, 64)     36928     
_________________________________________________________________
time_distributed_76 (TimeDis (30, 141, 6, 6, 64)       0         
_________________________________________________________________
time_distributed_77 (TimeDis (30, 141, 2304)         

In [230]:
# try to fit
history = model.fit(X_train_reshaped, y_train, epochs=10, batch_size=30)

ValueError: Data cardinality is ambiguous:
  x sizes: 141
  y sizes: 4230
Please provide data which shares the same first dimension.

#### evaluate model: functions `evaluate_model`, `summarize_diagnostics` and `summarize_performances`

In [200]:
def evaluate_model(dataX, dataY, n_folds=5):
    scores, histories = list(), list() # initializing to keep track
    # prep cross validation
    kfold = KFold(n_folds, shuffle=True, random_state=1)
    # enumerate splits
    for train_ix, test_ix in kfold.split(dataX):
        # define model
        model = define_model()
        # select rows to train/test
        trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
        # fit the model
        history = model.fit(trainX, trainY, epochs=10, batch_size=30, validation_data=(testX, testY), verbose=0)
        # evaluate model
        _, acc = model.evaluate(testX, testY, verbose=0) # returns loss value and eval metric, we're only interested in that second one
        print('> %.3f' % (acc * 100.0))
        # append scores
        scores.append(acc)
        histories.append(history)
    return scores, histories

In [201]:
# plot diagnostic learning curves
def summarize_diagnostics(histories):
    for i in range(len(histories)):
        # plot loss
        plt.subplot(211)
        plt.title('Cross Entropy Loss')
        plt.plot(histories[i].history['loss'], color='blue', label='train')
        plt.plot(histories[i].history['val_loss'], color='orange', label='test')
        # plot accuracy
        plt.subplot(212)
        plt.title('Classification Accuracy')
        plt.plot(histories[i].history['accuracy'], color='blue', label='train')
        plt.plot(histories[i].history['val_accuracy'], color='orange', label='test')
    plt.show()

In [202]:
def summarize_performance(scores):
    # print summary
    print('Accuracy: mean=%.3f std=%.3f, n=%d' % (mean(scores)*100, std(scores)*100, len(scores)))
    # box and whisker plots of results
    plt.boxplot(scores)
    plt.show()

#### Complete the whole baseline model evaluation `run_test_harness`

In [203]:
# run the test harness for evaluating a model
def run_test_harness():
    # load dataset
    X_train, y_train = load_dataset()
    # prepare pixel data
    X_train = prep_pixels(X_train)
    # evaluate model
    scores, histories = evaluate_model(X_train, y_train)
    # learning curves
    summarize_diagnostics(histories)
    # summarize estimated performance
    summarize_performance(scores)

In [204]:
run_test_harness()

ValueError: in user code:

    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
        outputs = model.train_step(data)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:747 train_step
        y_pred = self(x, training=True)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py:386 call
        outputs = layer(inputs, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/layers/wrappers.py:237 call
        y = self.layer(inputs, **kwargs)
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:975 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    /opt/conda/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:191 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer conv2d_6 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: [None, 64, 1]
