In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.layers import Activation, Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential
from keras.utils import normalize, to_categorical
from keras.optimizers import Adam

from sklearn.preprocessing import LabelEncoder

import joblib

import pandas as pd
import numpy as np

from pathlib import Path

Using TensorFlow backend.


# Prepare Data

## Paths

In [4]:
# Feature arrays (chroma STFT, going by the file names), one file per split.
train_data_path, valid_data_path, test_data_path = (
    Path('data/features/train_chroma_stft.joblib'),
    Path('data/features/valid_chroma_stft.joblib'),
    Path('data/features/test_chroma_stft.joblib'),
)

# Example names stored alongside the features, one file per split.
train_names_path, valid_names_path, test_names_path = (
    Path('data/features/train_name.joblib'),
    Path('data/features/valid_name.joblib'),
    Path('data/features/test_name.joblib'),
)

# Per-example metadata JSON files, one per split.
train_metadata_path, valid_metadata_path, test_metadata_path = (
    Path('data/nsynth-train/examples.json'),
    Path('data/nsynth-valid/examples.json'),
    Path('data/nsynth-test/examples.json'),
)

## Helpers

In [5]:
def encode_classes(metadata_paths):
    """Fit a label encoder on the targets found in several metadata files.

    Each file is read with ``pd.read_json(..., orient='index')``. The target
    label of a row is ``'<instrument_family_str>_<pitch>'``, and the encoder is
    fitted on the labels of all files combined.

    Parameters
    ----------
    metadata_paths : iterable of path-like
        Metadata JSON files to read. Must contain at least one entry.

    Returns
    -------
    LabelEncoder
        Encoder fitted on every target label present in the given files.

    Raises
    ------
    ValueError
        If ``metadata_paths`` is empty.
    """
    metadata_paths = list(metadata_paths)
    if not metadata_paths:
        raise ValueError('metadata_paths must contain at least one path')

    # Read every file first and concatenate once. DataFrame.append was removed
    # in pandas 2.0, and calling it in a loop re-copied the growing frame on
    # every iteration.
    df = pd.concat([pd.read_json(mdp, orient='index') for mdp in metadata_paths])

    target = df['instrument_family_str'] + '_' + df['pitch'].astype('str')

    encoder = LabelEncoder()
    encoder.fit(target)

    return encoder


In [6]:
def prepare_data(data_path, names_path, metadata_path, encoder):
    """Load one split and return model-ready features and one-hot labels.

    Parameters
    ----------
    data_path : path-like
        joblib file holding the feature array; its first axis indexes examples.
    names_path : path-like
        joblib file holding the example names, in the same order as the
        feature rows. Names are looked up in the metadata index.
    metadata_path : path-like
        Metadata JSON file, read with ``orient='index'``.
    encoder : fitted label encoder
        Must expose ``transform`` and ``classes_`` and know every
        ``'<instrument_family_str>_<pitch>'`` label of this split.

    Returns
    -------
    X : array
        The feature array with a trailing axis of length 1 appended.
    y : array
        One-hot labels with ``len(encoder.classes_)`` columns.

    Raises
    ------
    ValueError
        If the number of names differs from the number of feature rows, if the
        metadata join changes the number of rows, or if a name has no usable
        metadata entry.
    """
    # NOTE(review): joblib.load unpickles its input, so only point this at
    # files from a trusted source.
    data = joblib.load(data_path)
    names = joblib.load(names_path)
    metadata = pd.read_json(metadata_path, orient='index')

    # X comes from `data` and y from `names`; without this check a mismatch
    # would silently return arrays of different lengths.
    if len(names) != data.shape[0]:
        raise ValueError(
            'names/features mismatch: {} names for {} feature rows'.format(
                len(names), data.shape[0]))

    df = pd.DataFrame({}, index=names).merge(metadata, how='left', left_index=True, right_index=True)

    # A left join repeats a row when its key occurs more than once on the right.
    if len(df) != len(names):
        raise ValueError(
            'metadata join produced {} rows for {} names; '
            'check the metadata index for duplicates'.format(len(df), len(names)))

    target = df['instrument_family_str'] + '_' + df['pitch'].astype('str')

    # Names absent from the metadata come out of the left join as NaN labels;
    # report them here instead of failing inside the encoder.
    unlabeled = target.index[target.isna()]
    if len(unlabeled) > 0:
        raise ValueError(
            '{} example(s) have no usable metadata entry, e.g. {}'.format(
                len(unlabeled), list(unlabeled[:5])))

    target_enc = encoder.transform(target)

    # Append a trailing axis of length 1 (used as the channel axis by Conv2D).
    X = data.reshape(data.shape + (1,))
    y = to_categorical(target_enc, len(encoder.classes_))

    return X, y

## Load datasets

In [7]:
# Fit a single encoder on the metadata of all three splits so that train,
# validation and test share the same class indices.
metadata_paths = [
    train_metadata_path,
    valid_metadata_path,
    test_metadata_path,
]
encoder = encode_classes(metadata_paths)

In [8]:
# Build (features, one-hot labels) for each split with the shared encoder.
X_train, y_train = prepare_data(
    data_path=train_data_path,
    names_path=train_names_path,
    metadata_path=train_metadata_path,
    encoder=encoder,
)
X_valid, y_valid = prepare_data(
    data_path=valid_data_path,
    names_path=valid_names_path,
    metadata_path=valid_metadata_path,
    encoder=encoder,
)
X_test, y_test = prepare_data(
    data_path=test_data_path,
    names_path=test_names_path,
    metadata_path=test_metadata_path,
    encoder=encoder,
)

In [9]:
# Show the feature and label shapes of the training split.
(X_train.shape, y_train.shape)

((289205, 50, 94, 1), (289205, 1098))

In [10]:
# Show the feature and label shapes of the validation split.
(X_valid.shape, y_valid.shape)

((12678, 50, 94, 1), (12678, 1098))

In [11]:
# Show the feature and label shapes of the test split.
(X_test.shape, y_test.shape)

((4096, 50, 94, 1), (4096, 1098))

# Model

## Parameters

In [12]:
# Width of the softmax output layer: one unit per encoded class.
num_classes = len(encoder.classes_)

# Shape of a single training example, passed to the first layer.
input_shape = X_train[0].shape

# Training schedule: maximum number of epochs and mini-batch size.
epochs, batch_size = 200, 128

## Model Build

In [13]:
# Two convolutions, one pooling step, then a dense classifier head.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per class; softmax pairs with the one-hot labels.
    Dense(num_classes, activation='softmax'),
])

In [14]:
# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(
    optimizer='Adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 48, 92, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 46, 90, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 23, 45, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 23, 45, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 66240)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               8478848   
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
__________

In [15]:
filepath = 'weights/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5'

# Create the checkpoint directory up front. Older Keras releases do not create
# it, so the first save (after a full epoch of training) would fail.
Path(filepath).parent.mkdir(parents=True, exist_ok=True)

# 'val_loss' is the Keras default monitor for both callbacks; it is spelled out
# because the file name embeds the *training* loss while "best" is judged on
# the validation loss.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True)

earlystopping = EarlyStopping(monitor='val_loss', patience=5)

# Remove event files of earlier runs so TensorBoard only shows this one.
# Cleanup stays best-effort, but the actual error is reported, not hidden.
log_dir = Path('./logs')
try:
    for log in log_dir.glob('events.out.*'):
        log.unlink()
except Exception as exc:
    print('Error cleaning previous logs: {}: {}'.format(type(exc).__name__, exc))
else:
    print('Previous logs cleared.')

tensorboard = TensorBoard(log_dir=str(log_dir), update_freq=1000)
print('Log dir:', str(log_dir.absolute()))

callbacks_list = [checkpoint, earlystopping, tensorboard]

Previous logs cleared.
Log dir: /home/emredjan/stack/data/audio-to-midi/logs


In [None]:
# Train with per-epoch validation; the callbacks handle checkpointing, early
# stopping and TensorBoard logging.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks_list,
    validation_data=(X_valid, y_valid),
)

Train on 289205 samples, validate on 12678 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
  2048/289205 [..............................] - ETA: 2:15 - loss: 2.0560 - acc: 0.4370