In [71]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Lambda
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Convolution2D, MaxPooling2D, Convolution1D
from keras.utils import np_utils

import pescador
from scipy.stats import zscore
import gzip
import os
import fnmatch

## Define data generators

In [3]:
def mil_bag_generator(cqtfile, actfile, n_bag_frames, min_active_frames, n_hop_frames, 
                      zscore_std=False, shuffle=True):
    '''
    Generate a MIL bag with corresponding label. 
    The function yields a dictionary with three elements: 
    X = features, Y = label, Z = bag ID (trackid + first frame index).

    Parameters
    ----------
    cqtfile : str
        Path to .npy.gz file containing the log-CQT matrix
    actfile : str
        Path to .npy.gz file containing the activation vector
    n_bag_frames : int
        Number of frames to include in a bag
    min_active_frames: int
        Minimum number of consecutive active frames to consider bag positive
    n_hop_frames : int
        Number of frames to jump between consecutive bags
    zscore : bool
        Whether or not to standardize the bag features (zscore)
    shuffle : bool
        Whether to shuffle the ordering of the bags (for sgd) or not (for
        validation and test)

    Returns
    -------
    bag : dictionary with X = features, Y = label, Z = bag ID
    '''
    # Load cqt file and ativation file
    cqt = np.load(gzip.open(cqtfile, 'rb'))
    act = np.load(gzip.open(actfile, 'rb'))

    order = np.arange(0, cqt.shape[1]-n_bag_frames, n_hop_frames) # librosa puts time in dim 1
    # Randomize frame order
    if shuffle:
        np.random.shuffle(order)
    # Get bag ID (from filename)
    trackid = '_'.join(os.path.basename(cqtfile).split('_')[:2])

    for frame in order:
        
        # Carve out TF patch, standardize (optional) and reshape
        patch = cqt[:, frame:frame + n_bag_frames]
        if zscore_std:
            patch = zscore(patch, axis=None)
        patch = patch.reshape(-1, patch.shape[0], patch.shape[1], 1)
        
        # Compute bag label
        patch_act = act[frame:frame+n_bag_frames]
        condition = patch_act >= 0.5
        # The following computes the length of every consecutive sequence
        # of True's in condition:
        active_lengths = np.diff(np.where(np.concatenate(
            ([condition[0]], condition[:-1] != condition[1:], [True])))[0])[::2]
        # Need at least min_active_frames to consider bag as positive
        if len(active_lengths) > 0:
            bag_label = 1 * (active_lengths.max() >= min_active_frames)
        else:
            bag_label = 0
        
        # Compute bag ID
        bagid = '{:s}_{:d}'.format(trackid, frame)    
        
        yield dict(
            X=patch,
            Y=np.asarray([bag_label], dtype=np.int32),
            ID=np.asarray([bagid]))

In [4]:
def batch_mux(streams, batch_size, n_samples=None, n_active=1000,
              with_replacement=False):
    '''
    Multiplex streams into batches of size n_batch

    Parameters
    ----------
    streams : list of pescador.Streamer
        The list of streams to multiplex
    batch_size : int > 0
        Number of samples to in each batch (batch size)
    n_samples : int or None
        Number of individual samples to generate (limit). If None, generate
        infinite number of samples (unless with_replacement is False in which
        case generate until all streams are exhausted)
    n_active : int > 0
        Number of streams that can be active simultaneously
    with_replacement : bool
        If true sample form streams indefinitely. If False streams are sampled
        until exhausted.

    Returns
    -------
    batch_streamer : pescador.Streamer
        Batch multiplexer
    '''

    stream_mux = pescador.Streamer(
        pescador.mux, streams, n_samples, n_active,
        with_replacement=with_replacement)

    batch_streamer = pescador.Streamer(
        pescador.buffer_streamer, stream_mux, batch_size)

    return batch_streamer

In [5]:
def vad_minibatch_generator(root_folder, track_list,
                            augmentations=['original'],
                            feature='cqt44100_1024_8_36', 
                            activation='vocal_activation44100_1024',
                            n_bag_frames=44,
                            min_active_frames=2,
                            n_hop_frames=22,
                            zscore_std=False,
                            shuffle=True,
                            batch_size=100, 
                            n_samples=None,
                            n_active=1000,
                            with_replacement=False):
    """

    Parameters
    ----------
    root_folder
    track_list
    augmentations
    feature
    activation
    n_bag_frames
    min_activate_frames
    n_hop_frames
    zscore_std
    shuffle
    batch_size
    n_samples
    n_active
    with_replacement

    Returns
    -------

    """
    # DEBUG
    print("Collecting feature files...")
    
    # Collect all feature files
    cqt_files = []
    for aug in augmentations:
        aug_folder = os.path.join(root_folder, aug, 'features', feature)
        for root, dirnames, filenames in os.walk(aug_folder):
            for filename in fnmatch.filter(filenames, '*cqt.npy.gz'):
                cqt_files.append(os.path.join(root, filename))
                
    # DEBUG
    print("Found {:d} files".format(len(cqt_files)))
    print("Creating streams...")

    # Turn all files into streams
    streams = []
    for cqtfile in cqt_files:
        # get matching activation file
        actfolder = os.path.join(os.path.dirname(os.path.dirname(cqtfile)), activation)
        actfile = os.path.join(actfolder, os.path.basename(cqtfile).replace('_cqt.npy.gz', 
                                                                            '_vocalactivation.npy.gz'))
        assert os.path.isfile(actfile)
        streams.append(pescador.Streamer(mil_bag_generator, cqtfile, actfile,
                                         n_bag_frames, min_active_frames, n_hop_frames, 
                                          zscore_std, shuffle))
        
    # DEBUG
    print("Done")

    # Mux the streams into minimbatches
    batch_streamer = batch_mux(streams, batch_size, n_samples=n_samples,
                               n_active=n_active, 
                               with_replacement=with_replacement)

    return batch_streamer

In [6]:
def keras_vad_minibatch_generator(root_folder, track_list,
                                  augmentations=['original'],
                                  feature='cqt44100_1024_8_36', 
                                  activation='vocal_activation44100_1024',
                                  n_bag_frames=44,
                                  min_active_frames=2,
                                  n_hop_frames=22,
                                  zscore_std=False,
                                  shuffle=True,
                                  batch_size=100, 
                                  n_samples=None,
                                  n_active=1000,
                                  with_replacement=False):
    """

    Parameters
    ----------
    root_folder
    track_list
    augmentations
    feature
    activation
    n_bag_frames
    min_activate_frames
    n_hop_frames
    zscore_std
    shuffle
    batch_size
    n_samples
    n_active
    with_replacement

    Returns
    -------

    """
    keras_generator = vad_minibatch_generator(
        root_folder, track_list, augmentations, feature, activation,
        n_bag_frames, min_active_frames, n_hop_frames, zscore_std,
        shuffle, batch_size, n_samples, n_active, with_replacement)
    
    for batch in keras_generator.generate():
        yield (batch['X'], batch['Y'])

## Construct generators

In [18]:
split = np.load('../data/dataSplits_7_1_2.pkl')

In [32]:
# TRAIN GENERATOR
root_folder = os.path.expanduser('~/datasets/MedleyDB_output/')
track_list = split[2][0]
augmentations = ['original']
feature = 'cqt44100_1024_8_36'
activation = 'vocal_activation44100_1024'
n_bag_frames = 44
min_active_frames = 2
n_hop_frames = 22
zscore_std= False
shuffle = True
batch_size = 100
n_samples = None
n_active = 1000
with_replacement = False

train_generator = keras_vad_minibatch_generator(
    root_folder, track_list, augmentations, feature, activation,
    n_bag_frames, min_active_frames, n_hop_frames, zscore_std,
    shuffle, batch_size, n_samples, n_active, with_replacement)

In [33]:
# VALIDATE GENERATOR
root_folder = os.path.expanduser('~/datasets/MedleyDB_output/')
track_list = split[2][1]
augmentations = ['original']
feature = 'cqt44100_1024_8_36'
activation = 'vocal_activation44100_1024'
n_bag_frames = 44
min_active_frames = 2
n_hop_frames = 22
zscore_std= False
shuffle = True
batch_size = 100
n_samples = None
n_active = 1000
with_replacement = False

validate_generator = keras_vad_minibatch_generator(
    root_folder, track_list, augmentations, feature, activation,
    n_bag_frames, min_active_frames, n_hop_frames, zscore_std,
    shuffle, batch_size, n_samples, n_active, with_replacement)

In [34]:
# DEBUG: get one batch
for n, batch in enumerate(train_generator):
    X_train = batch[0]
    Y_train = batch[1]
    break

Collecting feature files...
Found 37 files
Creating streams...
Done


In [35]:
print(X_train.shape, Y_train.shape)

(100, 288, 44, 1) (100,)


## Define softmax pooling layer

In [159]:
def _keras_smp(x):
    m = K.max(x, axis=-1, keepdims=True)
    sm = K.exp(x - m)
    w = sm / K.sum(sm, axis=-1, keepdims=True)
    return K.sum(x * w, axis=-1, keepdims=True)
    
def _keras_smp_shape(input_shape):
    shape = list(input_shape)
    shape[-1] = 1
    return tuple(shape)
#     return tuple(shape[:-1])
#     return (input_shape[0], 1)

SoftMaxPool = Lambda(_keras_smp, output_shape=_keras_smp_shape)

## Squeeze layer

In [160]:
def _keras_squeeze(x, axis=1):
    return K.squeeze(x, axis=axis)

def _keras_squeeze_shape(input_shape):
    shape = np.array(input_shape)
    shape = shape[shape!=1]
    return tuple(shape)

SqueezeLayer = Lambda(_keras_squeeze, output_shape=_keras_squeeze_shape, arguments={'axis': 1})
SqueezeLastLayer = Lambda(_keras_squeeze, output_shape=_keras_squeeze_shape, arguments={'axis': -1})

## Define model

In [161]:
# input TF dimensions
tf_rows, tf_cols = 288, 44
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
# pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)
fullheight_kernel_size = (tf_rows, 1)

In [162]:
if K.image_dim_ordering() == 'th':
    input_shape = (1, tf_rows, tf_cols)
else:
    input_shape = (tf_rows, tf_cols, 1)
print(input_shape)

(288, 44, 1)


In [163]:
inputs = Input(shape=input_shape)
y1 = Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same', activation='relu', name='y1')(inputs)
y2 = Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same', activation='relu', name='y2')(y1)
y3 = Convolution2D(nb_filters, fullheight_kernel_size[0], fullheight_kernel_size[1], 
                  border_mode='valid', activation='relu', name='y3')(y2)
y4 = SqueezeLayer(y3)
y5 = Convolution1D(1, 1, border_mode='valid', activation='sigmoid', name='y5')(y4)
y6 = SqueezeLastLayer(y5)
y7 = SoftMaxPool(y6)
predictions = Activation('sigmoid', name='predictions')(y7)

model = Model(input=inputs, output=predictions)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [164]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_23 (InputLayer)            (None, 288, 44, 1)    0                                            
____________________________________________________________________________________________________
y1 (Convolution2D)               (None, 288, 44, 32)   320         input_23[0][0]                   
____________________________________________________________________________________________________
y2 (Convolution2D)               (None, 288, 44, 32)   9248        y1[0][0]                         
____________________________________________________________________________________________________
y3 (Convolution2D)               (None, 1, 44, 32)     294944      y2[0][0]                         
___________________________________________________________________________________________

In [165]:
history = model.fit(X_train, Y_train, batch_size=10, nb_epoch=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

KeyboardInterrupt: 