In [1]:
import hamr2016
import tensorflow as tf
import numpy as np
import tqdm

In [2]:
# Location of the MIDI file used as training material
midi_path = '../data/for_elise_by_beethoven.mid'
# Parse the file into a hamr2016 Track object
track = hamr2016.Track.from_file(midi_path)
# Convert the track to its matrix form; later cells draw batches from it
matrix = track.to_matrix()

In [3]:
# Build the model graph: a stack of conv2d + bias + relu layers followed by a
# single fully connected layer that emits one logit per output pitch.

# --- Hyper-parameters -------------------------------------------------------
# Size of the second input dimension and of the output vector
num_midi = 88
# Size of the third input dimension (width of one input window)
placeholder_width = 16
# Number of output channels of each stacked convolutional layer
num_filters = [1, 1]
# Height and width of every convolution kernel
filter_shape = [8, 8]
# Graph-level seed so the randomly initialised weights are repeatable
random_seed = 0
# Post-relu activation tensor of every convolutional layer, in layer order
intermediate_results = []

with tf.Graph().as_default() as graph:
    # Without a graph-level seed the truncated-normal initial weights differ
    # on every kernel restart, which makes the recorded losses irreproducible.
    tf.set_random_seed(random_seed)

    # Create a placeholder with indeterminate first dimension (batch size)
    # and append a single dimension for the convolutional layer
    input_placeholder = tf.placeholder(
        tf.float32,
        (None, num_midi, placeholder_width, 1),
        'input_placeholder'
    )
    # Targets: one value per output unit for every example in the batch
    output_placeholder = tf.placeholder(
        tf.float32,
        (None, num_midi),
        'output_placeholder'
    )
    # Number of channels and tensor produced by the previous layer; the
    # network input has a single channel
    previous = 1
    previous_output = input_placeholder
    # Apply multiple convolutional layers
    for i, current in enumerate(num_filters):
        with tf.variable_scope('conv2d_{}'.format(i)):
            # Create weights for a convolutional layer:
            # [height, width, input channels, output channels]
            weights = tf.get_variable(
                'weights', list(filter_shape) + [previous, current],
                initializer=tf.truncated_normal_initializer()
            )
            # Apply the filter with stride 1 and no padding ('VALID'), so
            # each layer shrinks both spatial dimensions
            result = tf.nn.conv2d(
                previous_output, weights, [1, 1, 1, 1], 'VALID', name='result'
            )
            # Add a bias (one zero-initialised value per output channel)
            bias = tf.get_variable(
                'bias',
                initializer=tf.zeros_initializer(current)
            )
            result = tf.nn.bias_add(result, bias)
            # Apply relu
            result = tf.nn.relu(result)
            # Update the filter size and input
            previous_output = result
            previous = current
            intermediate_results.append(result)

    with tf.variable_scope('fully_connected'):
        # Flatten the last layer. `previous_output` is used rather than
        # `result` so that an empty `num_filters` list connects the dense
        # layer straight to the input instead of raising a NameError.
        # The product is cast to a plain int because it is used as a shape.
        size = int(np.prod(previous_output.get_shape().as_list()[1:]))
        fc_input = tf.reshape(previous_output, (-1, size))
        weights = tf.get_variable(
            'weights', [size, num_midi],
            initializer=tf.truncated_normal_initializer()
        )
        bias = tf.get_variable('bias', initializer=tf.zeros_initializer(num_midi))
        logits = tf.nn.bias_add(tf.matmul(fc_input, weights), bias)

    # Compute the loss. It is left unreduced (one value per example and
    # output unit); the cells that fetch it average it with np.mean.
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, output_placeholder)
    # Add an optimizer
    # NOTE(review): minimize() receives the unreduced tensor, so the gradients
    # are presumably those of the summed loss -- confirm this is intended.
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)

    # Op that initialises every variable created above
    init_op = tf.initialize_all_variables()

# The session is kept open on purpose: later cells reuse it for training
session = tf.Session(graph=graph)
session.run(init_op)

In [4]:
# batch_size is handed to next_batch unchanged; None presumably requests every
# available example at once -- TODO confirm against the hamr2016 implementation
batch_size = None
# Fetch the input features and the matching target outputs from the matrix
features, output = matrix.next_batch(placeholder_width, batch_size)

In [5]:
# Display the dimensions of the feature batch
features.shape

(780, 88, 16)

In [6]:
# Map each placeholder to the data it should receive
feed_dict = {}
# A trailing axis is appended to the features to provide the single channel
# dimension declared on input_placeholder
feed_dict[input_placeholder] = features[..., np.newaxis]
feed_dict[output_placeholder] = output
# Evaluate the loss once on this batch without running the training op
loss_value = session.run(loss, feed_dict=feed_dict)

In [None]:
# Repeatedly optimise on the same fixed batch, reporting the mean loss at
# regular intervals
num_steps = 10000
report_every = 100
for step in tqdm.trange(num_steps):
    # Run one optimizer update and fetch the loss in the same call
    _, loss_value = session.run([train, loss], feed_dict=feed_dict)
    if step % report_every:
        continue
    # tqdm.write prints the message without corrupting the progress bar
    tqdm.tqdm.write(str(np.mean(loss_value)))

  0%|          | 1/10000 [00:00<1:26:34,  1.92it/s]

1.83534


  1%|          | 61/10000 [00:30<1:24:25,  1.96it/s]