In [1]:
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, LambdaCallback
from keras.callbacks import EarlyStopping, TensorBoard
import argparse
import midi
import os

from constants import *
from dataset import load_all
from generate import write_file, generate
from play_music_util import play_music

import pygame
import base64

from playsound import playsound

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import numpy as np
import tensorflow as tf
from keras.layers import Input, LSTM, Dense, Dropout, Lambda, Reshape, Permute
from keras.layers import TimeDistributed, RepeatVector, Conv1D, Activation
from keras.layers import Embedding, Flatten, dot, concatenate 
from keras.layers.merge import Concatenate, Add, Multiply
from keras.models import Model
import keras.backend as K
from keras import losses
from keras.utils import multi_gpu_model
# The original line was a dangling `from ` (a SyntaxError). The commented-out
# copy of this cell further down imports `util`, and `one_hot` is used by
# pitch_class_in_f below.
# NOTE(review): confirm `util` is the module that was intended here.
from util import *

In [3]:
# Display the first entry of `styles`.
# NOTE(review): `styles` is not defined in any visible cell; presumably it
# comes from the `constants` star-import — confirm.
styles[0]

['data/test']

In [4]:
# Load the training inputs and labels with dataset.load_all.
print('Loading data')
train_data, train_labels = load_all(styles, BATCH_SIZE, SEQ_LEN)

Loading data


In [5]:
# Print the shapes of the first three arrays in train_data.
# NOTE(review): `i` remains defined at notebook scope after this loop, and at
# least one other cell reads a global `i` without defining it.
for i in range(3):
    print(train_data[i].shape)

(10, 128, 48, 3)
(10, 128, 48, 3)
(10, 128, 16)


In [6]:
from model import *

NameError: name 'Layer' is not defined

# Model definition

In [7]:
def primary_loss(y_true, y_pred):
    """
    Combined training loss.

    Both tensors are indexed as [batch, time, note, unit]; the slices below use
    unit 0 as the "played" flag, unit 1 as the replay flag and unit 2 as the
    value trained with MSE (fed by `volume_dense` in the model builders).

    Returns:
        bce_note + bce_replay + mse + har_mse / 10, where har_mse is the mean
        squared count of notes that fall outside the best-fitting major/minor
        key at each time step.
    """
    # Scale membership per pitch class, relative to the tonic (1 = in scale).
    maj_row = np.array([1,0,1,0,1,1,0,1,0,1,0,1])
    min_row = np.array([1,0,1,1,0,1,0,1,1,0,1,0])
    # We need 1 where a pitch class is OUTSIDE the scale. `~` on an integer
    # array is a bitwise NOT (1 -> -2, 0 -> -1), so invert arithmetically.
    maj_out = 1 - maj_row
    min_out = 1 - min_row
    # One row per transposition of each scale: (2 * 12, 12).
    maj_mask = np.array([np.roll(maj_out, i) for i in range(len(maj_out))])
    min_mask = np.array([np.roll(min_out, i) for i in range(len(min_out))])
    ton_mask = K.constant(np.vstack((maj_mask, min_mask)))

    # 3 separate loss calculations based on if note is played or not
    played = y_true[:, :, :, 0]

    # Fold notes into pitch classes. Note n has pitch class n % OCTAVE (the
    # convention used by pitch_class_in_f), so the octave index must be the
    # slower-varying axis of the reshape. Using -1 for the leading axis also
    # removes the dependency on a fixed SEQ_LEN.
    harmony = K.sum(K.reshape(played, (-1, NUM_OCTAVES, OCTAVE)), axis=1)

    # For every (batch, time) row: number of played notes outside each key,
    # then keep the best-fitting key.
    harmony_dot = tf.matmul(harmony, ton_mask, transpose_b=True)
    harmony_loss = K.min(harmony_dot, axis=-1)

    # NOTE(review): the harmony term is computed from y_true only, so it does
    # not depend on the predictions and contributes no gradient. If it is
    # meant to regularise the model it probably should use y_pred[:, :, :, 0].

    bce_note = losses.binary_crossentropy(y_true[:, :, :, 0],  y_pred[:, :, :, 0])
    # Replay and volume are only penalised where a note is actually played:
    # elsewhere the prediction is replaced by the target, giving zero error.
    bce_replay = losses.binary_crossentropy(y_true[:, :, :, 1], tf.multiply(played, y_pred[:, :, :, 1]) + tf.multiply(1 - played, y_true[:, :, :, 1]))
    mse = losses.mean_squared_error(y_true[:, :, :, 2], tf.multiply(played, y_pred[:, :, :, 2]) + tf.multiply(1 - played, y_true[:, :, :, 2]))
    # Scalar: mean squared out-of-key count over all (batch, time) rows.
    har_mse = K.mean(K.square(harmony_loss))

    return bce_note + bce_replay + mse + har_mse/10


def pitch_pos_in_f(time_steps):
    """
    Build a function that yields the relative pitch position of every note.

    The returned function only uses the batch size of its argument and
    produces a float32 tensor of shape [batch, time_steps, NUM_NOTES, 1]
    holding n / NUM_NOTES for note index n.
    """
    def f(x):
        batch_size = tf.shape(x)[0]
        positions = tf.range(NUM_NOTES, dtype='float32') / NUM_NOTES
        # Repeat the per-note positions once for every (batch, time) pair.
        tiled = tf.tile(positions, [batch_size * time_steps])
        return tf.reshape(tiled, [batch_size, time_steps, NUM_NOTES, 1])
    return f

def pitch_class_in_f(time_steps):
    """
    Build a function that yields a one-hot pitch class for every note.

    The returned function only uses the batch size of its argument and
    produces a float32 tensor of shape [batch, time_steps, NUM_NOTES, OCTAVE]
    where note n is encoded as one_hot(n % OCTAVE, OCTAVE).
    """
    def f(x):
        one_hot_rows = [one_hot(note % OCTAVE, OCTAVE) for note in range(NUM_NOTES)]
        classes = tf.constant(np.array(one_hot_rows), dtype='float32')
        # Add singleton batch/time axes, then broadcast by tiling.
        classes = tf.reshape(classes, [1, 1, NUM_NOTES, OCTAVE])
        return tf.tile(classes, [tf.shape(x)[0], time_steps, 1, 1])
    return f

def pitch_bins_f(time_steps):
    """
    Build a function that yields, for every note, the activity of its pitch class.

    For input x indexed as [batch, time, note, unit], the returned function
    sums unit 0 over all notes sharing a pitch class (note % OCTAVE) and
    broadcasts that sum back to every note of the class.

    Returns a function producing a tensor of shape
    [batch, time_steps, NUM_NOTES, 1].
    """
    def f(x):
        # [batch, time, OCTAVE]: per-pitch-class sum over octaves. The class
        # axis is stacked LAST so it lines up with the note axis below; the
        # previous version left it first and reshaped (NUM_NOTES, batch, time)
        # straight into (batch, time, NUM_NOTES, 1), mixing up the axes.
        bins = tf.stack(
            [tf.reduce_sum(x[:, :, i::OCTAVE, 0], axis=2) for i in range(OCTAVE)],
            axis=-1)
        # Note n has class n % OCTAVE, so repeating the class vector
        # NUM_OCTAVES times along the last axis yields one value per note.
        bins = tf.tile(bins, [1, 1, NUM_OCTAVES])
        bins = tf.reshape(bins, [tf.shape(x)[0], time_steps, NUM_NOTES, 1])
        return bins
    return f

def build_models(time_steps=SEQ_LEN, input_dropout=0.2, dropout=0.5):
    """
    Build the training model and the two models used during generation.

    Args:
        time_steps: sequence length of the training inputs.
        input_dropout: dropout rate applied directly to every input.
        dropout: dropout rate passed to the time-axis and note-axis builders.

    Returns:
        (model, time_model, note_model):
        model      - compiled with 'nadam' and primary_loss; inputs are
                     [notes_in, chosen_in, beat_in].
        time_model - maps [notes_in, beat_in] to the time-axis output.
        note_model - maps [note_features, chosen_gen_in] (one time step) to
                     the note-axis output, reusing the same note-axis builder.
    """
    notes_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))
    beat_in = Input((time_steps, NOTES_PER_BAR))
    # Target input for conditioning
    chosen_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))

    # Dropout inputs
    notes_dropped = Dropout(input_dropout)(notes_in)
    beat_dropped = Dropout(input_dropout)(beat_in)
    chosen_dropped = Dropout(input_dropout)(chosen_in)

    # Time axis
    time_out = time_axis(dropout)(notes_dropped, beat_dropped)

    # Note axis & prediction layer
    naxis = note_axis(dropout)
    notes_out = naxis(time_out, chosen_dropped)

    model = Model([notes_in, chosen_in, beat_in], [notes_out])

    # Replicate across GPUs only when at least two are available.
    gpu_count = len(K.tensorflow_backend._get_available_gpus())
    if gpu_count >= 2:
        model = multi_gpu_model(model)

    model.compile(optimizer='nadam', loss=[primary_loss])

    # Generation models
    time_model = Model([notes_in, beat_in], [time_out])

    note_features = Input((1, NUM_NOTES, TIME_AXIS_UNITS), name='note_features')
    chosen_gen_in = Input((1, NUM_NOTES, NOTE_UNITS), name='chosen_gen_in')

    # Dropout inputs
    chosen_gen = Dropout(input_dropout)(chosen_gen_in)

    note_gen_out = naxis(note_features, chosen_gen)
    note_model = Model([note_features, chosen_gen_in], note_gen_out)

    return model, time_model, note_model


def build_models_with_attention(time_steps=SEQ_LEN, input_dropout=0.2, dropout=0.5):
    """
    Build the attention-based training model and its generation models.

    Same interface as build_models, but the note axis is produced by
    note_axis_attention, which takes only the time-axis output. The "chosen"
    inputs are therefore kept purely so callers can feed the same input lists
    as for build_models; they are not connected to any output.

    Args:
        time_steps: sequence length of the training inputs.
        input_dropout: dropout rate applied to the notes and beat inputs.
        dropout: dropout rate passed to the time-axis and note-axis builders.

    Returns:
        (model, time_model, note_model), in the same order as build_models.
    """
    notes_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))
    beat_in = Input((time_steps, NOTES_PER_BAR))
    # Target input for conditioning (unused by the attention note axis).
    chosen_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))

    # Dropout inputs. The Dropout layers that used to be built on the chosen
    # inputs were never consumed, so they are no longer created.
    notes = Dropout(input_dropout)(notes_in)
    beat = Dropout(input_dropout)(beat_in)

    # Time axis
    time_out = time_axis(dropout)(notes, beat)

    # Note axis & prediction layer
    naxis = note_axis_attention(dropout)
    notes_out = naxis(time_out)

    model = Model([notes_in, chosen_in, beat_in], [notes_out])

    # Replicate across GPUs only when at least two are available.
    if len(K.tensorflow_backend._get_available_gpus())>=2:
        model = multi_gpu_model(model)

    model.compile(optimizer='nadam', loss=[primary_loss])

    # Generation models
    time_model = Model([notes_in, beat_in], [time_out])

    note_features = Input((1, NUM_NOTES, TIME_AXIS_UNITS), name='note_features')
    chosen_gen_in = Input((1, NUM_NOTES, NOTE_UNITS), name='chosen_gen_in')

    note_gen_out = naxis(note_features)

    note_model = Model([note_features, chosen_gen_in], note_gen_out)

    return model, time_model, note_model

def note_axis_attention(dropout):
    """
    Build the attention-based note-axis function.

    The two output Dense layers are created once here, so every call of the
    returned function shares them. The returned function applies
    attention_layer and dropout to x, then concatenates a 2-unit sigmoid
    output with a 1-unit linear output on the last axis.

    NOTE(review): attention_layer builds new layers on every call, so the
    attention weights are not shared between separate calls of the returned
    function — confirm this is intended for the generation model.
    """
    note_dense_att = Dense(2, activation='sigmoid', name='note_dense_att')
    volume_dense_att = Dense(1, name='volume_dense_att')

    def f(x):
        attended = attention_layer(x, x, True, DENSE_SIZE, N_HEADS, PROJECTION_DIM)
        attended = Dropout(dropout)(attended)
        merge = Concatenate(axis=-1)
        note_out = note_dense_att(attended)
        volume_out = volume_dense_att(attended)
        return merge([note_out, volume_out])

    return f


In [8]:
def time_axis(dropout):
    """
    Build the time-axis function.

    The returned function f(notes, beat) builds per-note features (pitch
    position, pitch class, pitch-class bins, an octave convolution of the
    notes, and the beat repeated for every note), optionally passes them
    through self-attention when the global `self_attention` flag is true, and
    then runs TIME_AXIS_LAYERS LSTM layers along the time axis of each note.

    Returns a tensor laid out as [batch, time, notes, features].
    """
    def f(notes, beat):
        time_steps = int(notes.get_shape()[1])

        # TODO: Experiment with when to apply conv
        note_octave = TimeDistributed(Conv1D(OCTAVE_UNITS, 2 * OCTAVE, padding='same'))(notes)
        note_octave = Activation('tanh')(note_octave)
        note_octave = Dropout(dropout)(note_octave)

        # Create features for every single note.
        note_features = Concatenate()([
            Lambda(pitch_pos_in_f(time_steps))(notes),
            Lambda(pitch_class_in_f(time_steps))(notes),
            Lambda(pitch_bins_f(time_steps))(notes),
            note_octave,
            TimeDistributed(RepeatVector(NUM_NOTES))(beat)
        ])

        if self_attention:
            # The attention output is added to its input inside
            # attention_layer, so its width must equal the feature width.
            # Deriving it here replaces the hard-coded 94 / 47.
            feature_dim = K.int_shape(note_features)[-1]
            # Previously the result was stored in `note_features` AFTER `x`
            # had already been taken from it, so the attention was discarded.
            note_features = attention_layer(note_features, note_features,
                                            True, DENSE_SIZE=feature_dim,
                                            N_HEADS=2,
                                            PROJECTION_DIM=feature_dim // 2)

        # [batch, notes, time, features]
        x = Permute((2, 1, 3))(note_features)

        # Apply LSTMs
        for _ in range(TIME_AXIS_LAYERS):
            x = TimeDistributed(LSTM(TIME_AXIS_UNITS, return_sequences=True))(x)
            x = Dropout(dropout)(x)

        # [batch, time, notes, features]
        return Permute((2, 1, 3))(x)
    return f

def note_axis(dropout):
    """
    Build the LSTM note-axis function.

    The LSTM layers (via lstm_layer_cache) and the two output Dense layers are
    created once per call of note_axis, so every call of the returned function
    f(x, chosen) reuses them. f concatenates x with the target notes shifted by
    one position along the note axis, runs NOTE_AXIS_LAYERS LSTM layers along
    the notes, and returns a 2-unit sigmoid output concatenated with a 1-unit
    linear output.

    Behaviour also depends on the globals MASK and self_attention.
    """
    lstm_layer_cache = {}
    note_dense = Dense(2, activation='sigmoid', name='note_dense')
    volume_dense = Dense(1, name='volume_dense')

    def f(x, chosen):
        time_steps = int(x.get_shape()[1])
        
        if MASK:
            # Flatten (notes, time) into a single sequence axis, attend over
            # it, then restore the [batch, time, notes, features] layout.
            # NOTE(review): attention_layer creates new layers on each call,
            # so these weights are not shared between calls of f.
            #print('x', x.shape)
            x_att = Permute((2,1,3))(x)
            #print('x_att', x_att.shape)
            x_att = Reshape((time_steps*NUM_NOTES, TIME_AXIS_UNITS))(x_att)
            #print('x_att', x_att.shape)
            x_att = attention_layer(x_att, x_att, True, DENSE_SIZE, N_HEADS, PROJECTION_DIM)
            x_att = Reshape((NUM_NOTES, time_steps, TIME_AXIS_UNITS))(x_att)
            x = Permute((2,1,3))(x_att)
        
        
        # NOTE(review): the result of this call is discarded, so x is unchanged
        # and the attention layers built here are not part of the model.
        # Assigning it to x would also require sharing the attention layers
        # between calls of f (they are re-created each time), otherwise a
        # second call would use separate, untrained attention weights.
        # The notebook also sets an `attention_in_note` flag that no visible
        # code reads — possibly the intended condition here; confirm.
        if self_attention: attention_layer(x, x, True, DENSE_SIZE, N_HEADS, PROJECTION_DIM)


        # Shift target one note to the left.
        shift_chosen = Lambda(lambda x: tf.pad(x[:, :, :-1, :], [[0, 0], [0, 0], [1, 0], [0, 0]]))(chosen)

        # [batch, time, notes, features + 1]
        #print('x', x.shape)
        #print('shift_chosen', shift_chosen.shape)
        x = Concatenate(axis=3)([x, shift_chosen])


        for l in range(NOTE_AXIS_LAYERS):
            # Create each LSTM once so later calls of f reuse its weights.
            if l not in lstm_layer_cache:
                lstm_layer_cache[l] = LSTM(NOTE_AXIS_UNITS, return_sequences=True)

            x = TimeDistributed(lstm_layer_cache[l])(x)
            x = Dropout(dropout)(x)
            
        #print('x', x.shape)  
        #print('nx', note_dense(x).shape)
        
        return Concatenate()([note_dense(x), volume_dense(x)])
    return f


# Score assigned to masked positions before the softmax; large and negative so
# that their attention weight is effectively zero.
_MASKED_SCORE = -1e9


def _masked_time_row(scores, row, time_steps):
    """Slice time row `row` and suppress every column at or beyond (row + 1) * NUM_NOTES."""
    visible_cols = (row + 1) * NUM_NOTES
    hidden_cols = NUM_NOTES * time_steps - visible_cols
    paddings = [[0, 0], [0, 0], [0, 0], [0, hidden_cols]]
    kept = scores[:, :, row:row + 1, 0:visible_cols]
    # 1 for kept columns, 0 for padded ones.
    visible = tf.pad(tf.ones_like(kept), paddings)
    # Zero-padding alone would leave a logit of 0 (weight exp(0) = 1 before
    # normalisation), so push the padded columns to a large negative score.
    return tf.pad(kept, paddings) + (1.0 - visible) * _MASKED_SCORE


def OneHeadAttention(a_drop, q_drop, PROJECTION_DIM, drop_ratio=0.5,):
    """
    Build a single attention head.

    Args:
        a_drop: tensor projected to the keys and the values.
        q_drop: tensor projected to the queries.
        PROJECTION_DIM: width of the key/query/value projections.
        drop_ratio: dropout rate applied to each projection.

    Returns:
        The attention-weighted values multiplied element-wise by the query
        projection (last axis of width PROJECTION_DIM).

    When the global MASK is true, the score matrix is treated as
    [NUM_NOTES * time_steps, NUM_NOTES * time_steps] and each time row only
    keeps its first (row + 1) * NUM_NOTES columns.
    NOTE(review): rows are unflattened note-major (NUM_NOTES, time_steps),
    matching how note_axis flattens its input, but the kept columns are a
    contiguous prefix, which assumes a time-major key order — verify the mask
    matches the intended causal structure.
    """
    a_proj = Dense(PROJECTION_DIM, use_bias=False, kernel_initializer='glorot_normal')(a_drop)
    q_proj = Dense(PROJECTION_DIM, use_bias=False, kernel_initializer='glorot_normal')(q_drop)
    v_proj = Dense(PROJECTION_DIM, use_bias=False, kernel_initializer='glorot_normal')(a_drop)

    a_proj = Dropout(drop_ratio)(a_proj)
    q_proj = Dropout(drop_ratio)(q_proj)
    v_proj = Dropout(drop_ratio)(v_proj)

    # Raw attention scores: queries x keys^T.
    att_input = Lambda(lambda x: tf.matmul(x[0],x[1], transpose_b=True))([q_proj, a_proj])

    if MASK:
        time_steps = int(att_input.get_shape()[1])//NUM_NOTES
        att_input = Reshape((NUM_NOTES, time_steps, time_steps*NUM_NOTES))(att_input)
        time_mask = []
        for i in range(time_steps):
            # Pass the row index through `arguments` instead of closing over
            # the loop variable: a closure would see the final value of `i`
            # whenever the layer is invoked again (e.g. by multi_gpu_model).
            time_mask.append(Lambda(
                _masked_time_row,
                arguments={'row': i, 'time_steps': time_steps})(att_input))

        # With a single time step nothing is hidden, so the scores are kept.
        if time_steps>1:
            att_input = Concatenate(axis=2)(time_mask)
        att_input = Reshape((time_steps*NUM_NOTES, time_steps*NUM_NOTES))(att_input)

    att_weights = Activation('softmax')(att_input)
    v_new = Lambda(lambda x: tf.matmul(x[0],x[1]))([att_weights, v_proj])

    # Gate the attended values with the query projection.
    v_new = Multiply()([q_proj, v_new])

    return v_new

def MultyHeadAttention(a_drop, q_drop, DENSE_SIZE, N_HEADS, PROJECTION_DIM):
    """
    Run N_HEADS independent attention heads and merge them.

    Each head is built by OneHeadAttention with its own projections. The head
    outputs are concatenated on the last axis and mapped to DENSE_SIZE units
    by a bias-free Dense layer.
    """
    heads = [OneHeadAttention(a_drop, q_drop, PROJECTION_DIM) for _ in range(N_HEADS)]
    merged_heads = concatenate(heads, axis=-1)
    return Dense(DENSE_SIZE, use_bias=False)(merged_heads)
    
def attention_layer(a_drop, q_drop, FF, DENSE_SIZE, N_HEADS, PROJECTION_DIM):
    """
    Multi-head attention block with residual connections.

    Args:
        a_drop: tensor used for keys/values and for the residual connection;
            its last axis must be addable to a DENSE_SIZE-wide tensor.
        q_drop: tensor used for the queries.
        FF: when true, append a feed-forward sub-block
            (Dense(4 * DENSE_SIZE, relu) -> Dense(DENSE_SIZE) -> Dropout(0.1))
            with its own residual connection.
        DENSE_SIZE, N_HEADS, PROJECTION_DIM: forwarded to MultyHeadAttention.

    Returns:
        The output tensor of the block.
    """
    heads_out = MultyHeadAttention(a_drop, q_drop, DENSE_SIZE, N_HEADS, PROJECTION_DIM)

    # Residual connection around the attention heads.
    att = Add()([a_drop, heads_out])

    if not FF:
        return att

    # Feed-forward sub-block with its own residual connection.
    hidden = Dense(DENSE_SIZE*4, activation = 'relu')(att)
    projected = Dense(DENSE_SIZE)(hidden)
    projected = Dropout(0.1)(projected)
    return Add()([att, projected])


In [10]:
# MASK = True
# self_attention = True
# attention_in_note = False
# models = build_models(time_steps=SEQ_LEN, 
#                                      input_dropout=0.2, dropout=0.5)

# With Attention

In [11]:
# Global switches read by the model-building functions at build time.
MASK = False
self_attention = True
# NOTE(review): no visible code reads `attention_in_note`.
attention_in_note = False
models = build_models_with_attention(time_steps=SEQ_LEN, 
                                     input_dropout=0.2, dropout=0.5)

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead


In [13]:
cbs = [
    # Save weights only, and only when the training loss improves.
    ModelCheckpoint(
        os.path.join(OUT_DIR, 'model_self_test.h5'),
        monitor='loss',
        save_best_only=True,
        save_weights_only=True,
    ),
    # Stop after 5 epochs without improvement of the training loss.
    EarlyStopping(monitor='loss', patience=5),
    TensorBoard(log_dir='out/logs', histogram_freq=1),
]

print('Training')
models[0].fit(
    train_data,
    train_labels,
    validation_split=0.05,
    epochs=2,
    batch_size=1,
    callbacks=cbs,
)


Training
Train on 9 samples, validate on 1 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1c355d0940>

In [14]:
# Rebuild the attention models, load the checkpointed weights into the
# training model, then generate and write a sample.
models = build_models_with_attention(time_steps=SEQ_LEN, 
                                     input_dropout=0.2, dropout=0.5)
models[0].load_weights(os.path.join(OUT_DIR, 'model_self_test.h5'))
write_file('output/test_self', generate(models, 4, Attention = True))

  0%|          | 0/64 [00:00<?, ?it/s]

Generating with no styles:


100%|██████████| 64/64 [00:18<00:00,  3.42it/s]

Writing file out/samples/output/test_self_0.mid





In [15]:
# Play back the sample written by the previous generation cell.
midi_file = 'out/samples/output/test_self_0.mid'
play_music(midi_file)

Music file out/samples/output/test_self_0.mid loaded!


# Canonical with full attention

In [16]:
# Global switches read by the model-building functions at build time.
MASK = False
self_attention = True
# NOTE(review): no visible code reads `attention_in_note`.
attention_in_note = True
models = build_models(input_dropout=0.2, dropout=0.5)

In [18]:
cbs = [
    # Save weights only, and only when the training loss improves.
    ModelCheckpoint(
        os.path.join(OUT_DIR, 'model_matrix_self.h5'),
        monitor='loss',
        save_best_only=True,
        save_weights_only=True,
    ),
    # Stop after 5 epochs without improvement of the training loss.
    EarlyStopping(monitor='loss', patience=5),
    #TensorBoard(log_dir='out/logs', histogram_freq=1)
]

print('Training')
models[0].fit(
    train_data,
    train_labels,
    validation_split=0.05,
    epochs=2,
    batch_size=1,
    callbacks=cbs,
)


Training
Train on 9 samples, validate on 1 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1c42543320>

In [19]:
# Rebuild the models under the currently set global flags, load the
# checkpointed weights into the training model, then generate a sample.
models = build_models()
models[0].load_weights(os.path.join(OUT_DIR, 'model_matrix_self.h5'))
write_file('output/test_overall_att', generate(models, 4))

  0%|          | 0/64 [00:00<?, ?it/s]

Generating with no styles:


100%|██████████| 64/64 [00:52<00:00,  1.21it/s]

Writing file out/samples/output/test_overall_att_0.mid





In [36]:
# Play back the sample written by the previous generation cell.
midi_file = 'out/samples/output/test_overall_att_0.mid'
play_music(midi_file)

Music file out/samples/output/test_overall_att_0.mid loaded!


# Canonical

In [21]:
# Baseline configuration: every attention switch is off.
MASK = False
self_attention = False
# NOTE(review): no visible code reads `attention_in_note`.
attention_in_note = False
models = build_models()

In [22]:
cbs = [
    # Save weights only, and only when the training loss improves.
    ModelCheckpoint(
        os.path.join(OUT_DIR, 'model_canonical.h5'),
        monitor='loss',
        save_best_only=True,
        save_weights_only=True,
    ),
    # Stop after 5 epochs without improvement of the training loss.
    EarlyStopping(monitor='loss', patience=5),
    #TensorBoard(log_dir='out/logs', histogram_freq=1)
]

print('Training')
models[0].fit(
    train_data,
    train_labels,
    validation_split=0.05,
    epochs=2,
    batch_size=1,
    callbacks=cbs,
)


Training
Train on 9 samples, validate on 1 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1c4b257630>

In [23]:
# Rebuild the baseline models, load the checkpointed weights into the
# training model, then generate and write a sample.
models = build_models()
models[0].load_weights(os.path.join(OUT_DIR, 'model_canonical.h5'))
write_file('output/canonical_test', generate(models, 4))

  0%|          | 0/64 [00:00<?, ?it/s]

Generating with no styles:


100%|██████████| 64/64 [00:55<00:00,  1.16it/s]

Writing file out/samples/output/canonical_test_0.mid





In [21]:
# Play back the sample written by the previous generation cell.
midi_file = 'out/samples/output/canonical_test_0.mid'
play_music(midi_file)

Music file out/samples/output/canonical_test_0.mid loaded!


# Testing Keras functions

In [9]:
# Round-trip a MIDI file through decode -> clamp -> unclamp -> encode and
# write the result to SAMPLES_DIR.
# The file name used to depend on a global `i` left over from an unrelated
# loop in another cell; use an explicit index so the cell runs on its own.
sample_index = 0
fpath = os.path.join(SAMPLES_DIR, 'encode_decoded_song' + '_' + str(sample_index) + '.mid')
pattern = midi.read_midifile('data/Bach1/Toccata & Fuga in F-Dur, BWV 540.mid')
result = midi_decode(pattern)
mf = midi_encode(unclamp_midi(clamp_midi(result)))
midi.write_midifile(fpath, mf)

In [51]:
# Scratch check of the slice-and-pad masking pattern used in OneHeadAttention,
# on a small (1, 2, 2, 4) tensor.
A = np.array([[np.ones((2,4)), np.eye(4)[:2]]])
A = K.variable(A)
print(K.eval(A))
print('A', A.shape)
# x = Dense(4, kernel_initializer='Ones')(A)\
#print('slice x', K.eval(A[:, :, :-1, :]))

time_steps = int(A.get_shape()[2])
note_dim = int(A.get_shape()[1])

print('time_steps', time_steps)
print('A', K.eval(A[:, :, 0, :]))

# For row i keep the first (i + 1) * note_dim columns and zero-pad the rest.
# NOTE(review): the lambda reads `i` when it is called; that is safe here only
# because each Lambda is applied immediately inside the loop.
time_mask = []
for i in range(time_steps):
    time_mask.append(Lambda(lambda x: tf.pad(x[:, :, i:i+1, 0:(i+1)*note_dim], 
        [[0, 0], [0, 0],[0, 0],  [0, note_dim*time_steps-(i+1)*note_dim]]))(A))
# x = Lambda(lambda x: tf.pad(x[:, :, :-1, :], 
#                         [[0, 0], [0, 0], [1, 0], [0, 0]]))(A)


print('x0', time_mask[0].shape)
print('x0', K.eval(time_mask[0]))

print('x1', time_mask[1].shape)
print('x1 ', K.eval(time_mask[1]))

# Stack the per-row slices back along the time axis.
x = Concatenate(axis=2)(time_mask)

print('x', x.shape)
print('x ', K.eval(x))

[[[[1. 1. 1. 1.]
   [1. 1. 1. 1.]]

  [[1. 0. 0. 0.]
   [0. 1. 0. 0.]]]]
A (1, 2, 2, 4)
time_steps 2
A [[[1. 1. 1. 1.]
  [1. 0. 0. 0.]]]
x0 (1, 2, 1, 4)
x0 [[[[1. 1. 0. 0.]]

  [[1. 0. 0. 0.]]]]
x1 (1, 2, 1, 4)
x1  [[[[1. 1. 1. 1.]]

  [[0. 1. 0. 0.]]]]
x (1, 2, 2, 4)
x  [[[[1. 1. 0. 0.]
   [1. 1. 1. 1.]]

  [[1. 0. 0. 0.]
   [0. 1. 0. 0.]]]]


In [24]:
# models[0].save_weights(os.path.join(OUT_DIR, 'raw_model.h5'))

In [19]:
# models = build_models()
# models[0].load_weights(MODEL_FILE)
# write_file('output2', generate(models, 4, styles))

In [27]:
# import numpy as np
# import tensorflow as tf
# from keras.layers import Input, LSTM, Dense, Dropout, Lambda, Reshape, Permute
# from keras.layers import TimeDistributed, RepeatVector, Conv1D, Activation
# from keras.layers import Embedding, Flatten, dot, concatenate 
# from keras.layers.merge import Concatenate, Add, Multiply
# from keras.models import Model
# import keras.backend as K
# from keras import losses

# from util import *
# from constants import *

# from keras.utils import multi_gpu_model

# def primary_loss(y_true, y_pred):
#     # 3 separate loss calculations based on if note is played or not
#     played = y_true[:, :, :, 0]
#     harmony = K.sum(K.reshape(played,(-1,128,12,4)), axis = -1)
#     bce_note = losses.binary_crossentropy(y_true[:, :, :, 0], y_pred[:, :, :, 0])
#     bce_replay = losses.binary_crossentropy(y_true[:, :, :, 1], tf.multiply(played, y_pred[:, :, :, 1]) + tf.multiply(1 - played, y_true[:, :, :, 1]))
#     mse = losses.mean_squared_error(y_true[:, :, :, 2], tf.multiply(played, y_pred[:, :, :, 2]) + tf.multiply(1 - played, y_true[:, :, :, 2]))
#     return bce_note + bce_replay + mse

# def pitch_pos_in_f(time_steps):
#     """
#     Returns a constant containing pitch position of each note
#     """
#     def f(x):
#         note_ranges = tf.range(NUM_NOTES, dtype='float32') / NUM_NOTES
#         repeated_ranges = tf.tile(note_ranges, [tf.shape(x)[0] * time_steps])
#         return tf.reshape(repeated_ranges, [tf.shape(x)[0], time_steps, NUM_NOTES, 1])
#     return f

# def pitch_class_in_f(time_steps):
#     """
#     Returns a constant containing pitch class of each note
#     """
#     def f(x):
#         pitch_class_matrix = np.array([one_hot(n % OCTAVE, OCTAVE) for n in range(NUM_NOTES)])
#         pitch_class_matrix = tf.constant(pitch_class_matrix, dtype='float32')
#         pitch_class_matrix = tf.reshape(pitch_class_matrix, [1, 1, NUM_NOTES, OCTAVE])
#         return tf.tile(pitch_class_matrix, [tf.shape(x)[0], time_steps, 1, 1])
#     return f

# def pitch_bins_f(time_steps):
#     def f(x):
#         bins = tf.reduce_sum([x[:, :, i::OCTAVE, 0] for i in range(OCTAVE)], axis=3)
#         bins = tf.tile(bins, [NUM_OCTAVES, 1, 1])
#         bins = tf.reshape(bins, [tf.shape(x)[0], time_steps, NUM_NOTES, 1])
#         return bins
#     return f

# def time_axis(dropout):
#     def f(notes, beat):
#         time_steps = int(notes.get_shape()[1])

#         # TODO: Experiment with when to apply conv
#         note_octave = TimeDistributed(Conv1D(OCTAVE_UNITS, 2 * OCTAVE, padding='same'))(notes)
#         note_octave = Activation('tanh')(note_octave)
#         note_octave = Dropout(dropout)(note_octave)

#         # Create features for every single note.
#         note_features = Concatenate()([
#             Lambda(pitch_pos_in_f(time_steps))(notes),
#             Lambda(pitch_class_in_f(time_steps))(notes),
#             Lambda(pitch_bins_f(time_steps))(notes),
#             note_octave,
#             TimeDistributed(RepeatVector(NUM_NOTES))(beat)
#         ])

#         x = note_features
#         # [batch, notes, time, features]
#         x = Permute((2, 1, 3))(x)

#         # Apply LSTMs
#         for l in range(TIME_AXIS_LAYERS):

#             x = TimeDistributed(LSTM(TIME_AXIS_UNITS, return_sequences=True))(x)
#             x = Dropout(dropout)(x)

#         # [batch, time, notes, features]
#         return Permute((2, 1, 3))(x)
#     return f

# def note_axis(dropout):
#     lstm_layer_cache = {}
#     note_dense = Dense(2, activation='sigmoid', name='note_dense')
#     volume_dense = Dense(1, name='volume_dense')

#     def f(x, chosen):
#         time_steps = int(x.get_shape()[1])

#         # Shift target one note to the left.
#         shift_chosen = Lambda(lambda x: tf.pad(x[:, :, :-1, :], [[0, 0], [0, 0], [1, 0], [0, 0]]))(chosen)

#         # [batch, time, notes, features + 1]
#         x = Concatenate(axis=3)([x, shift_chosen])


#         for l in range(NOTE_AXIS_LAYERS):
#             if l not in lstm_layer_cache:
#                 lstm_layer_cache[l] = LSTM(NOTE_AXIS_UNITS, return_sequences=True)

#             x = TimeDistributed(lstm_layer_cache[l])(x)
#             x = Dropout(dropout)(x)
            
#         #print('x', x.shape)  
#         #print('nx', note_dense(x).shape)
        
#         return Concatenate()([note_dense(x), volume_dense(x)])
#     return f

# def build_models(time_steps=SEQ_LEN, input_dropout=0.2, dropout=0.5):
#     notes_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))
#     beat_in = Input((time_steps, NOTES_PER_BAR))
#     # Target input for conditioning
#     chosen_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))

#     # Dropout inputs
#     notes = Dropout(input_dropout)(notes_in)
#     beat = Dropout(input_dropout)(beat_in)
#     chosen = Dropout(input_dropout)(chosen_in)

#     """ Time axis """
#     time_out = time_axis(dropout)(notes, beat)

#     """ Note Axis & Prediction Layer """
#     naxis = note_axis(dropout)
#     notes_out = naxis(time_out, chosen)

#     model = Model([notes_in, chosen_in, beat_in], [notes_out])

#     if len(K.tensorflow_backend._get_available_gpus())>=2:
#         model = multi_gpu_model(model)

#     model.compile(optimizer='nadam', loss=[primary_loss])

#     """ Generation Models """
#     time_model = Model([notes_in, beat_in], [time_out])

#     note_features = Input((1, NUM_NOTES, TIME_AXIS_UNITS), name='note_features')
#     chosen_gen_in = Input((1, NUM_NOTES, NOTE_UNITS), name='chosen_gen_in')
#     style_gen_in = Input((1, NUM_STYLES), name='style_in')

#     # Dropout inputs
#     chosen_gen = Dropout(input_dropout)(chosen_gen_in)
    
#     note_gen_out = naxis(note_features, chosen_gen)
    
#     note_model = Model([note_features, chosen_gen_in], note_gen_out)

#     return model, time_model, note_model


# def build_models_with_attention(time_steps=SEQ_LEN, input_dropout=0.2, dropout=0.5):
#     notes_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))
#     beat_in = Input((time_steps, NOTES_PER_BAR))
#     # Target input for conditioning
#     chosen_in = Input((time_steps, NUM_NOTES, NOTE_UNITS))

#     # Dropout inputs
#     notes = Dropout(input_dropout)(notes_in)
#     beat = Dropout(input_dropout)(beat_in)
#     chosen = Dropout(input_dropout)(chosen_in)

#     """ Time axis """
#     time_out = time_axis(dropout)(notes, beat)
#     #print('time_out', time_out.shape)

#     """ Note Axis & Prediction Layer """
#     naxis = note_axis_attention(dropout)
#     notes_out = naxis(time_out)
    
#     model = Model([notes_in, chosen_in, beat_in], [notes_out])

#     if len(K.tensorflow_backend._get_available_gpus())>=2:
#         model = multi_gpu_model(model)

#     model.compile(optimizer='nadam', loss=[primary_loss])
    
#     """ Generation Models """
#     time_model = Model([notes_in, beat_in], [time_out])

#     note_features = Input((1, NUM_NOTES, TIME_AXIS_UNITS), name='note_features')
#     chosen_gen_in = Input((1, NUM_NOTES, NOTE_UNITS), name='chosen_gen_in')
   
#     # Dropout inputs
#     chosen_gen = Dropout(input_dropout)(chosen_gen_in)
    
#     #print('NUM_NOTES', NUM_NOTES)
#     note_gen_out = naxis(note_features)
    
#     note_model = Model([note_features, chosen_gen_in], note_gen_out)

#     return model, time_model, note_model

# def note_axis_attention(dropout):
#     note_dense_att = Dense(2, activation='sigmoid', name='note_dense_att')
#     volume_dense_att = Dense(1, name='volume_dense_att')

#     def f(x):
#         x = attention_layer(x, x, True)
#         #print('x_att', x.shape)
#         x = Dropout(dropout)(x)
#         #print('x_drop', x.get_shape)

#         v = volume_dense_att(x)
        
#         #print('the end')
#         #print('dense_vol', v.shape)
  
#         return Concatenate(axis=-1)([note_dense_att(x), volume_dense_att(x)])
    
#     return f

# def OneHeadAttention(a_drop, q_drop, drop_ratio=0.5):
        
#     a_proj = Dense(PROJECTION_DIM, use_bias=False, kernel_initializer='glorot_normal')(a_drop)
#     q_proj = Dense(PROJECTION_DIM, use_bias=False, kernel_initializer='glorot_normal')(q_drop)
#     v_proj = Dense(PROJECTION_DIM, use_bias=False, kernel_initializer='glorot_normal')(a_drop)
    
#     a_proj = Dropout(drop_ratio)(a_proj)
#     q_proj = Dropout(drop_ratio)(q_proj)
#     v_proj = Dropout(drop_ratio)(v_proj)
#     #print('a_proj', a_proj.shape)
    
#     #n = Dense(2)(v_proj)
#     #print('dense_note', n.shape)
 
    
#     att_input = Lambda(lambda x: tf.matmul(x[0],x[1], transpose_b=True))([q_proj, a_proj])
#     #print('att_input', att_input.shape)


#     att_weights = Activation('softmax')(att_input)
#     v_new = Lambda(lambda x: tf.matmul(x[0],x[1]))([att_weights, v_proj])
#     #tf.matmul(att_weights, v_proj)
#     #print('v_new', v_new.get_shape)
     
#     v_new = Multiply()([q_proj, v_new])
    
#     return v_new

# def MultyHeadAttention(a_drop, q_drop):

#     Attention_heads = []
#     for i in range(N_HEADS):
#         Attention_heads.append(OneHeadAttention(a_drop, q_drop))
        
#     BigHead = concatenate(Attention_heads, axis=-1)
#     #print('BigHead', BigHead.shape)   

#     attention_output = Dense(DENSE_SIZE, use_bias=False)(BigHead)
#     #print('attention_output', attention_output.shape)

           
#     return attention_output
    
# def attention_layer(a_drop, q_drop, FF):
    
#     #print('a_drop', a_drop.shape)
#     res = MultyHeadAttention(a_drop, q_drop)
#     #print('res', res.shape)
        
#     att = Add()([a_drop, res])
#     #att = normalize()(att)    
 
#     #Feed Forward
#     if FF:
#         att_ff = Dense(DENSE_SIZE*4, activation = 'relu')(att)
#         att_ff = Dense(DENSE_SIZE)(att_ff)   
#         att_ff = Dropout(0.1)(att_ff)
#         att_add = Add()([att, att_ff])
#         #att = normalize()(att_add) 
    
#     return att



In [15]:
# Display the result of compute_beat for arguments (3, 16).
# NOTE(review): compute_beat is not imported in any visible cell.
compute_beat(3, 16)

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [141]:
# Display the configured sequence length.
SEQ_LEN

128

In [169]:
# Bounded deque holding two zero arrays; maxlen=2 evicts the oldest on append.
# NOTE(review): `deque` is not imported in any visible cell.
d = deque([np.zeros((2, 3)) for _ in range(2)], maxlen=2)

In [170]:
# Display the current contents of the deque.
d

deque([array([[0., 0., 0.],
              [0., 0., 0.]]), array([[0., 0., 0.],
              [0., 0., 0.]])])

In [173]:
# Append an array of ones; with maxlen=2 the oldest entry is dropped.
d.append(np.ones((2,3)))

In [174]:
# Display the deque after appending.
d

deque([array([[1., 1., 1.],
              [1., 1., 1.]]), array([[1., 1., 1.],
              [1., 1., 1.]])])

In [142]:
# Display the configured number of notes per bar.
NOTES_PER_BAR

16

In [191]:
# for i, result in enumerate(generate(models, 4, styles)):
# #     print(i)
# #     print(np.array(result).shape)
# #     print(unclamp_midi(result).shape)
# #     print(midi_encode(unclamp_midi(result)))
#     break

In [62]:
# Create a generator for the first style and build its time-model inputs.
# NOTE(review): MusicGeneration is not imported in any visible cell.
g = MusicGeneration(styles[0])
a = g.build_time_inputs()

In [63]:
# Display the container type returned by build_time_inputs.
type(a)

tuple

In [19]:
# Print the shape of each time-model input.
# NOTE(review): this rebinds the notebook-level name `i`.
for i in a:
    print(i.shape)

(128, 48, 3)
(128, 16)
(128, 1)


In [21]:
# Check whether the first time input contains any zero entries.
(a[0]==0).any()

True

In [29]:
# Check whether the second time input contains any zero entries.
(a[1]==0).any()

True

In [65]:
# Print the shapes after process_inputs wraps the inputs into a batch of one.
# NOTE(review): process_inputs is not imported in any visible cell.
for i in process_inputs([a]):
    print(i.shape)

(1, 128, 48, 3)
(1, 128, 16)
(1, 128, 1)


In [57]:
# Shape of the time model's prediction restricted to the last time step.
models[1].predict(process_inputs([a]))[:, -1:, :].shape

(1, 1, 48, 256)

In [74]:
# Keep the last time step of the time model's output as note features and
# display the shape of the first batch element.
note_features = models[1].predict(process_inputs([a]))[:, -1:, :]
note_features[0, : ,: , :].shape

(1, 48, 256)

In [76]:
# Build the note-model inputs from the note features and print their shapes.
b = g.build_note_inputs(note_features[0, : ,: , :])
for i in b:
    print(i.shape)

(1, 48, 256)
(1, 48, 3)
(1, 1)


In [80]:
# b

In [77]:
# Print the shapes after process_inputs wraps the note inputs into a batch of one.
for i in process_inputs([b]):
    print(i.shape)

(1, 1, 48, 256)
(1, 1, 48, 3)
(1, 1, 1)


In [85]:
# Run the note model on the processed note inputs and display the output shape.
pr = np.array(models[2].predict(process_inputs([b])))
pr.shape

(1, 1, 48, 3)

In [94]:
# Prediction for the first batch element at its last time step.
pr2 = pr[0][-1]

In [96]:
# Last output unit of the note at index 2.
pr2[2, -1]

-0.0836291

In [98]:
# Apply the generator's temperature to all but the last output unit of note 2.
# NOTE(review): apply_temperature is not imported in any visible cell.
prob = apply_temperature(pr2[2, :-1], g.temperature)

In [99]:
# Display the temperature-adjusted values.
prob

array([0.49578953, 0.4675863 ], dtype=float32)

In [89]:
# Call the generator's choose method on the last-step prediction with second argument 0.
g.choose(pr[0][-1], 0)