# Music Generation (Vocabulary Approach)

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from IPython.core.debugger import set_trace
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional, Flatten, Input
from keras import utils
from keras.callbacks import ModelCheckpoint
from keras_self_attention import SeqSelfAttention
from keras import optimizers
from sklearn.model_selection import train_test_split
import h5py
import pickle
import os

%matplotlib inline

%load_ext autoreload
%autoreload 2

batch_size = 16
learning_rate = 1e-6
epochs = 2

Using TensorFlow backend.


In [39]:
import keras
keras.__version__

'2.1.6'

## Hyperparameters

In [2]:
clean_path = "../clean-data-2"
track = "Piano"
batch_size = None
num_epoch = None
sequence_length = 50 # if changed, must re-run sequence.py

## Load data

In [3]:
# Load X and y data for training
with h5py.File(clean_path + "/" + track + "_data_1.h5", 'r') as hf:
    X = hf["X"][:]
    y = hf["y"][:]
    
print(X.shape) # Number of training Ex. x Length of sequence x 1 (following shape in)
print(y.shape) # Number of training Ex. x Vocab Size 
n_vocab = y.shape[1]
# y has been hot one encoded
# We might want to split thesse into train/val...

(55032, 50, 1)
(55032, 902)


## Train Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [29]:
X_small = X_train[0:5120,:]
y_small = y_train[0:5120,:]

## Training

In [30]:
# Normalize input (trains faster?)
network_input = X_small/n_vocab
network_output = y_small

In [31]:
weights = dict((idx,y_small.shape[0]*(1/(freq+1))) for idx,freq in enumerate(np.sum(y,axis=0)))

In [32]:
weights[1245]

0.21669206026747928

In [117]:
-1*np.array(sorted(-1*np.sum(y,axis=0)))

array([23627.,  9655.,  6125., ...,     0.,     0.,     0.])

In [2]:
int_to_note

NameError: name 'int_to_note' is not defined

In [105]:
[int_to_note[idx] for idx in np.sum(y,axis=0).argsort()[-20:][::-1]]

['rest 0.25',
 'rest 0.5',
 'F#2 0.25',
 'F#2 0.0',
 'F#3 0.25',
 'F#3 0.0',
 'A4 0.25',
 '6.11 0.0',
 'rest 1/6',
 'E2 0.25',
 'A4 0.0',
 'C2 0.25',
 'E2 0.0',
 'C2 0.5',
 'D4 0.25',
 'B1 0.5',
 'F#2 0.5',
 'B1 0.0',
 '4.6 0.0',
 '10.11 0.0']

In [95]:
for i in range(y_small.shape[0]):
    print(int_to_note[np.argmax(y_small[i,:])])

C2 2/3
C3 0.25
rest 0.5
C2 0.5
rest 0.25
rest 0.25
rest 0.25
11.3 0.25
C4 0.25
7.11 1/3
4.10 0.0
rest 0.25
F#2 0.25
rest 0.25
F#3 0.0
rest 0.5
A4 0.0
F3 1.5
rest 1.5
D4 0.25
G4 1.0
rest 1/6
7.0 0.25
F#2 0.0
F#3 0.25
F#3 0.25
F#2 0.25
10.2 0.0
rest 1.5
F#2 0.25
A1 2/3
F#2 0.0
rest 0.5
F#3 0.0
E-3 0.0
3 0.25
7.0 1/3
rest 5.0
D4 0.0
F#2 0.25
10.11 0.0
0.5 0.5
B1 0.0
rest 0.25
E2 1/3
rest 0.25
rest 0.25
A2 1.0
D4 0.0
2.6 0.0
6.11 1/3
3.6.10 0.0
0.5 0.0
rest 0.25
10.11 0.0
C#4 1.0
C2 0.5
E2 0.25
6.11 0.0
F#2 0.0
6 0.0
rest 0.5
G2 0.25
9.0 1.0
0.6 0.0
F#3 0.25
rest 0.5
6.11 0.0
A3 0.0
F#3 0.0
C#4 1/3
E2 0.75
F#2 0.25
F#2 0.0
F#1 8/3
F#3 0.25
3.7 0.5
6.11 0.25
rest 0.25
E-3 0.0
11.2 1/3
6.10 0.0
E2 1.0
C2 0.5
rest 0.25
F#3 0.0
F#2 0.0
rest 0.5
C2 0.5
6.7.8.9 0.0
rest 0.25
C#2 0.5
D4 0.5
C2 0.5
F#2 0.0
E2 0.0
A4 0.25
rest 0.5
0.6 0.0
rest 0.5
rest 0.25
rest 0.25
11.3 0.0
rest 2/3
C5 0.25
B1 0.5
rest 0.25
F#3 0.25
G2 0.25
rest 0.5
G#2 0.25
F#2 0.5
G2 1.0
D2 7.0
8.1 1/3
11.3 0.0
10.3 1.0
C5 0.25

rest 0.25
F#2 0.0
C#6 0.5
6.7 0.0
rest 0.5
rest 0.25
10.3 0.75
rest 0.25
B-4 0.0
A4 0.0
G2 1.0
rest 0.5
G#2 0.25
2.6 1/3
E2 0.0
rest 0.25
G#2 0.0
C#2 0.25
rest 0.5
G2 0.0
rest 0.5
E-2 0.5
rest 0.25
6.10 0.25
rest 1/6
G2 0.25
6.11 0.0
F#3 0.25
B-2 1.0
F2 0.5
rest 0.5
F#2 0.25
6.11 0.0
A4 0.0
rest 0.25
rest 1/6
D4 0.25
2.7 4.0
D4 2.25
F#2 0.25
0.4.7 0.0
11.3 0.0
E2 0.25
11 0.0
0.3.7 0.25
rest 1/6
C5 0.5
G2 0.0
0.6 0.0
F#2 0.0
11.1.6 0.0
B-4 0.0
E2 0.25
rest 0.5
rest 0.25
6.7.8.9 0.0
rest 31.0
B-4 0.0
rest 1.5
G#4 2/3
rest 0.25
11.4 1/3
G3 5.5
B1 0.0
F#2 0.25
rest 1/3
rest 0.25
3 0.25
4.7.11 0.25
A4 0.0
C2 0.5
E-5 0.25
F#2 0.0
10.2 0.5
C#5 4/3
rest 1.5
B-4 4/3
rest 0.25
F#2 0.0
0.2 0.0
6.11 0.0
4.8 0.25
B1 1.25
F#2 1/3
rest 0.25
5.6 0.0
E2 0.0
B-2 0.0
A3 0.25
A4 0.5
rest 0.25
A4 0.0
0.6 0.0
F#3 0.0
C2 0.0
7.10 0.5
G4 2/3
F#3 3.25
B3 2.5
9.2 0.75
G3 0.5
rest 0.25
4.10 0.0
F#3 0.0
C2 1/3
G#2 0.0
B1 0.25
F#2 0.0
F#3 0.0
A4 0.25
rest 0.25
11.3 0.25
rest 1.0
rest 0.25
B-4 0.0
11.1.6 0.0
6.11 0

rest 0.25
6.7.8.9.11.2 0.0
F#5 1.0
6.11 0.0
F#3 0.25
6.10.1 0.5
rest 0.25
6.9 0.0
F#3 0.25
C2 0.25
F#3 0.25
B1 0.75
0.6 0.0
6.11 0.0
rest 0.25
rest 0.25
rest 0.25
F#2 0.5
6.7.8.9.11 0.0
F#3 0.25
6.11 0.0
4.6 0.25
rest 0.25
C3 1.0
rest 1.0
rest 0.25
3.6.10 0.0
rest 1.5
rest 0.25
F#2 0.0
rest 0.25
E-5 1.0
rest 0.5
F#2 0.25
3.6 0.0
A4 0.25
E1 0.25
rest 0.25
F#2 0.0
rest 0.5
B-4 0.5
rest 0.5
rest 0.25
G#5 2.0
G#6 0.25
8.11 0.0
E-5 0.25
A4 0.0
F#2 0.25
rest 0.5
A4 0.0
B-3 0.25
C2 1/3
F#2 0.0
11.2 0.25
E1 0.25
C4 0.5
rest 0.5
rest 0.25
D4 0.25
rest 0.25
rest 0.25
E2 0.5
F#2 0.0
rest 0.25
B-2 1.0
B-4 1.75
rest 0.25
rest 0.25
rest 1/3
9.11 0.5
rest 0.5
6.11 0.0
E2 0.25
G#2 0.5
11.3 1/3
rest 0.25
rest 0.5
rest 0.25
rest 0.25
rest 0.25
6.11 0.0
rest 0.25
F#2 0.25
6.11 0.0
A4 0.0
6.11 0.0
rest 0.25
G#6 0.25
4.8 0.25
A4 0.25
A4 0.25
A4 0.0
6.9 0.0
rest 0.75
G2 0.25
10.11 0.0
E-3 0.25
rest 0.25
C2 1/3
F#2 0.25
11.3 0.0
E-5 0.25
11.1.3.6 0.0
9.11.0 0.0
G#2 0.25
F#3 0.0
C2 0.25
D4 0.25
F#2 0.0
E4 0.0

In [43]:
network_input.shape[1]

50

In [44]:
def create_network(network_input, n_vocab, batch_size):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(Input(batch_shape=(batch_size,network_input.shape[1],network_input.shape[2])))
    model.add(Bidirectional(LSTM(512,return_sequences=True),input_shape=(network_input.shape[1], network_input.shape[2])))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(LSTM(512,return_sequences=True))
    model.add(Dropout(0.3))
    
    model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model

def train(model, network_input, network_output):
    """ train the neural network """
    filepath = os.path.abspath("weights-1LSTMlayer_clas_weights-{epoch:03d}-{loss:.4f}.hdf5")
    checkpoint = ModelCheckpoint(
        filepath,
        period=5, #Every 1 epochs
        monitor='loss',
        verbose=1,
        save_best_only=False,
        mode='min'
    )
    callbacks_list = [checkpoint]

    model.fit(network_input, network_output, epochs=50, batch_size=64, class_weight=weights,callbacks=callbacks_list)

In [45]:
model = create_network(network_input, n_vocab, 1024)

TypeError: The added layer must be an instance of class Layer. Found: Tensor("input_3:0", shape=(1024, 50, 1), dtype=float32)

In [28]:
train(model, network_input, network_output)

Epoch 1/50

KeyboardInterrupt: 

In [118]:
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

## Generating Predictions

In [34]:
# Load best model and its weights
weight_path = "weights-1LSTMAtt1LSTMLayer-050-3.9694.hdf5"
best_model = create_network(network_input, n_vocab)
best_model.load_weights(weight_path)

# Load int to note, note to int maps
with open(clean_path + "/" + track + "_note_to_int.pkl", 'rb') as filepath:
    note_to_int = pickle.load(filepath)
    
int_to_note = dict((value, key) for key, value in note_to_int.items())

In [36]:
best_model.layers

[<keras.layers.recurrent.LSTM at 0x1a8d6d0cc0>,
 <keras.layers.core.Dropout at 0x1a8d6d0f28>,
 <keras.layers.core.Flatten at 0x1a8d6d0080>,
 <keras.layers.core.Dense at 0x1a8d6d0fd0>,
 <keras.layers.core.Activation at 0x1a8e474f28>]

In [88]:
#source:https://github.com/JakeNims1305/DataScienceMusic/blob/master/1LSTMAtt1LSTMLayer-fulldataset-results/DataScienceMusic.ipynb
def generate_notes(model, X_test):
    """
    Generate predictions on sequences model has never seen before. 
    """
    gen_seq_len = 300
    # pick a random sequence from the input as a starting point for the prediction
    #start = np.random.randint(0, len(X_test)-1)
    start = 5
    
    pattern = np.empty((sequence_length + gen_seq_len,1))
    pattern[0:sequence_length,:] = X_small[start]
    prediction_output = []

    # generate gen_seq_len notes
    for i in range(gen_seq_len):
        prediction_input = np.reshape(pattern[i:i+sequence_length], (1, sequence_length, 1))
        prediction_input = prediction_input/n_vocab

        prediction = model.predict(prediction_input, verbose=0)
        
        index = np.argmax(prediction)
        #print(index)
        prediction_output.append(int_to_note[index])
        
        pattern[sequence_length-1+i,:] = index

    return prediction_output

def create_midi(prediction_output):
    """
    Convert predictions to midi file
    """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        pattern = pattern.split()
        temp = pattern[0]
        duration = pattern[1]
        pattern = temp
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a rest
        elif('rest' in pattern):
            new_rest = note.Rest(pattern)
            new_rest.offset = offset
            new_rest.storedInstrument = instrument.Piano() #???
            output_notes.append(new_rest)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # increase offset each iteration so that notes do not stack
        offset += convert_to_float(duration)

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp='test_output.mid')

# source:https://stackoverflow.com/questions/1806278/convert-fraction-to-float
def convert_to_float(frac_str):
    try:
        return float(frac_str)
    except ValueError:
        num, denom = frac_str.split('/')
        try:
            leading, num = num.split(' ')
            whole = float(leading)
        except ValueError:
            whole = 0
        frac = float(num) / float(denom)
        return whole - frac if whole < 0 else whole + frac

In [24]:
def predict():
    """
    Makes predictions on data model has never seen before.
    """
    prediction_output = generate_notes(best_model,X_test)
    create_midi(prediction_output)

In [39]:
test = np.random.choice(np.arange(n_vocab),size=(1,sequence_length, 1))

In [57]:
test = np.random.choice(np.arange(n_vocab),size=(1,sequence_length, 1))

In [58]:
test

array([[[2189],
        [2674],
        [1836],
        [3357],
        [2324],
        [3271],
        [ 170],
        [ 533],
        [2226],
        [2058],
        [ 401],
        [ 886],
        [1759],
        [ 446],
        [2236],
        [1103],
        [ 736],
        [2075],
        [3143],
        [1394],
        [1239],
        [1607],
        [1634],
        [1444],
        [2570],
        [1914],
        [1986],
        [2637],
        [3439],
        [ 107],
        [2198],
        [3600],
        [2588],
        [2105],
        [3646],
        [ 481],
        [ 553],
        [2024],
        [3522],
        [2737],
        [ 116],
        [1073],
        [3218],
        [1997],
        [3030],
        [ 438],
        [2287],
        [ 818],
        [3404],
        [2192]]])

In [68]:
input_

array([[[0.42444868],
        [0.28205826],
        [0.42199837],
        [0.16852709],
        [0.14484073],
        [0.42199837],
        [0.83419548],
        [0.16852709],
        [0.26054996],
        [0.42199837],
        [0.16852709],
        [0.14484073],
        [0.42199837],
        [0.83419548],
        [0.16852709],
        [0.26054996],
        [0.42199837],
        [0.16852709],
        [0.14484073],
        [0.42199837],
        [0.83419548],
        [0.16852709],
        [0.04356112],
        [0.42199837],
        [0.16852709],
        [0.14484073],
        [0.42199837],
        [0.83419548],
        [0.16852709],
        [0.26054996],
        [0.42199837],
        [0.16852709],
        [0.14484073],
        [0.42199837],
        [0.83419548],
        [0.16852709],
        [0.04356112],
        [0.42199837],
        [0.16852709],
        [0.14484073],
        [0.42199837],
        [0.83419548],
        [0.16852709],
        [0.26054996],
        [0.42199837],
        [0

In [146]:
input_ = network_input[5,:,:].reshape(1,sequence_length,1)
np.argmax(best_model.predict(input_))

1245

In [147]:
score = np.squeeze(best_model.predict(input_))
-1*np.array(sorted(-1*score))[0:10]

array([0.35898936, 0.10280529, 0.05196646, 0.04807739, 0.04492951,
       0.03213976, 0.03155024, 0.02067539, 0.02034225, 0.01603523],
      dtype=float32)

In [148]:
[int_to_note[i] for i in np.squeeze(best_model.predict(input_)).argsort()[-20:][::-1]]

['rest 0.25',
 'rest 0.5',
 'F#2 0.0',
 '6.11 0.0',
 'F#3 0.0',
 'D4 0.5',
 'F#2 0.25',
 '2.6 0.0',
 '10.11 0.0',
 'F#3 0.25',
 'A4 0.25',
 'B1 0.0',
 'G#2 0.0',
 '4.6 0.25',
 'E2 0.25',
 'C2 0.5',
 '4.10 0.0',
 'rest 1/12',
 '0.6 0.25',
 'B1 0.25']

In [None]:
# 3 is good!

In [73]:
int_to_note[160]

'C2 0.5'

In [89]:
prediction_output = generate_notes(best_model,X_test)

In [90]:
prediction_output

['rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest 0.25',
 'rest

In [63]:
[int_to_note[char] for char in test]

TypeError: unhashable type: 'numpy.ndarray'

In [61]:
[int_to_note[char] for char in X_test[4,:,0]]

['rest 1/6',
 '4.7 1/3',
 '6.9 1/3',
 'F#3 0.25',
 'rest 1/6',
 'C4 1/3',
 'D4 1/3',
 'F#3 0.25',
 'rest 1/6',
 '4.7 1/3',
 '6.9 1/3',
 'F#3 0.25',
 'rest 1/6',
 'C4 1/3',
 'D4 1/3',
 'F#3 0.25',
 'rest 1/6',
 '6.11 1/3',
 '7 5/3',
 '7.11 1/3',
 'F#3 0.25',
 'rest 1/6',
 'D4 1/3',
 'D4 1/3',
 'F#3 0.25',
 'rest 1/6',
 'F#4 1/3',
 '7.11 1/3',
 'F#3 0.25',
 'rest 1/6',
 'B4 1/3',
 'D4 1/3',
 'F#3 0.25',
 'rest 1/6',
 '7.11 1/3',
 'C3 5/3',
 '7.0 1/3',
 'F#3 0.25',
 'rest 1/6',
 'E4 1/3',
 'E4 1/3',
 'F#3 0.25',
 'rest 1/6',
 'C5 1/3',
 '7.0 1/3',
 'F#3 0.25',
 'rest 1/6',
 'B4 1/3',
 'E4 1/3',
 'F#3 0.25']

In [98]:
pattern = X_test[1].tolist()

In [100]:
pattern

[[2641],
 [1245],
 [1581],
 [2291],
 [1245],
 [2641],
 [863],
 [2984],
 [2641],
 [1036],
 [1064],
 [2641],
 [1036],
 [1976],
 [866],
 [863],
 [309],
 [2641],
 [1245],
 [1002],
 [1677],
 [866],
 [1245],
 [2641],
 [863],
 [2641],
 [1036],
 [1002],
 [2641],
 [1036],
 [1868],
 [2641],
 [863],
 [2182],
 [2641],
 [1245],
 [1581],
 [2291],
 [1245],
 [2641],
 [863],
 [2641],
 [1036],
 [2182],
 [2641],
 [1036],
 [1976],
 [2641],
 [863],
 [874]]