In [1]:
import tensorflow as tf
#import mitdeeplearning as mdl 
import numpy as np
import os
import time
import functools
from IPython import display as ipythondisplay
from tqdm import tqdm 
import my_helpers as mh
import mido

In [2]:
#assert len(tf.config.list_physical_devices('GPU')) > 0
mid = mido.MidiFile('./inputs/inp_1_cMaj_4-4th_temp_1.mid')
song1 = mh.extract_bars('./inputs/inp_1_cMaj_4-4th_temp_1.mid')
song2 = mh.extract_bars('./inputs/inp_2_cMaj_4-4th_temp_1.mid')
song3 = mh.extract_bars('./inputs/inp_3_cMaj_4-4th_temp_1.mid')
song4 = mh.extract_bars('./inputs/inp_4_cMaj_4-4th_temp_1.mid')
song5 = mh.extract_bars('./inputs/inp_5_cMaj_4-4th_temp_1.mid')

songs = np.array(song1 + song2 + song3 + song4 + song5).flatten()

print("songs", songs)


songs [ 60 129 129 ... 129  72 129]


In [3]:
def get_batch(song_list, seq_length, batch_size):
    # get length of inputs and randomly take subset
    n = song_list.shape[0] - 1
    idx = np.random.choice(n-seq_length, batch_size)
    # create matching subsets of input and output strings for rnn
    input_batch =  [song_list[i : i+seq_length] for i in idx]
    output_batch = [song_list[i+1 : i+seq_length+1] for i in idx]
    #create batches in proper size to feed to rnn
    x_batch = np.reshape(input_batch, [batch_size, seq_length])
    y_batch = np.reshape(output_batch, [batch_size, seq_length])
    print("seq_len", seq_length, "batch_size", batch_size)
    print("x_batch", x_batch, "y_batch", y_batch)
    return x_batch, y_batch

x_batch, y_batch = get_batch(songs, 5, 1)



seq_len 5 batch_size 1
x_batch [[129  69 129  72 129]] y_batch [[ 69 129  72 129  69]]


In [4]:
# standard lstm stolen from the internet
def LSTM(rnn_units): 
    return tf.keras.layers.LSTM(
        rnn_units, 
        return_sequences=True, 
        recurrent_initializer='glorot_uniform',
        recurrent_activation='sigmoid',
        stateful=True,)

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    # vocab size is the number of possible values the inputs (and outputs) can take on
    # i. e. the number of unique characters (or numbers) in the data set 
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        LSTM(rnn_units),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

#model = build_model(vocab_size, embedding_dim=256, rnn_units=1024, batch_size=32)

# custom loss functions

def compute_loss(labels, logits):
    loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
    return loss



In [5]:
# training parameters

num_training_iterations = 20
batch_size = 32
seq_length = 100
learning_rate = 5e-3

vocab_size = 130 # number of unique characters in dataset
embedding_dim = 256
rnn_units = 1024

checkpoint_dir = './rnn_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "checkpoint")

model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)
optimizer = tf.keras.optimizers.Adam(learning_rate)

# custom training step 
def train_step(x, y): 
    with tf.GradientTape() as tape:
        y_hat = model(x) 
        loss = compute_loss(y, y_hat) 

    grads = tape.gradient(loss, model.trainable_variables) 
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

history = []



In [6]:
x, y = get_batch(songs, seq_length=100, batch_size=32)
pred = model(x)
#print("pred0[]", pred[0], pred[0].shape)

predicted_id = tf.random.categorical(pred[0], num_samples=1)[-1,0].numpy()

seq_len 100 batch_size 32
x_batch [[ 60 129  59 ... 129  67 129]
 [129  76 129 ... 129 129  67]
 [129  76 129 ...  72 129 129]
 ...
 [ 64 129  65 ... 129  69 129]
 [129 129  81 ... 129  81 129]
 [ 76 129  79 ... 129  76 129]] y_batch [[129  59 129 ...  67 129  69]
 [ 76 129  74 ... 129  67 129]
 [ 76 129 129 ... 129 129 129]
 ...
 [129  65 129 ...  69 129  67]
 [129  81 129 ...  81 129  79]
 [129  79 129 ...  76 129  72]]


In [7]:
# run the network for a while
for iter in tqdm(range(num_training_iterations)):
    x_batch, y_batch = get_batch(songs, seq_length, batch_size)
    #print(x_batch.shape, y_batch.shape)
    loss = train_step(x_batch, y_batch)

    history.append(loss.numpy().mean())

    if iter % 100 == 0:     
      model.save_weights(checkpoint_prefix)


  0%|          | 0/20 [00:00<?, ?it/s]

seq_len 100 batch_size 32
x_batch [[129 129  77 ... 129  77 129]
 [ 60 129  62 ... 129  65 129]
 [129  72 129 ...  79 129  76]
 ...
 [129 129  77 ... 129  69 129]
 [129  81 129 ...  86 129  84]
 [129  72 129 ...  65 129  62]] y_batch [[129  77 129 ...  77 129 129]
 [129  62 129 ...  65 129  64]
 [ 72 129  76 ... 129  76 129]
 ...
 [129  77 129 ...  69 129  71]
 [ 81 129  79 ... 129  84 129]
 [ 72 129  71 ... 129  62 129]]


  5%|▌         | 1/20 [00:05<01:45,  5.54s/it]

seq_len 100 batch_size 32
x_batch [[ 72 129  69 ... 129  74 129]
 [ 60 129 129 ... 129  67 129]
 [ 62 129 129 ... 129 129 129]
 ...
 [129  62 129 ...  67 129  67]
 [ 79 129  76 ... 129  72 129]
 [ 74 129  72 ... 129  72 129]] y_batch [[129  69 129 ...  74 129  72]
 [129 129 129 ...  67 129  69]
 [129 129 129 ... 129 129  67]
 ...
 [ 62 129  65 ... 129  67 129]
 [129  76 129 ...  72 129  67]
 [129  72 129 ...  72 129  74]]


 10%|█         | 2/20 [00:10<01:38,  5.49s/it]

seq_len 100 batch_size 32
x_batch [[129  72 129 ... 129 129  60]
 [129  67 129 ...  58 129  62]
 [ 72 129 129 ... 129  76 129]
 ...
 [ 79 129 129 ... 129 129 129]
 [ 69 129  67 ... 129  74 129]
 [ 76 129  74 ... 129  72 129]] y_batch [[ 72 129  71 ... 129  60 129]
 [ 67 129  60 ... 129  62 129]
 [129 129 129 ...  76 129  74]
 ...
 [129 129 129 ... 129 129  81]
 [129  67 129 ...  74 129  71]
 [129  74 129 ...  72 129 129]]


 15%|█▌        | 3/20 [00:16<01:32,  5.46s/it]

seq_len 100 batch_size 32
x_batch [[ 67 129  69 ... 129  76 129]
 [ 72 129  74 ... 129  74 129]
 [129  65 129 ...  71 129  72]
 ...
 [ 72 129  67 ... 129  60 129]
 [129  64 129 ...  74 129  76]
 [ 76 129  74 ... 129 129 129]] y_batch [[129  69 129 ...  76 129  72]
 [129  74 129 ...  74 129 129]
 [ 65 129  67 ... 129  72 129]
 ...
 [129  67 129 ...  60 129  57]
 [ 64 129  65 ... 129  76 129]
 [129  74 129 ... 129 129  62]]


 20%|██        | 4/20 [00:21<01:26,  5.42s/it]

seq_len 100 batch_size 32
x_batch [[129 129  77 ... 129  76 129]
 [129  72 129 ...  72 129  77]
 [ 64 129  65 ... 129  65 129]
 ...
 [129 129 129 ...  67 129  69]
 [129  67 129 ...  74 129  71]
 [129 129  76 ... 129 129 129]] y_batch [[129  77 129 ...  76 129  77]
 [ 72 129  76 ... 129  77 129]
 [129  65 129 ...  65 129  64]
 ...
 [129 129  69 ... 129  69 129]
 [ 67 129  65 ... 129  71 129]
 [129  76 129 ... 129 129  72]]


 25%|██▌       | 5/20 [00:26<01:20,  5.38s/it]

seq_len 100 batch_size 32
x_batch [[ 72 129 129 ... 129  65 129]
 [129  76 129 ... 129 129  72]
 [129 129 129 ... 129 129  72]
 ...
 [ 65 129 129 ... 129  69 129]
 [129 129 129 ... 129 129  72]
 [129 129  69 ... 129  79 129]] y_batch [[129 129 129 ...  65 129  64]
 [ 76 129  74 ... 129  72 129]
 [129 129  79 ... 129  72 129]
 ...
 [129 129 129 ...  69 129  67]
 [129 129  64 ... 129  72 129]
 [129  69 129 ...  79 129  76]]


 30%|███       | 6/20 [00:32<01:15,  5.37s/it]

seq_len 100 batch_size 32
x_batch [[129  71 129 ...  72 129  74]
 [ 74 129  76 ... 129  76 129]
 [ 77 129  74 ... 129  76 129]
 ...
 [129  77 129 ...  84 129  86]
 [129  69 129 ...  74 129 129]
 [ 67 129  64 ... 129  62 129]] y_batch [[ 71 129  69 ... 129  74 129]
 [129  76 129 ...  76 129 129]
 [129  74 129 ...  76 129 129]
 ...
 [ 77 129  76 ... 129  86 129]
 [ 69 129  67 ... 129 129 129]
 [129  64 129 ...  62 129  64]]


 35%|███▌      | 7/20 [00:37<01:09,  5.37s/it]

seq_len 100 batch_size 32
x_batch [[ 64 129  69 ... 129  64 129]
 [129  81 129 ...  86 129  88]
 [129  79 129 ...  79 129  77]
 ...
 [129  71 129 ... 129 129  71]
 [ 64 129 129 ... 129  62 129]
 [129 129  60 ... 129  64 129]] y_batch [[129  69 129 ...  64 129  62]
 [ 81 129  79 ... 129  88 129]
 [ 79 129  77 ... 129  77 129]
 ...
 [ 71 129  72 ... 129  71 129]
 [129 129 129 ...  62 129  67]
 [129  60 129 ...  64 129 129]]


 40%|████      | 8/20 [00:42<01:04,  5.35s/it]

seq_len 100 batch_size 32
x_batch [[129  67 129 ...  77 129  79]
 [129  69 129 ...  79 129  76]
 [129  76 129 ...  74 129  76]
 ...
 [129 129  60 ... 129  65 129]
 [129  76 129 ...  76 129  79]
 [ 67 129  69 ... 129  64 129]] y_batch [[ 67 129  69 ... 129  79 129]
 [ 69 129  71 ... 129  76 129]
 [ 76 129  77 ... 129  76 129]
 ...
 [129  60 129 ...  65 129  69]
 [ 76 129  79 ... 129  79 129]
 [129  69 129 ...  64 129  62]]


 45%|████▌     | 9/20 [00:48<00:58,  5.34s/it]

seq_len 100 batch_size 32
x_batch [[129  76 129 ...  69 129  71]
 [ 60 129  59 ... 129  60 129]
 [ 71 129  69 ... 129 129 129]
 ...
 [129  62 129 ...  64 129  67]
 [ 74 129  72 ... 129  74 129]
 [129 129 129 ... 129 129  76]] y_batch [[ 76 129 129 ... 129  71 129]
 [129  59 129 ...  60 129  62]
 [129  69 129 ... 129 129  71]
 ...
 [ 62 129  60 ... 129  67 129]
 [129  72 129 ...  74 129  76]
 [129 129  76 ... 129  76 129]]


 50%|█████     | 10/20 [00:53<00:53,  5.33s/it]

seq_len 100 batch_size 32
x_batch [[ 72 129  69 ... 129  74 129]
 [129  79 129 ...  64 129  60]
 [129  67 129 ...  65 129  62]
 ...
 [ 60 129 129 ... 129  55 129]
 [129  65 129 ... 129 129  60]
 [129 129  69 ... 129  71 129]] y_batch [[129  69 129 ...  74 129 129]
 [ 79 129  76 ... 129  60 129]
 [ 67 129  71 ... 129  62 129]
 ...
 [129 129 129 ...  55 129  60]
 [ 65 129  62 ... 129  60 129]
 [129  69 129 ...  71 129  74]]


 55%|█████▌    | 11/20 [00:59<00:48,  5.37s/it]

seq_len 100 batch_size 32
x_batch [[ 69 129  72 ... 129  72 129]
 [ 74 129 129 ... 129 129 129]
 [129  62 129 ...  67 129 129]
 ...
 [129 129 129 ...  69 129  72]
 [129 129  69 ... 129  72 129]
 [129  74 129 ...  72 129  71]] y_batch [[129  72 129 ...  72 129 129]
 [129 129 129 ... 129 129  76]
 [ 62 129  59 ... 129 129 129]
 ...
 [129 129  67 ... 129  72 129]
 [129  69 129 ...  72 129  67]
 [ 74 129  72 ... 129  71 129]]


 60%|██████    | 12/20 [01:04<00:43,  5.38s/it]

seq_len 100 batch_size 32
x_batch [[ 84 129  81 ... 129 129 129]
 [ 83 129  84 ... 129  81 129]
 [129 129 129 ...  74 129  77]
 ...
 [129  69 129 ...  74 129  72]
 [129  62 129 ...  64 129  65]
 [129  65 129 ...  77 129  79]] y_batch [[129  81 129 ... 129 129  72]
 [129  84 129 ...  81 129  79]
 [129 129  60 ... 129  77 129]
 ...
 [ 69 129  71 ... 129  72 129]
 [ 62 129  64 ... 129  65 129]
 [ 65 129  67 ... 129  79 129]]


 65%|██████▌   | 13/20 [01:09<00:37,  5.39s/it]

seq_len 100 batch_size 32
x_batch [[ 62 129 129 ... 129 129 129]
 [ 59 129  60 ... 129  74 129]
 [129  59 129 ...  64 129  62]
 ...
 [ 72 129  71 ... 129  79 129]
 [ 62 129 129 ... 129  72 129]
 [ 69 129  72 ... 129  76 129]] y_batch [[129 129 129 ... 129 129  72]
 [129  60 129 ...  74 129  76]
 [ 59 129  60 ... 129  62 129]
 ...
 [129  71 129 ...  79 129  76]
 [129 129 129 ...  72 129  71]
 [129  72 129 ...  76 129  72]]


 70%|███████   | 14/20 [01:15<00:32,  5.37s/it]

seq_len 100 batch_size 32
x_batch [[129  76 129 ...  69 129  71]
 [129  69 129 ...  59 129  60]
 [ 69 129  72 ... 129  79 129]
 ...
 [129 129  60 ... 129  77 129]
 [ 69 129  64 ... 129  76 129]
 [129 129  71 ... 129  77 129]] y_batch [[ 76 129  79 ... 129  71 129]
 [ 69 129  65 ... 129  60 129]
 [129  72 129 ...  79 129  76]
 ...
 [129  60 129 ...  77 129  74]
 [129  64 129 ...  76 129  76]
 [129  71 129 ...  77 129  74]]


 75%|███████▌  | 15/20 [01:20<00:26,  5.37s/it]

seq_len 100 batch_size 32
x_batch [[129  72 129 ... 129 129  76]
 [ 69 129  67 ... 129  72 129]
 [ 62 129  64 ... 129 129 129]
 ...
 [ 76 129  74 ... 129  72 129]
 [ 72 129 129 ... 129 129 129]
 [129  72 129 ...  72 129  71]] y_batch [[ 72 129  81 ... 129  76 129]
 [129  67 129 ...  72 129  76]
 [129  64 129 ... 129 129  60]
 ...
 [129  74 129 ...  72 129  74]
 [129 129 129 ... 129 129  76]
 [ 72 129  69 ... 129  71 129]]


 80%|████████  | 16/20 [01:25<00:21,  5.35s/it]

seq_len 100 batch_size 32
x_batch [[129  71 129 ...  65 129  67]
 [129  74 129 ...  69 129  72]
 [129  81 129 ...  79 129  77]
 ...
 [ 74 129  72 ... 129  72 129]
 [ 76 129  72 ... 129  74 129]
 [ 62 129 129 ... 129 129 129]] y_batch [[ 71 129  69 ... 129  67 129]
 [ 74 129  76 ... 129  72 129]
 [ 81 129  79 ... 129  77 129]
 ...
 [129  72 129 ...  72 129  71]
 [129  72 129 ...  74 129  76]
 [129 129 129 ... 129 129  67]]


 85%|████████▌ | 17/20 [01:31<00:15,  5.33s/it]

seq_len 100 batch_size 32
x_batch [[ 74 129  67 ... 129  72 129]
 [129  79 129 ... 129 129  76]
 [ 69 129  69 ... 129  77 129]
 ...
 [129  65 129 ...  60 129 129]
 [129  79 129 ...  83 129  84]
 [129  76 129 ...  76 129  74]] y_batch [[129  67 129 ...  72 129  76]
 [ 79 129  74 ... 129  76 129]
 [129  69 129 ...  77 129  74]
 ...
 [ 65 129  67 ... 129 129 129]
 [ 79 129  81 ... 129  84 129]
 [ 76 129  72 ... 129  74 129]]


 90%|█████████ | 18/20 [01:36<00:10,  5.33s/it]

seq_len 100 batch_size 32
x_batch [[129  77 129 ...  77 129  76]
 [129  72 129 ...  81 129  83]
 [129 129 129 ... 129 129  72]
 ...
 [129 129 129 ...  72 129  74]
 [ 67 129  64 ... 129  60 129]
 [ 60 129 129 ... 129  48 129]] y_batch [[ 77 129  76 ... 129  76 129]
 [ 72 129 129 ... 129  83 129]
 [129 129  59 ... 129  72 129]
 ...
 [129 129  76 ... 129  74 129]
 [129  64 129 ...  60 129 129]
 [129 129 129 ...  48 129  55]]


 95%|█████████▌| 19/20 [01:41<00:05,  5.32s/it]

seq_len 100 batch_size 32
x_batch [[129  72 129 ...  77 129  76]
 [129  64 129 ...  71 129  72]
 [129 129  79 ... 129  67 129]
 ...
 [129  74 129 ...  57 129  62]
 [ 79 129  76 ... 129  64 129]
 [129  76 129 ...  81 129  79]] y_batch [[ 72 129  76 ... 129  76 129]
 [ 64 129  60 ... 129  72 129]
 [129  79 129 ...  67 129  69]
 ...
 [ 74 129  67 ... 129  62 129]
 [129  76 129 ...  64 129  72]
 [ 76 129  72 ... 129  79 129]]


100%|██████████| 20/20 [01:47<00:00,  5.35s/it]


In [19]:
def generate_song(model, start_string, generation_length=100):
    current_string = tf.expand_dims(start_string, 0)
    #print("111111111", current_string)
    song_generated = []
    # Here batch size == 1
    model.reset_states()
    #tqdm._instances.clear()
    for i in range(generation_length):
        predictions = model(current_string)
        #print("p1", predictions, predictions.shape)
        predictions = tf.squeeze(predictions, 0)
        #print("p2", predictions, predictions.shape)
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
        #print("p3", predicted_id)
        current_string = tf.expand_dims([predicted_id],0)
        song_generated.append(predicted_id)

    return (np.array(start_string + song_generated).flatten())

model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
# Restore the model weights for the last checkpoint after training
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

model.summary()

generated_song = generate_song(model, start_string=[64], generation_length=159)
print("gen_sng", generated_song, generated_song.shape)
generated_song = generated_song.reshape((16, -1))
mh.write_to_file(generated_song.tolist(), mid.ticks_per_beat, filepath="./", filename="rnn_recon", stretch=False)

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_12 (Embedding)     (1, None, 256)            33280     
_________________________________________________________________
lstm_12 (LSTM)               (1, None, 1024)           5246976   
_________________________________________________________________
dense_12 (Dense)             (1, None, 130)            133250    
Total params: 5,413,506
Trainable params: 5,413,506
Non-trainable params: 0
_________________________________________________________________
gen_sng [ 64  75   2  48   1  21   6  42 106  46 101  96  71   4  35  25  75  41
  48  91  69   6  26  96  57  53  77   6 111  56   7  35  89 109  47  29
  75  91  58  61  90 100  26  60 115  46  78  50  14  35  67 116  69  95
  33 100  24  25   5  15  63  34  10 127  57  94   8  33  68  77  87  18
  95  65  62  31  87  73 102 126  63 108  39 112  72  87  24 124  89 105
