<a href="https://colab.research.google.com/github/erd3muysal/Music_Generation_with_LSTM_Based_RNN/blob/master/Music_Generation_with_LSTM_Based_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [1]:
# Import all packages
import os
import time
import regex as re
from tqdm import tqdm
import numpy as np
# Import Tensorflow 2.0
%tensorflow_version 2.x
import tensorflow as tf

In [2]:
# Bring dataset file into Colab environment.
# This cell only works inside Google Colab: it asks for a file upload and
# keeps the result in `uploaded`.
from google.colab import files
uploaded = files.upload()

Saving Music_of_Ireland.abc to Music_of_Ireland.abc


In [19]:
# Define current working directory (the directory the dataset file is read from)
CWD = "/content/" 
# Define name of the dataset to be used
DATASET_NAME = "Music_of_Ireland.abc"
# Echo both settings as the cell output
CWD, DATASET_NAME

('/content/', 'Music_of_Ireland.abc')

In [20]:
def load_training_data(cwd, dataset):
  """ Read a dataset file and split it into individual songs.

  Arguments:
  cwd -- Directory that contains the dataset file
  dataset -- File name of the dataset inside `cwd`

  Return:
  songs -- the list of song strings produced by `extract_song_snippet`
  """

  dataset_path = os.path.join(cwd, dataset)
  with open(dataset_path, "r") as dataset_file:
    raw_text = dataset_file.read()
  return extract_song_snippet(raw_text)

def extract_song_snippet(text):
  """ Split raw text into individual song records.

  Arguments:
  text -- String in which song records are separated by blank lines

  Return:
  songs -- the list of text spans found between two blank-line separators

  NOTE: `re` is the third-party `regex` package in this notebook; the
  `overlapped` keyword is not accepted by the standard-library `re`.
  """

  # A record is whatever sits between one blank line and the next one.
  # DOTALL lets '.' span the line breaks inside a record.
  song_pattern = '\n\n(.*?)\n\n'
  # overlapped=True allows matches to share text, so the blank line that
  # closes one record can also open the following one.
  songs = list(re.findall(song_pattern, text, overlapped=True, flags=re.DOTALL))
  # Report how many records were found
  print("Found {} songs in text".format(len(songs)))
  return songs

# Read the dataset file and split it into a list of song strings
songs = load_training_data(CWD, DATASET_NAME)

# Print the song at index 0 in the list to see the details of the ABC notation and structure of data
instance = songs[0]
print("\nInstance song: \n")
# Also print the type to confirm each song is a plain string
print(instance, type(instance))

Found 812 songs in text

Instance song: 

X:2
T:An Buachaill Dreoite
Z: id:dc-hornpipe-2
M:C|
L:1/8
K:G Major
GF|DGGB d2GB|d2GF Gc (3AGF|DGGB d2GB|dBcA F2GF|!
DGGB d2GF|DGGF G2Ge|fgaf gbag|fdcA G2:|!
GA|B2BG c2cA|d2GF G2GA|B2BG c2cA|d2DE F2GA|!
B2BG c2cA|d^cde f2 (3def|g2gf gbag|fdcA G2:|! <class 'str'>


In [21]:
# Join our list of song strings into a single string containing all songs,
# separated by a blank line
songs_joined = "\n\n".join(songs) 
# Find and then sort all unique characters in the joined string;
# this sorted list is the vocabulary
vocab = sorted(set(songs_joined))
print("There are", len(vocab), "unique characters in the dataset")

There are 83 unique characters in the dataset


In [6]:
### Numerical representation of the text data, so the model can process it ###

# Character -> index lookup. Each character is mapped to its position in
# `vocab`, so `char2idx["d"]` is the index assigned to "d".
char2idx = dict(zip(vocab, range(len(vocab))))

# Index -> character lookup, the inverse of char2idx. Storing it as a
# NumPy array allows decoding a whole array of indices in one indexing step.
idx2char = np.array(vocab)

In [22]:
# Preview the first 20 entries of char2idx, formatted like a dict literal.
# The keys are sliced instead of zipping with range(20) and unpacking into
# `_`: binding `_` at notebook top level shadows the variable IPython uses
# for the previous cell output.
print('{')
for char in list(char2idx)[:20]:
  print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '"' :   3,
  '#' :   4,
  "'" :   5,
  '(' :   6,
  ')' :   7,
  ',' :   8,
  '-' :   9,
  '.' :  10,
  '/' :  11,
  '0' :  12,
  '1' :  13,
  '2' :  14,
  '3' :  15,
  '4' :  16,
  '5' :  17,
  '6' :  18,
  '7' :  19,
  ...
}


In [23]:
def encode_string(string):
  """ Convert a string into its numeric (index) representation.

  Arguments:
  string -- Text to encode; every character must be a key of `char2idx`

  Return:
  np.array with `N` elements, where `N` is the number of characters in
  `string`; element i is the `char2idx` index of character i
  """

  indices = []
  for char in string:
    indices.append(char2idx[char])
  return np.array(indices)

# Encode the whole joined corpus into one array of character indices
encodeded_songs = encode_string(songs_joined)

In [24]:
# Show the first 10 characters of the corpus next to their encoded indices
print ('{} <--- characters mapped to int ---> {}'.format(repr(songs_joined[:10]), encodeded_songs[:10]))
# Check that encodeded_songs is a numpy array
assert isinstance(encodeded_songs, np.ndarray), "returned result should be a numpy array"

'X:2\nT:An B' <--- characters mapped to int ---> [49 22 14  0 45 22 26 69  1 27]


In [25]:
def get_batch(encodeded_songs, seq_length, batch_size):
  """ Draw a random training batch of input/target sequence pairs.

  Every example is a window of `seq_length` consecutive entries of the
  encoded corpus. Its target is the same window shifted one position to
  the right, so the target at step t is the entry that follows the input
  at step t.

  Arguments:
  encodeded_songs -- np.array holding the encoded corpus (indexed along its first axis)
  seq_length -- Number of entries in every example
  batch_size -- Number of examples to draw; start positions are sampled
                with np.random.choice, so the same window may appear twice

  Return:
  x_batch -- np.array of shape (batch_size, seq_length) with the inputs
  y_batch -- np.array of shape (batch_size, seq_length) with the targets

  Raises:
  ValueError -- if `seq_length` is negative, or the corpus is too short to
                hold a window of `seq_length` entries plus its shifted target
  """

  if seq_length < 0:
    raise ValueError("seq_length must not be negative, got {}".format(seq_length))

  # Index of the last entry of the corpus
  n = encodeded_songs.shape[0] - 1
  # Number of start positions to sample from
  num_starts = n - seq_length
  if num_starts <= 0:
    raise ValueError(
        "corpus of {} entries is too short for seq_length={}".format(
            encodeded_songs.shape[0], seq_length))

  # Randomly choose the starting indices for the examples in the training batch
  idx = np.random.choice(num_starts, batch_size)

  # Row i of `positions` holds the corpus positions idx[i] .. idx[i]+seq_length-1
  positions = idx[:, None] + np.arange(seq_length)

  # Inputs are the windows themselves; targets are the windows shifted by one
  x_batch = encodeded_songs[positions]
  y_batch = encodeded_songs[positions + 1]

  return x_batch, y_batch

# Draw a single 5-character example to inspect how inputs and targets line up
x_batch, y_batch = get_batch(encodeded_songs, seq_length=5, batch_size=1)

In [26]:
# Walk through the example step by step: the expected output at each step is
# the character that follows the input character in the corpus.
# np.squeeze drops the size-1 batch axis so zip pairs up individual indices.
for i, (input_idx, target_idx) in enumerate(zip(np.squeeze(x_batch), np.squeeze(y_batch))):
    print("Step {:3d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step   0
  input: 82 ('|')
  expected output: 60 ('e')
Step   1
  input: 60 ('e')
  expected output: 26 ('A')
Step   2
  input: 26 ('A')
  expected output: 58 ('c')
Step   3
  input: 58 ('c')
  expected output: 26 ('A')
Step   4
  input: 26 ('A')
  expected output: 1 (' ')


In [27]:
def LSTM_Model(vocab_size, embedding_dim, rnn_units, batch_size):
  """ Build the character-level model: Embedding -> LSTM -> Dense.

  Arguments:
  vocab_size -- Number of distinct characters; size of the embedding table
                and of the output layer
  embedding_dim -- Length of the dense vector each character index maps to
  rnn_units -- Number of units in the LSTM layer
  batch_size -- Batch size fixed in the model's input shape; the sequence
                length is left unspecified

  Return:
  model -- tf.keras.Sequential model (not compiled)
  """

  # Layer 1: turns character indices into dense vectors
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  # Layer 2: stateful LSTM that returns an output for every time step
  lstm = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      recurrent_initializer='glorot_uniform',
      recurrent_activation='sigmoid',
      stateful=True)
  # Layer 3: projects every LSTM output onto the vocabulary
  projection = tf.keras.layers.Dense(vocab_size)

  return tf.keras.Sequential([embedding, lstm, projection])

# Build a model for batches of 32 sequences and print its layer summary
model = LSTM_Model(vocab_size=len(vocab), embedding_dim=256, rnn_units=1024, batch_size=32)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (32, None, 256)           21248     
_________________________________________________________________
lstm_2 (LSTM)                (32, None, 1024)          5246976   
_________________________________________________________________
dense_2 (Dense)              (32, None, 83)            85075     
Total params: 5,353,299
Trainable params: 5,353,299
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Sanity check: run one batch through the freshly built model and compare shapes.
# The batch size must match the one the model was built with (32).
x, y = get_batch(encodeded_songs, seq_length=100, batch_size=32)
pred = model(x)
print("Input shape:      ", x.shape, " # (batch_size, sequence_length)")
print("Prediction shape: ", pred.shape, "# (batch_size, sequence_length, vocab_size)")

Input shape:       (32, 100)  # (batch_size, sequence_length)
Prediction shape:  (32, 100, 83) # (batch_size, sequence_length, vocab_size)


In [29]:
# Sample one character index per time step for the first example of the batch,
# treating the model output at each step as logits
sampled_indices = tf.random.categorical(pred[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()
sampled_indices

array([15, 66, 61,  6, 20, 76, 41, 22, 33, 35, 35, 35,  2, 25, 79,  5, 58,
       10, 35, 31, 41, 70, 60, 77, 67, 76, 61, 70,  6, 57, 54, 10, 44, 24,
       74,  8, 48, 70, 63,  8, 15, 74, 18, 54,  8, 76, 73, 75, 29, 25, 22,
       40, 11,  3, 12,  6, 51, 76, 58, 13, 36, 60, 78, 45, 19, 54, 33, 28,
       69, 52, 55, 53, 56, 49, 29, 42, 53, 42, 63, 41,  0, 53, 38, 51,  3,
       33, 10, 51, 17, 19, 17, 12, 24, 31, 42, 65,  6, 47, 35, 37])

In [30]:
# Decode the first input sequence and the sampled indices back into text
print("Input: \n", repr("".join(idx2char[x[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices])))

Input: 
 'e|dAFA DAFA|d2ec dcBc|A2ce A2ce|fdec dcBA|!\ndAFA DAFA|d2ec dcBA|gfgb afdf|eABc d2:|!\nB=c|dgg2 bgg2|d'

Next Char Predictions: 
 '3kf(8uP:HJJJ!>x\'c.JFPoevlufo(b^.S=s,Woh,3s6^,urtD>:O/"0(Zuc1KewT7^HCn[_]aXDQ]QhP\n]MZ"H.Z5750=FQj(VJL'


In [31]:
def compute_loss(labels, logits):
  """ Sparse categorical cross-entropy between true labels and predictions.

  Arguments:
  labels -- True class indices
  logits -- Model outputs; from_logits=True is passed, so they are
            treated as logits rather than probabilities

  Return:
  The loss tensor returned by tf.keras.losses.sparse_categorical_crossentropy;
  no averaging is done here (callers use e.g. `.numpy().mean()`)
  """

  return tf.keras.losses.sparse_categorical_crossentropy(
      labels, logits, from_logits=True)

# Loss of the example batch, computed with the model built above
example_batch_loss = compute_loss(y, pred)

print("Prediction shape: ", pred.shape, " # (batch_size, sequence_length, vocab_size)") 
# Average the loss values into a single number for display
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (32, 100, 83)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.4185567


In [39]:
### Hyperparameter setting and optimization ###

# Optimization parameters:
# NOTE: despite its name, num_epochs is the number of training iterations;
# the training loop draws one random batch per iteration.
num_epochs = 7500  # Increase this to train longer
batch_size = 64  # Experiment between 1 and 64
seq_length = 125  # Experiment between 50 and 500
learning_rate = 1e-4  # Experiment between 1e-5 and 1e-1

# Model parameters: 
vocab_size = len(vocab)
embedding_dim = 256 
rnn_units = 1024  # Experiment between 1 and 2048

# Checkpoint location: 
# Weights are saved under this prefix inside checkpoint_dir
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")

In [40]:
# Create model with the training hyperparameters (replaces the demo model above)
model = LSTM_Model(vocab_size, embedding_dim, rnn_units, batch_size)
# Define optimizer: Adam with the configured learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate)

@tf.function
def train_step(x, y):
  """ Run one optimization step on a single batch.

  Uses the notebook-level `model` and `optimizer`.

  Arguments:
  x -- Batch of input sequences
  y -- Batch of target sequences

  Return:
  loss -- the loss tensor produced by `compute_loss` for this batch
  """

  # Record the forward pass so gradients can be taken afterwards
  with tf.GradientTape() as tape:
    logits = model(x)
    loss = compute_loss(y, logits)

  # Differentiate the loss with respect to the trainable weights and let
  # the optimizer update them
  trainable = model.trainable_variables
  gradients = tape.gradient(loss, trainable)
  optimizer.apply_gradients(zip(gradients, trainable))

  return loss

###################
# Begin training! #
###################

# Mean loss of every training iteration
history = []
if hasattr(tqdm, '_instances'): tqdm._instances.clear() # clear if it exists

# NOTE: each `epoch` here is a single training iteration on one randomly
# drawn batch, not a full pass over the dataset.
for epoch in tqdm(range(num_epochs)):
  # Grab a batch and propagate it through the network
  x_batch, y_batch = get_batch(encodeded_songs, seq_length, batch_size)
  loss = train_step(x_batch, y_batch)

  # Record the mean loss of this batch
  history.append(loss.numpy().mean())

  # Checkpoint the current weights every 100 iterations
  if epoch % 100 == 0:     
    model.save_weights(checkpoint_prefix)
    
# Save the final weights once training is done
model.save_weights(checkpoint_prefix)

  8%|▊         | 832/10000 [05:49<1:04:10,  2.38it/s]
100%|██████████| 7500/7500 [24:14<00:00,  5.15it/s]


In [41]:
# Rebuild the model using a batch_size=1: the batch size is fixed in the
# model's input shape, and generation feeds one sequence at a time
model = LSTM_Model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the model weights for the last checkpoint after training
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (1, None, 256)            21248     
_________________________________________________________________
lstm_6 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_6 (Dense)              (1, None, 83)             85075     
Total params: 5,353,299
Trainable params: 5,353,299
Non-trainable params: 0
_________________________________________________________________


In [42]:
### Prediction of a generated song ###

def generate_text(model, start_string, generation_length=1000):
  """ Generate ABC text character by character with a trained model.

  The seed string is fed to the model first. After that, only the most
  recently sampled character is fed back in, so the model is assumed to keep
  its recurrent state between calls (as the stateful LSTM built by
  `LSTM_Model` does).

  Arguments:
  model -- Model called with a (1, sequence_length) batch of character
           indices and returning per-step logits over the vocabulary;
           must provide `reset_states()`
  start_string -- Non-empty seed text; every character must be a key of `char2idx`
  generation_length -- Number of characters to generate after the seed

  Return:
  The seed string followed by the generated characters

  Raises:
  ValueError -- if `start_string` is empty
  KeyError -- if `start_string` contains a character missing from `char2idx`
  """

  if not start_string:
    raise ValueError("start_string must contain at least one character")

  # Convert the start string to numbers (vectorize) and add the batch axis
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Generated characters are collected here and joined at the end
  text_generated = []

  # Here batch size == 1. Start from a clean recurrent state.
  model.reset_states()
  # Guarded like the training cell: `_instances` may not exist on tqdm
  if hasattr(tqdm, '_instances'): tqdm._instances.clear()

  for _ in tqdm(range(generation_length)):
    # Evaluate the inputs and generate the next character predictions
    predictions = model(input_eval)

    # Remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # Sample from the distribution given by the logits and keep the
    # sample drawn for the last time step
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Pass the prediction along with the previous hidden state
    # as the next inputs to the model
    input_eval = tf.expand_dims([predicted_id], 0)

    # Add the predicted character to the generated text
    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [63]:
# Generate 1000 characters, seeded with "X" (song records in the dataset begin with an "X:" line)
generated_text = generate_text(model, start_string="X", generation_length=1000)

100%|██████████| 1000/1000 [00:06<00:00, 152.25it/s]


In [64]:
### Extract and print the generated songs ###
# NOTE: no audio is played here; the songs are only printed as ABC text.

# Split the generated text into individual song records
generated_songs = extract_song_snippet(generated_text)

for i, song in enumerate(generated_songs): 
    print(song)

Found 4 songs in text
X:180
T:K:tinpong
Z: id:dc-jig-147
M:6/8
L:1/8
K:G Major
D|G2B G2B|d2g dBG|cBc def|gfe d2B|ABc d2g|!
[1 g2f g2|e2 g2|d2 d2|c2B A2|B3 c B2|A3 F A F2|E3 E E2:|!
X:15
T:Peange tree's Foroush
Z: id:dc-setdance-29
M:|3:1
T:Hoyt Bulley's
Z: id:dc-hornpipe-10
M:C|
L:1/8
K:G Major
(3DEF|GFGB ABd|!
G2Bc B2GB|cBAF GAFA|GBG2 GB,C|ABd e2|GF DA/C|C2 EG:|!
X:55
T:Wevic Fattye Doledorn
Z: id:dc-reel-113
M:C
L:1/8
K:A Dorian
dB|A2BA G2GB|ABAG FDdf|e2dB AGFD|cBcd ecAG|!
AFFG FADF|FDAF G2FG|ADFG AGcd|egfg a2gaf|!
bgaf gfdfe|dcBA GABc|dBge dBGB|checA:|!
M:jixly Da C
L:1/8
K:G Major
GE|G2BG EGBD|GBBA GBBd|a2ga bgfa|gedB AFAg|!
f2af g2fe|dBAF DFEF|DFAF BFAF|D2FA B2:|!
X:227
T:Pate to Menset
Z: id:dc-reel-321
M:C
L:1/8
K:D Major
FA|d2fd ABFA|BDFA BAce|age dBAGFED (3DFE|!
D2DD FDAD|EDFD E2ce|dBAc BEEA|BGE2 B2f|gfg fec|dfe dBA|BGE E2D:|!
