<a href="https://colab.research.google.com/github/Valphai/MusicGen/blob/main/MusicGen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# DEPENDENCIES
import tensorflow as tf
from tensorflow import keras
import numpy as np
from IPython import display as Idisplay
import os
# Download the ABC carols archive into /christmas and extract it there.
# The .zip itself stays in /christmas next to the extracted files, i.e. inside
# the directory tree that load_data() walks.
!wget -P /christmas http://www.stephenmerrony.co.uk/ABC/Carols/ABC_Carols_v1.4.zip
!unzip /christmas/ABC_Carols_v1.4 -d /christmas

In [None]:
def load_data(rootdir="/christmas/", max_lines=30):
  """
  Read every file under ``rootdir`` and return the short songs as strings.

  For each file the first four lines are dropped, then every line starting
  with "V:", "w:", "%" or "Z:" is removed. A song is kept only if fewer than
  ``max_lines`` lines remain; the kept lines are joined back into one string.

  Args:
    rootdir: directory walked recursively; every file found is opened as text.
    max_lines: exclusive upper bound on the number of lines (after
      filtering) a song may have to be included.

  Returns:
    list of str, one entry per accepted song.

  Raises:
    Whatever ``open``/``readlines`` raise, e.g. UnicodeDecodeError when a
    file under ``rootdir`` is not readable as text.
  """
  skipped_prefixes = ("V:", "w:", "%", "Z:")
  songs_list = []
  for subdir, _dirs, files in os.walk(rootdir):
    for abc in files:
      with open(os.path.join(subdir, abc)) as f:
        body = f.readlines()[4:]
      # Filter into a new list rather than deleting in place while iterating:
      # in-place deletion shifts the remaining items left, so the line that
      # follows a removed one would never be checked.
      song = [line for line in body if not line.startswith(skipped_prefixes)]
      if len(song) < max_lines:
        songs_list.append("".join(song))
  return songs_list

In [None]:
# checking the validity of the data
# The downloaded archive sits inside the directory that load_data() walks and
# cannot be read as text. Remove it up front instead of letting load_data()
# fail and swallowing whatever exception happened to be raised.
zip_path = "/christmas/ABC_Carols_v1.4.zip"
if os.path.exists(zip_path):
  os.remove(zip_path)
songs_list = load_data()
assert songs_list, "load_data() returned no songs - check the download/unzip step"

# show one example song and its length in characters
print(songs_list[0])
print(len(songs_list[0]))

# joining all the songs into one string and extracting elements
songs = "\n\n".join(songs_list)
vocabulary = sorted(set(songs))
print(vocabulary)

In [None]:
# lookup tables: character -> integer id, and integer id -> character
char2idx = dict(zip(vocabulary, range(len(vocabulary))))
idx2char = np.array(vocabulary)

def vectorize(string):
  """Encode ``string`` as a NumPy array holding the char2idx id of each character."""
  return np.array(list(map(char2idx.__getitem__, string)))

# encode the whole corpus as integers
vectorized_songs = vectorize(songs)
print(vectorized_songs.shape)

In [None]:
# Number of distinct characters in the corpus; passed to my_model, where it
# sizes both the embedding input and the final Dense layer.
VOCAB_SIZE = len(vocabulary)
# Length, in characters, of each training sequence drawn by get_batches.
SEQ_LENGTH = 100
# Number of training iterations; the training loop takes one gradient step on
# one freshly sampled batch per iteration.
EPOCHS = 2000
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.01

# Directory and file prefix under which the model weights are saved.
checkpoint_path = "/christmas_checkpoint"
checkpoint_prefix = os.path.join(checkpoint_path, "chkpt")

In [None]:
def my_model(vocab_size, batch_size=32, output_dim=256, lstm_units=1024):
    """
    Assemble the network: Embedding -> stateful LSTM -> Dense.

    Args:
      vocab_size: number of distinct tokens; used as the embedding input
        dimension and as the number of units of the output layer.
      batch_size: fixed batch dimension of the input (the second input
        dimension is left unspecified).
      output_dim: size of each embedding vector.
      lstm_units: number of units in the LSTM layer.

    Returns:
      A keras.Sequential model whose last layer is a Dense layer with
      vocab_size units and no activation.
    """
    embedding = keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=output_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = keras.layers.LSTM(lstm_units, return_sequences=True, stateful=True)
    scores = keras.layers.Dense(vocab_size)
    return keras.Sequential([embedding, recurrent, scores])

In [None]:
# Build the training model with my_model's default batch size.
model = my_model(VOCAB_SIZE)
model.summary() # the model takes integer ids of shape (BATCH_SIZE, SEQ_LENGTH) and outputs scores of shape (BATCH_SIZE, SEQ_LENGTH, VOCAB_SIZE)

In [None]:
class Custom_training():
  """
  Hand-written training loop for the module-level ``model``.

  Each iteration samples one random batch of character sequences from
  ``vectorized_songs`` and takes a single Adam step on it; the weights are
  periodically saved under ``checkpoint_prefix``.
  """
  def __init__(self):
    pass

  # Built once when the class body runs and shared by all instances.
  optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

  def gradient_desc(self, inputs, labels):
    """
    Run one optimisation step on a single batch and return its loss.

    Args:
      inputs: batch of encoded characters, as produced by get_batches.
      labels: the same sequences shifted one position ahead (the target
        next characters), as produced by get_batches.

    Returns:
      The value returned by tf.keras.losses.sparse_categorical_crossentropy
      for ``labels`` against the model's logits.
    """
    # NOTE(review): my_model builds the LSTM with stateful=True, so its state
    # carries over from one randomly sampled batch to the next; confirm this
    # is intended or call model.reset_states() between steps.
    with tf.GradientTape() as tape:
      # Only the forward pass and the loss need to be recorded by the tape.
      predictions = model(inputs)  # y_hat
      loss = tf.keras.losses.sparse_categorical_crossentropy(
          labels, predictions, from_logits=True)

    # Differentiate and update outside the tape context so the tape does not
    # also record the gradient computation and the optimizer update.
    grads = tape.gradient(loss, model.trainable_variables)
    Custom_training.optimizer.apply_gradients(
        zip(grads, model.trainable_variables))
    return loss

  def training(self, epochs):
    """
    Train the model for ``epochs`` iterations.

    Every iteration draws a fresh batch of SEQ_LENGTH-long sequences and
    performs one gradient step on it. Weights are saved every 500 iterations
    (starting with iteration 0) and once more after the loop finishes.
    """
    for epoch in range(epochs):
      inputs, labels = self.get_batches(SEQ_LENGTH)
      self.gradient_desc(inputs, labels)

      if epoch % 500 == 0:
        model.save_weights(checkpoint_prefix)
    model.save_weights(checkpoint_prefix)

  def get_batches(self, seq_len, batch_size=32):
    """
    Sample a random batch of input/label sequences from ``vectorized_songs``.

    ``batch_size`` start positions are drawn at random (with replacement);
    each input is the ``seq_len`` ids starting there and each label is the
    same window shifted one position to the right, so the model learns to
    predict the next character at every step.

    Args:
      seq_len: number of characters per sequence.
      batch_size: number of sequences in the batch.

    Returns:
      (input_batch, label_batch), two arrays of shape (batch_size, seq_len).

    Raises:
      ValueError: from np.random.choice when the corpus is too short to
        draw any start position for the requested ``seq_len``.
    """
    # Last valid index; start positions are drawn so that the shifted label
    # window still fits inside the corpus.
    n = vectorized_songs.shape[0] - 1
    indx = np.random.choice(n - seq_len, batch_size)

    inputs = [vectorized_songs[i : i + seq_len] for i in indx]
    labels = [vectorized_songs[i + 1 : i + 1 + seq_len] for i in indx]

    input_batch = np.reshape(inputs, [batch_size, seq_len])
    label_batch = np.reshape(labels, [batch_size, seq_len])
    return input_batch, label_batch

In [None]:
# training the model: runs EPOCHS iterations and saves the weights under checkpoint_prefix
Custom_training().training(epochs=EPOCHS)

In [None]:
# rebuilding the model & loading the weights
# Same architecture as during training, but with a batch size of 1 so that
# generate_song can feed a single sequence at a time.
model = my_model(VOCAB_SIZE, batch_size=1)

# Restore the most recent weights saved under checkpoint_path.
# NOTE(review): tf.train.latest_checkpoint returns None when no checkpoint is
# found, which would make load_weights fail - confirm training ran first.
model.load_weights(tf.train.latest_checkpoint(checkpoint_path))
model.build(tf.TensorShape([1, None]))
model.summary()

In [None]:
# sample a new song from the model, one character at a time
def generate_song(generation_length=1000, start="X"):
  """
  Generate text by repeatedly sampling the next character from the model.

  The model's recurrent state is reset, the model is primed with ``start``
  and then, ``generation_length`` times, the next character id is sampled
  from the model's output logits and fed back in as the next input.

  Args:
    generation_length: number of characters to generate after ``start``.
    start: non-empty seed string; every character must be a key of char2idx.

  Returns:
    ``start`` followed by the generated characters, as a single string.

  Raises:
    ValueError: if ``start`` is empty.
    KeyError: if ``start`` contains a character missing from char2idx.
  """
  if not start:
    raise ValueError("start must contain at least one character")

  model.reset_states()
  generated_song = []
  # seed batch of shape (1, len(start)) holding the ids of the seed characters
  model_input = tf.expand_dims([char2idx[char] for char in start], 0)
  for _ in range(generation_length):
    prediction = model(model_input)

    # drop the batch dimension so each row holds the logits of one position
    prediction = tf.squeeze(prediction, 0)

    # sample one id per position and keep the one for the last position
    predicted_id = tf.random.categorical(prediction, num_samples=1)[-1, 0].numpy()

    # the sampled character becomes the next input, shape (1, 1)
    model_input = tf.expand_dims([predicted_id], 0)

    generated_song.append(idx2char[predicted_id])

  return start + "".join(generated_song)

In [None]:
# sample 1000 characters from the trained model and show the result
generated_song = generate_song(1000)
print(generated_song)

## Reference

1. TensorFlow Core "Text Generation with an RNN", www.tensorflow.org/tutorials/text/text_generation.