In [46]:
# !unzip '/home/deepakachu/Desktop/DMS/project/deutschl.zip' -d '/home/deepakachu/Desktop/DMS/project/content'

# Preprocessing of the Dataset

In [47]:
import os
import music21 as m21

## Converting songs to m21 score objects
We keep only the ".krn" files from the dataset and pass each of them to the m21 converter, which turns it into an m21 score object.

In [48]:
def load_music(datapath):
  """Parse every kern file found under `datapath` into an m21 score object.

  :param datapath (str): Root directory; it is walked recursively.
  :return (list): One parsed score per file whose name ends in ".krn".
  """
  songs = []
  for path, _, files in os.walk(datapath):
    for file in files:
      # Match the whole ".krn" extension. Comparing only the last three
      # characters would also accept names such as "notes_krn" or "krn".
      if file.endswith(".krn"):
        songs.append(m21.converter.parse(os.path.join(path, file)))
  return songs

We also filter the songs by the durations of their notes. We want each note (or rest) in the song to be at least a 16th note and at most a whole note. This is important since we want to convert the song to time series data with a fixed time step.

In [49]:
def acceptable(song):
  """Tell whether every note and rest of `song` has an allowed duration.

  :param song: m21 score object (read through ".flat.notesAndRests").
  :return (bool): True if all durations, in quarter lengths, are allowed.
  """
  # allowed durations in quarter lengths
  allowed = [0.25, 0.5, 0.75, 1, 1.5, 2, 3, 4]
  # ".flat" flattens the song into a list of objects and ".notesAndRests"
  # keeps only the notes and rests.
  events = song.flat.notesAndRests
  return all(event.duration.quarterLength in allowed for event in events)

## Transposing the song to C major/ A minor
We get the key from the score object, check if  the key is an instance of existing collection of keys, else estimate the key of the song by using the "analyze" function.

We then find the interval for transposition. Here we will be transposing songs with major keys to "C major" and songs with minor keys to "A minor".
This simplification gives the model a more streamlined dataset: it has to learn only 2 keys, which reduces the computational cost and the size of the dataset needed.

We calculate the interval between the song's key and the key we wish to transpose the song to using the "Interval" function of interval object.

The function takes 2 pitch objects and returns the interval between them as an interval object.

In [50]:
def transpose(song):
  """Transpose `song` to C major (major keys) or A minor (minor keys).

  The key is taken from the first measure of the first part when a key
  object is notated there; otherwise it is estimated with
  `song.analyze("key")`. The detected key is printed.

  :param song: m21 score object.
  :return: The transposed song, as returned by `song.transpose`.
  :raises ValueError: If the key's mode is neither "major" nor "minor".
  """
  # look for a notated key in the first measure of the first part
  key = None
  try:
    parts = song.getElementsByClass(m21.stream.Part)
    measures = parts[0].getElementsByClass(m21.stream.Measure)
    for element in measures[0]:
      if isinstance(element, m21.key.Key):
        key = element
        break
  except IndexError:
    # no part / no measure to inspect: fall back to key estimation below
    key = None

  # if no key is notated, estimate the key
  if key is None:
    key = song.analyze("key")
  print(key)

  # pick the tonic we want to end up on
  if key.mode == "major":
    target_tonic = m21.pitch.Pitch("C")
  elif key.mode == "minor":
    target_tonic = m21.pitch.Pitch("A")
  else:
    raise ValueError(f"cannot transpose a song in mode {key.mode!r}")

  # calculate the interval for transposition and transpose the song
  interval = m21.interval.Interval(key.tonic, target_tonic)
  return song.transpose(interval)

## Encoding the song into time series data
We analyse each note in the song and create a time series representation of the song.

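eg: C4 is mapped to 60 in MIDI.
Suppose we have a C4 whole note (4 beats) and a time\_step of 1 beat: we represent it as \[ "60", "\_", "\_", "\_"], where each "\_" means the note is held for one more time\_step. With the default time\_step of 0.25 (a 16th note), the same whole note becomes "60" followed by fifteen "\_".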

In [51]:
def encode_song(song, time_step = 0.25):
  """Encode `song` as a space-separated time series string.

  Every note or rest becomes its symbol (MIDI number for a note, "r" for a
  rest) followed by one "_" for each further time step it lasts.

  :param song: m21 score object (read through ".flat.notesAndRests").
  :param time_step (float): Length of one step in quarter lengths.
  :return (str): The encoded song, symbols separated by single spaces.
  """
  encoded_song = []
  for event in song.flat.notesAndRests:
    if isinstance(event, m21.note.Note):
      # store the MIDI equivalent of the note
      symbol = event.pitch.midi
    elif isinstance(event, m21.note.Rest):
      symbol = "r"
    else:
      # Anything else (e.g. a chord) has no symbol in this encoding. Skip it
      # instead of silently re-using the previous event's symbol.
      continue
    # ".duration.quarterLength" is the length in quarter notes (1.0 = quarter
    # note, 4.0 = whole note); convert it to a number of time steps
    steps = int(event.duration.quarterLength / time_step)
    if steps > 0:
      # first step carries the symbol, the remaining ones hold it
      encoded_song.append(symbol)
      encoded_song.extend(["_"] * (steps - 1))
  # convert the encoded time series data to a string
  return " ".join(map(str, encoded_song))

## The preprocess function

We load the dataset, check if all the notes are of acceptable duration, transpose the song, encode it and then save it.

In [52]:
def preprocess(datapath, save_dir='/home/deepakachu/Desktop/DMS/project/content/dataset1'):
  """Load, filter, transpose, encode and save every song under `datapath`.

  Songs with a note or rest of unacceptable duration are skipped. Each
  remaining song is written to `save_dir` in a file named after its index in
  the loaded list, so skipped songs leave gaps in the numbering.

  :param datapath (str): Directory containing the raw dataset.
  :param save_dir (str): Directory receiving the encoded songs; created if
    it does not exist yet.
  """
  print("Loading songs...")
  songs = load_music(datapath)
  print(f"Loaded {len(songs)} songs.")

  # open(..., "w") does not create missing directories
  os.makedirs(save_dir, exist_ok=True)

  for i, song in enumerate(songs):
    if not acceptable(song):
      continue

    song = transpose(song)
    encoded_song = encode_song(song)
    save_path = os.path.join(save_dir, str(i))
    with open(save_path, "w") as fp:
      fp.write(encoded_song)

## Merging all the processed songs into a sequence
We merge all the converted songs into a single file to facilitate the training of the NN: it is easier to manipulate/encode this single file when feeding the network.

In [53]:
def load(path):
  """Read a processed (encoded) song from `path` and return its text."""
  with open(path, "r") as song_file:
    return song_file.read()

In [54]:
def single_file(processed_dataset_path, destination, seq_len = 64):
  """Merge all encoded songs into one string and write it to `destination`.

  Each song is followed by `seq_len` "/" symbols, which mark the end of a
  song. Files are visited in sorted order so the merged sequence is the same
  on every run.

  :param processed_dataset_path (str): Directory holding the encoded songs.
  :param destination (str): Path of the merged file to write.
  :param seq_len (int): Number of "/" delimiters appended after each song.
  :return (str): The merged sequence (without the trailing space).
  """
  delimiter = "/ "*seq_len
  destination_abs = os.path.abspath(destination)
  pieces = []
  for path, subdirs, files in os.walk(processed_dataset_path):
    subdirs.sort()  # deterministic traversal order
    for file in sorted(files):
      file_path = os.path.join(path, file)
      # The merged file and the JSON symbol mapping may live in the same
      # directory as the songs. They are not songs, so merging them (e.g. on
      # a second run) would corrupt the sequence.
      if os.path.abspath(file_path) == destination_abs or file.endswith(".json"):
        continue
      pieces.append(load(file_path) + " " + delimiter)
  # join once instead of growing a string in the loop, then drop the
  # trailing space left by the last delimiter
  songs = "".join(pieces)[:-1]

  # write the merged sequence to the destination
  with open(destination, "w") as fp:
    fp.write(songs)
  return songs

## Map the merged sequence onto integers
We need to convert the song sequence we have to an integer sequence in order to feed it to the model, so we need an integer mapping for all the symbols in the songs sequence.

In [55]:
import json

In [56]:
def mapping(songs, mapping_path="/home/deepakachu/Desktop/DMS/project/content/dataset1/mapping.json"):
  """Build the symbol -> integer mapping of `songs` and save it as JSON.

  :param songs (str): Merged song sequence, symbols separated by whitespace.
  :param mapping_path (str): Path of the JSON file to write.
  :return (dict): The mapping that was written.
  """
  # Split the sequence and extract the vocabulary. Sorting makes the mapping
  # identical on every run; plain set order changes between interpreter runs.
  vocab = sorted(set(songs.split()))

  # map each symbol in vocabulary to a unique number
  mappings = {symbol: i for i, symbol in enumerate(vocab)}

  # save as a json file
  with open(mapping_path, "w") as fp:
    json.dump(mappings, fp, indent = 4)
  return mappings

## Converting the songs sequence to an integer sequence
We use the integer mapping we obtained earlier to convert the songs sequence to an integer sequence by replacing the symbols with their mapped integers.

In [57]:
def convert_to_int(songs, mapping_path="/home/deepakachu/Desktop/DMS/project/content/dataset1/mapping.json"):
  """Replace every symbol of `songs` by its integer from the saved mapping.

  :param songs (str): Song sequence, symbols separated by whitespace.
  :param mapping_path (str): JSON file holding the symbol -> integer mapping.
  :return (list of int): The integer sequence.
  :raises KeyError: If a symbol is missing from the mapping.
  """
  # load the mapping json file
  with open(mapping_path, "r") as fp:
    mappings = json.load(fp)
  # replace the symbols with their corresponding integer equivalent
  return [mappings[symbol] for symbol in songs.split()]

# Generation of Training Dataset

In [58]:
import numpy as np
import tensorflow.keras as keras

## Generating training sequences
Here we create the training sequences by generating sequences from the integer sequence, each sequence will have 64 elements that will be used as the historical data based on which the next element will be predicted by the model.

The sequence length can also be altered so as to facilitate generation of more complex music.

The sequences generated are then one-hot encoded to feed into the neural network.


In [59]:
def generate_train(seq_len=64, single_file_path="/home/deepakachu/Desktop/DMS/project/content/dataset1/single_file"):
  """Create one-hot input windows and integer targets for training.

  Every window of `seq_len` consecutive symbols is one input; the symbol
  right after the window is its target.

  :param seq_len (int): Number of symbols in each input sequence.
  :param single_file_path (str): Path of the merged song file.
  :return (tuple): `(inputs, targets)`; `inputs` is the one-hot encoding of
    the windows and `targets` a 1-D array of integer symbols.
  :raises ValueError: If the data has no more than `seq_len` symbols.
  """
  songs = load(single_file_path)
  int_songs = convert_to_int(songs)

  # Finding out the max number of sequences we can generate of the given length
  num_sequences = len(int_songs) - seq_len
  if num_sequences <= 0:
    # fail here with a clear message rather than later inside model.fit
    raise ValueError(
        f"need more than {seq_len} symbols to build training sequences, "
        f"got {len(int_songs)}")

  # the input is the window of length seq_len starting at i ...
  inputs = [int_songs[i:i + seq_len] for i in range(num_sequences)]
  # ... and the target is the element immediately succeeding that window
  targets = [int_songs[i + seq_len] for i in range(num_sequences)]

  # one hot encoding the sequences
  vocab_size = len(set(int_songs))
  inputs = keras.utils.to_categorical(inputs, num_classes = vocab_size)
  targets = np.array(targets)

  return inputs, targets

# Training a LSTM network for melody generation


1. `build_model(output_units, num_units, loss, learning_rate)`: This function constructs the architecture for an LSTM-based neural network used in music generation. It sets up an input layer, adds an LSTM layer with a specified number of units, incorporates dropout to prevent overfitting, and concludes with an output layer using a softmax activation. The model is then compiled with the provided loss function, an Adam optimizer with the specified learning rate, and accuracy as a metric. Finally, it displays a summary of the model's structure and returns the compiled model.

2. `train()`: This function handles the training of the music generation model. It uses preset values for the output units (the vocabulary size), the loss, the learning rate, and the number of units in the LSTM layer. It generates the training sequences (one-hot encoded input sequences and their integer targets) with `generate_train`. The model is built using the `build_model` function, and training runs for 50 epochs with a batch size of 64. Once trained, the model is saved to a file named "model1.h5" in the project's "content" directory.

In [60]:
import tensorflow.keras as keras

In [61]:
def build_model(output_units, num_units, loss, learning_rate):
  """Build and compile the LSTM network used for melody generation.

  :param output_units (int): Vocabulary size; width of the one-hot input and
    of the softmax output.
  :param num_units (list of int): Units of each LSTM layer, in order. One
    LSTM layer is stacked per entry.
  :param loss (str): Loss passed to `model.compile`.
  :param learning_rate (float): Learning rate of the Adam optimizer.
  :return: The compiled keras model.
  :raises ValueError: If `num_units` is empty.
  """
  if not num_units:
    raise ValueError("num_units must contain at least one layer size")

  # create the model architecture, we will be using keras functional API
  inputs = keras.layers.Input(shape=(None, output_units))
  x = inputs
  last_layer = len(num_units) - 1
  for position, units in enumerate(num_units):
    # every LSTM but the last must return the whole sequence so the next
    # LSTM layer receives one vector per time step
    x = keras.layers.LSTM(units, return_sequences=position < last_layer)(x)
  x = keras.layers.Dropout(0.2)(x) # adding a Dropout layer to avoid overfitting

  output = keras.layers.Dense(output_units, activation="softmax")(x)

  model = keras.Model(inputs, output)

  # compile model ("learning_rate" replaces the deprecated "lr" argument)
  model.compile(loss=loss,
                optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                metrics=["accuracy"])

  model.summary()

  return model



In [68]:
def train(output_units=None, num_units=None, loss="sparse_categorical_crossentropy",
          learning_rate=0.001, epochs=50, batch_size=64,
          model_path="/home/deepakachu/Desktop/DMS/project/content/model1.h5"):
  """Generate the training data, then build, fit and save the model.

  :param output_units (int or None): Vocabulary size. When None it is taken
    from the one-hot width of the generated inputs, so it always matches the
    data.
  :param num_units (list of int or None): LSTM layer sizes; None means one
    layer with 256 units.
  :param loss (str): Loss function name passed to `build_model`.
  :param learning_rate (float): Learning rate passed to `build_model`.
  :param epochs (int): Number of training epochs.
  :param batch_size (int): Training batch size.
  :param model_path (str): Where the trained model is saved.
  :return: The trained model.
  """
  if num_units is None:
    num_units = [256] # 1 layer with 256 neurons

  # generate the training sequences
  inputs, targets = generate_train()

  if output_units is None:
    # last axis of the one-hot inputs is the vocabulary size
    output_units = inputs.shape[-1]

  # build the model
  model = build_model(output_units, num_units, loss, learning_rate)

  # train the model
  model.fit(inputs, targets, epochs=epochs, batch_size=batch_size)

  # save the model
  model.save(model_path)
  return model

 ## Generation of Melodies and Saving them
 
The MelodyGen class is designed to create musical melodies using a pre-trained neural network model. It loads the model and a symbol-to-integer mapping dictionary during initialization. The generate method generates melodies from a given seed sequence, controlling the length and randomness. Temperature-based sampling influences the level of randomness. The melodies are constructed symbol by symbol, considering a maximum sequence length and an end-of-melody symbol ('/').

The save_melody method converts the generated symbols into musical notation (MIDI format) and saves them to a file. This class offers flexibility in melody generation, allowing users to specify the number of steps in the melody and the level of randomness. Ultimately, it enables the creation and storage of musical compositions.


In [69]:
class MelodyGen:
  """Generate melodies with a trained model and save them as MIDI files."""

  def __init__(self, model_path = "/home/deepakachu/Desktop/DMS/project/content/model1.h5",
               mapping_path = "/home/deepakachu/Desktop/DMS/project/content/dataset1/mapping.json"):
    """Load the trained model and the symbol -> integer mapping.

    :param model_path (str): Path of the saved keras model.
    :param mapping_path (str): Path of the JSON symbol mapping.
    """
    self.model_path = model_path
    self.model = keras.models.load_model(model_path)

    with open(mapping_path, "r") as fp:
      self._mappings = json.load(fp)

    # delimiter symbols placed in front of every seed
    self._start_symbols = ["/"]*64

  def generate(self, seed, num_steps, max_seq_len, temperature):
    """Extend `seed` by sampling from the model `num_steps` times.

    :param seed (str): Starting melody, symbols separated by whitespace.
    :param num_steps (int): Number of symbols sampled from the model.
    :param max_seq_len (int): Number of most recent symbols fed to the model.
    :param temperature (float): Sampling temperature, see
      `_sample_with_temperature`.
    :return (list of str): Seed symbols followed by the generated symbols;
      sampled "/" delimiters are left out.
    """
    # create seed with symbols
    seed = seed.split()
    melody = seed
    seed = self._start_symbols + seed
    # map seed to int using the mapping dict
    seed = [self._mappings[symbol] for symbol in seed]
    # build the reverse lookup once instead of scanning the mapping per step
    symbols_by_index = {index: symbol for symbol, index in self._mappings.items()}

    for _ in range(num_steps):
      # limit the seed to max_sequence_len
      seed = seed[-max_seq_len:]
      # one hot encode the seed
      one_hot_seed = keras.utils.to_categorical(seed, num_classes=len(self._mappings))
      # the below line just adds an extra axis to convert the array into 3d
      one_hot_seed = one_hot_seed[np.newaxis, ...]

      # making a prediction
      probabilities = self.model.predict(one_hot_seed)[0]
      output = self._sample_with_temperature(probabilities, temperature)
      # update the seed so that we can feed it into network again
      seed.append(output)

      output_symbol = symbols_by_index[output]

      # a "/" delimiter is fed back to the model but kept out of the melody;
      # generation carries on with the next step
      if output_symbol == "/":
        continue
      # if not, append the predicted note
      melody.append(output_symbol)
    return melody

  def _sample_with_temperature(self, probabilities, temperature):
    """Sample an index from `probabilities` reshaped by `temperature`.

    temp --> inf
    this will make the sampling much more random, not specific/rigid
    temp --> 0
    this will make the sampling more specific/rigid; a temperature of 0 (or
    below) picks the most probable index.

    :param probabilities (array-like of float): Probability of each index.
    :param temperature (float): Sampling temperature.
    :return (int): The sampled index.
    """
    # float64 keeps the renormalised values close enough to 1 for
    # np.random.choice, which float32 model outputs do not always manage
    probabilities = np.asarray(probabilities, dtype=np.float64)
    if temperature <= 0:
      return int(np.argmax(probabilities))

    # a probability of exactly 0 gives log(0) = -inf, which is harmless here
    with np.errstate(divide="ignore"):
      predictions = np.log(probabilities) / temperature
    # subtracting the max does not change the softmax but avoids overflow
    weights = np.exp(predictions - np.max(predictions))
    probabilities = weights / np.sum(weights)

    index = np.random.choice(len(probabilities), p=probabilities) # we sample one randomly

    return int(index)

  @staticmethod
  def _melody_to_events(melody):
    """Group melody symbols into (symbol, number_of_steps) pairs.

    A "_" prolongs the event before it by one step; a "_" with no event
    before it is ignored.
    """
    events = []
    for symbol in melody:
      if symbol == "_":
        if events:
          events[-1][1] += 1
      else:
        events.append([symbol, 1])
    return [(symbol, steps) for symbol, steps in events]

  def save_melody(self, melody, mood,
                  file_name='/home/deepakachu/Desktop/DMS/project/content/meow.mid'):
    """Converts a melody into a MIDI file

    :param melody (list of str): Melody symbols: MIDI numbers, "r" and "_".
    :param mood (str): "h" uses 0.5 quarter lengths per time step, "s" uses
      1, anything else 0.1.
    :param file_name (str): Name of midi file
    :return:
    """
    if mood == "h":
      step_duration = 0.5
    elif mood == "s":
      step_duration = 1
    else:
      step_duration = 0.1

    # create a music21 stream
    stream = m21.stream.Stream()

    # Create one note/rest object per event. Grouping the symbols first means
    # the last event is written with its full length instead of being
    # dropped or cut short.
    for symbol, steps in self._melody_to_events(melody):
      quarter_length_duration = step_duration * steps # e.g. 0.5 * 2 = 1

      if symbol == "r":
        m21_event = m21.note.Rest(quarterLength=quarter_length_duration)
      else:
        m21_event = m21.note.Note(int(symbol), quarterLength=quarter_length_duration)

      stream.append(m21_event)

    # write the m21 stream to a midi file
    stream.write('midi', fp=file_name)



In [71]:
if __name__ == "__main__":
  import random

  # data pipeline: encode the songs, merge them, build the symbol mapping
  preprocess("/home/deepakachu/Desktop/DMS/project/content/essen/europa/deutschl/test")
  songs = single_file("/home/deepakachu/Desktop/DMS/project/content/dataset1", "/home/deepakachu/Desktop/DMS/project/content/dataset1/single_file")
  mapping(songs)

  # train and save the model
  train()

  # candidate seed melodies for each mood
  happy = [ "69 _ 68 _ 72 _ 64 _ 62 _ 74 _ 60", "55 _ 57 _ 60 _ 62 _ 65 _ 67 _ 71", "72 _ 74 _ 76 _ 60 _ 64 _ 67 _ 69", "65 _ 67 _ 71 _ 74 _ 76 _ 60 _ 64", "67 _ 71 _ 74 _ 76 _ 60 _ 64 _ 55"]
  sad = ["77 _ _ 76 _ _ 67 _ _ 65 _ _ 71 _ _ 55 _ _ 57", "69 _ _ 68 _ _ 72 _ _ 64 _ _ 62 _ _ 74 _ _ 60", "64 _ _ 55 _ _ 69 _ _ 72 _ _ 81 _ _ 76 _ _ 60", "60 _ _ 65 _ _ 57 _ _ 68 _ _ 74 _ _ 72 _ _ 81", "76 _ _ 67 _ _ 60 _ _ 65 _ _ 74 _ _ 72 _ _ 81"]
  angry = ["77 _ 76 _ 67 _ 65 _ 71  _ 55 _ 57", "69 _ 68 _ 72 _ 64 _ 62 _ 74 _ 60", "64 _ 55 _ 69 _ 72 _ 81 _ 76  _ 60", "60 _ 65 _ 57 _ 68 _ 74 _ 72 _ 81", "76 _ 67 _ 60 _ 65 _ 74 _ 72 _ 81"]
  seeds_by_mood = {"s": sad, "h": happy}

  mood = input("enter moood: ")
  mg = MelodyGen()
  # "s" -> sad, "h" -> happy, anything else -> angry
  seed = random.choice(seeds_by_mood.get(mood, angry))

  # 500 steps, 64 symbols of context, temperature 50
  melody = mg.generate(seed, 500, 64, 50)
  mg.save_melody(melody, mood)

Loading songs...
Loaded 12 songs.
e minor
C major
e minor
e minor
F major
C major
C major
F major
F major
g minor
e minor
b minor




Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, None, 23)]        0         
                                                                 
 lstm_11 (LSTM)              (None, 256)               286720    
                                                                 
 dropout_11 (Dropout)        (None, 256)               0         
                                                                 
 dense_11 (Dense)            (None, 23)                5911      
                                                                 
Total params: 292631 (1.12 MB)
Trainable params: 292631 (1.12 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50


2023-11-10 16:11:08.611912: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:447] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2023-11-10 16:11:08.611973: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:451] Memory usage: 6488064 bytes free, 4100784128 bytes total.
2023-11-10 16:11:08.611984: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at cudnn_rnn_ops.cc:1764 : UNKNOWN: Fail to find the dnn implementation.


UnknownError: Graph execution error:

Detected at node CudnnRNN defined at (most recent call last):
<stack traces unavailable>
Fail to find the dnn implementation.
	 [[{{node CudnnRNN}}]]
	 [[model_11/lstm_11/PartitionedCall]] [Op:__inference_train_function_12459]

In [None]:
# Visualize the architecture of the model held by the MelodyGen instance `mg`.
# `plot_model` is taken from the public `keras.utils` namespace of the
# `tensorflow.keras` import made earlier in the notebook; the private
# `keras.utils.vis_utils` module path is not available in newer Keras releases.
keras.utils.plot_model(mg.model, to_file='/home/deepakachu/Desktop/DMS/project/content/model_plot.png', show_shapes=True, show_layer_names=True)

# Get the model summary
# mg.model.summary()
