# Import libraries

In [1]:
# !pip install music21

In [2]:
import music21
from IPython.display import Image, Audio
import os
import json
import numpy as np
import tensorflow as tf
import pprint
import IPython

# Music21 setup

In [3]:
def show(music):
  display(Image(str(music.write('lily.png'))))

def play(music):
  filename = music.write('mid')
  wav_filename = os.path.join(os.path.join(filename)[:-4] + ".wav")
  !fluidsynth -ni font.sf2 $filename -F $wav_filename -r 16000
  display(Audio(wav_filename))

In [4]:
paths = music21.corpus.getComposer('bach')
paths

[PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv1.6.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv10.7.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv101.7.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv102.7.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv103.6.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv104.6.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv108.6.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv11.6.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv110.7.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv111.6.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-packages/music21/corpus/bach/bwv112.5-sc.mxl'),
 PosixPath('/usr/local/lib/python3.7/dist-pa

In [5]:
#us = music21.environment.UserSettings()
#us.create()
#us['lilypondPath'] = 'C:/LilyPond/usr/bin/lilypond.exe'

In [6]:
#music = music21.corpus.parse('bach/bwv87.7.mxl')
music = music21.corpus.parse(paths[6])
show(music)
play(music)

LilyTranslateException: ignored

# Preprocess dataset


```
https://github.com/musikalkemist/generating-melodies-with-rnn-lstm/blob/master/3%20-%20Preprocessing%20dataset%20for%20melody%20generation%20pt%201/code/preprocess.py
```





## Raw preprocessing

In [7]:
# durations in quarter length
ACCEPTABLE_DURATIONS = [
    0.25, # 16th note
    0.5, # 8th note
    0.75,
    1.0, # quarter note
    1.5,
    2, # half note
    3,
    4 # whole note
]

SAVE_DIR = "./encoded_datasets/test_dataset/"
SINGLE_FILE_DATASET_OUTPUT = "./encoded_datasets/single_file_test_dataset.txt"
MAPPINGS_FILE = "./encoded_datasets/mappings.txt"
SEQUENCE_LENGTH = 64
SAVE_MODEL_PATH = "./encoded_datasets/model.h5"
OUTPUT_MIDI_PATH = "./encoded_datasets/"

In [8]:
def hasAcceptableDurations(song, acceptable_durations):
    for note in song.flat.notesAndRests:
        if note.duration.quarterLength not in acceptable_durations:
            return False
    return True

In [9]:
def transpose(song):
    # Estimate key using music21
    key = song.analyze("key")

    # Get interval for transposition
    if key.mode == "major":
        interval = music21.interval.Interval(key.tonic, music21.pitch.Pitch("C"))
    if key.mode == "minor":
        interval = music21.interval.Interval(key.tonic, music21.pitch.Pitch("A"))
    
    transposed_song = song.transpose(interval)
    return transposed_song

In [10]:
def encodeSong(song, time_step=0.25):
    encoded_song = []
    for event in song.flat.notesAndRests:
        # Handle notes
        if isinstance(event, music21.note.Note):
            symbol = event.pitch.midi
        # Handle rests
        elif isinstance(event, music21.note.Rest):
            symbol = "r"
        else:
            print(event)

        steps = int(event.duration.quarterLength/time_step)
        for step in range(steps):
            # If it's the first time we see a note/rest, we encode it.
            # Otherwise, it means we're carrying the same symbol a new time step
            if step == 0:
                encoded_song.append(symbol)
            else:
                encoded_song.append("_")

    # Cast encoded song to str
    encoded_song = " ".join(map(str, encoded_song))
    return encoded_song

In [11]:
def preprocess():
    # Load songs
    paths = music21.corpus.getComposer('bach')
    print("Loaded {} songs".format(len(paths)))

    paths = paths[0:50]

    for i, path in enumerate(paths):
        print('Processing song {}'.format(i))
        song = music21.corpus.parse(path)
        # Remove songs which have non acceptable durations
        if not hasAcceptableDurations(song, ACCEPTABLE_DURATIONS):
            print('- Not acceptable durations')
            continue
        
        # Transpose song to Cmaj/Amin
        #song = transpose(song)

        # Encode song
        encoded_song = encodeSong(song)
        
        # Save encoded song
        save_path = os.path.join(SAVE_DIR, str(i) + ".txt")
        with open(save_path, "w") as fp:
            fp.write(encoded_song)
            print('- Song saved')

In [12]:
preprocess()

Loaded 433 songs
Processing song 0


FileNotFoundError: ignored

## Generate mappings

In [None]:
def load(file_path):
    with open(file_path, "r") as fp:
        song = fp.read()
    return song

In [None]:
def createSingleFileDataset(dataset_path, output_path, sequence_length):
    new_song_delimiter = "/ " * sequence_length
    dataset = ""

    # Load songs and add delimiters
    for encoded_song_filename in os.listdir(dataset_path):
        file_path = os.path.join(dataset_path, encoded_song_filename)
        song = load(file_path)
        dataset = dataset + song + " " + new_song_delimiter

    # Remove empty space at the end
    dataset = dataset[:-1]

    # Save our final single file dataset
    with open(output_path, "w") as fp:
        fp.write(dataset)

    return dataset

In [None]:
raw_dataset = createSingleFileDataset(SAVE_DIR, SINGLE_FILE_DATASET_OUTPUT, SEQUENCE_LENGTH)
len(raw_dataset)

In [None]:
def createMapping(raw_dataset, mapping_path):
    mappings = {}

    # Identify vocabulary
    raw_dataset = raw_dataset.split()
    vocabulary = list(set(raw_dataset))

    # Create mapping
    for i, symbol in enumerate(vocabulary):
        mappings[symbol] = i

    # Save vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)

In [None]:
createMapping(raw_dataset, MAPPINGS_FILE)

## Create sequences

In [None]:
def datasetToNumerical(raw_dataset, mappings_file):
    numerical_dataset = []

    # Load mappings
    with open(mappings_file, 'r') as fp:
        mappings = json.load(fp)
    
    raw_dataset = raw_dataset.split()

    # Map symbol to numerical value
    for symbol in raw_dataset:
        numerical_dataset.append(mappings[symbol])
    
    return numerical_dataset

In [None]:
numerical_dataset = datasetToNumerical(raw_dataset, MAPPINGS_FILE)

In [None]:
def generateTrainingSequence(numerical_dataset, seq_length):
    # One-hot encode the sequences
    vocabulary_size = len(set(numerical_dataset))
    ohe_numerical_dataset = tf.keras.utils.to_categorical(numerical_dataset, num_classes=vocabulary_size)

    X = []
    y = []

    # Generate training sequences
    num_sequences = len(ohe_numerical_dataset) - seq_length
    for i in range(num_sequences):
        X.append(ohe_numerical_dataset[i: i + seq_length])
        y.append(ohe_numerical_dataset[i + seq_length])

    # Inputs size: (# of sequences, sequence length, vocabulary size)
    X = np.array(X)
    y = np.array(y)

    return X, y


In [None]:
X, y = generateTrainingSequence(numerical_dataset, SEQUENCE_LENGTH)

# Training

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Bidirectional, Flatten

In [None]:
def buildModel(vocabulary_size):
    model = Sequential()
    model.add(LSTM(64, input_shape=(None, vocabulary_size), return_sequences=True))
    model.add(Dropout(0.3))
    model.add(Bidirectional(LSTM(64, return_sequences=False)))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(vocabulary_size, activation="softmax"))

    model.compile(loss="categorical_crossentropy", 
                  optimizer="adam", 
                  metrics=['accuracy'])

    model.summary()

    return model

In [None]:
def train(X, y, epochs, save_model_path, model=None):
    vocabulary_size = len(y[0])
    if model == None:
        model = buildModel(vocabulary_size)

    model.fit(X, y, epochs=epochs, batch_size=32)
    model.save(save_model_path)

In [None]:
train(X, y, 10, SAVE_MODEL_PATH)

# Generate melody

In [None]:
model = tf.keras.models.load_model(SAVE_MODEL_PATH)

with open(MAPPINGS_FILE, "r") as fp:
    mappings = json.load(fp)

start_symbols = ["/"] * SEQUENCE_LENGTH

In [None]:
epochs = 50
train(X, y, epochs, SAVE_MODEL_PATH, model)

In [None]:
def sampleWithTemperature(probabilities, temperature):
    predictions = np.log(probabilities) / temperature
    probabilities = np.exp(predictions) / np.sum(np.exp(predictions))

    choices = range(len(probabilities)) # [0, 1, 2, 3]
    index = np.random.choice(choices, p=probabilities)

    return index

In [None]:
def generateMelody(seed, nb_steps, max_seq_length, temperature):
    np.random.seed(10)
    
    seed = seed.split()
    melody = seed
    seed = start_symbols + seed

    # Map seed to numerical values
    seed = [mappings[symbol] for symbol in seed]

    for _ in range(nb_steps):
        # Limit the seed to max_seq_length
        seed = seed[-max_seq_length:]

        # One-hot encode the seed
        ohe_seed = tf.keras.utils.to_categorical(seed, num_classes=len(mappings))
        ohe_seed = ohe_seed[np.newaxis, ...]

        # Predict the next note
        probabilities = model.predict(ohe_seed)[0] 

        # Sample from probabilities
        note = sampleWithTemperature(probabilities, temperature)

        # Update seed
        seed.append(note)

        output_symbol = [k for k, v in mappings.items() if v == note][0]

        # Check whether we are at the end of the melody
        if output_symbol == "/":
            break
        
        # Update melody
        melody.append(output_symbol)
    
    return melody

In [None]:
def saveMelody(melody, path, step_duration=0.25, format="midi", file_name="mel.mid"):
    # Create a music21 stream
    stream = music21.stream.Stream()

    start_symbol = None
    step_counter = 1

    # parse all the symbols in the melody and create note/rest objects
    for i, symbol in enumerate(melody):

        # handle case in which we have a note/rest
        if symbol != "_" or i + 1 == len(melody):

            # ensure we're dealing with note/rest beyond the first one
            if start_symbol is not None:

                quarter_length_duration = step_duration * step_counter # 0.25 * 4 = 1

                # handle rest
                if start_symbol == "r":
                    music21_event = music21.note.Rest(quarterLength=quarter_length_duration)

                # handle note
                else:
                    music21_event = music21.note.Note(int(start_symbol), quarterLength=quarter_length_duration)

                stream.append(music21_event)

                # reset the step counter
                step_counter = 1

            start_symbol = symbol

        # handle case in which we have a prolongation sign "_"
        else:
            step_counter += 1

    # write the music21 stream to a midi file
    stream.write(format, os.path.join(path, file_name))
    return stream

In [None]:
seed = "67 _ _ _ _ _ 65 _ 64 _ 62 _ 60 _ _ _"
#seed = "67 _ 67 _ 67 _ _ 65 64 _ 64 _ 64 _ _"


# Temperature : between 0 and 1
# 0 : uniform sampling from probabilities
# 1 : argmax
temperature = 0.6

melody = generateMelody(seed, 200, SEQUENCE_LENGTH, temperature)

In [None]:
final_melody = saveMelody(melody, OUTPUT_MIDI_PATH)
print(melody)

In [None]:
show(final_melody)
play(final_melody)

In [None]:
for k in range(50, 60):
    # Temperature : between 0 and 1
    # 0 : uniform sampling from probabilities
    # 1 : argmax
    seed = str(k)
    temperature = 0.4

    melody = generateMelody(seed, 200, SEQUENCE_LENGTH, temperature)

    final_melody = saveMelody(melody, OUTPUT_MIDI_PATH)
    print(melody)

    show(final_melody)
    play(final_melody)