In [None]:
# Goal is to create a single dataset to pass to the lstm
import os
import json
import music21 as m21
import numpy as np
# import tensorflow.keras
import tensorflow
import keras

In [None]:
# Creating encoded data for training
# Point music21 at a local MuseScore 4 executable for both settings below.
# NOTE(review): this is a machine-specific Windows path; adjust it per machine.
env = m21.environment.Environment()
env['musescoreDirectPNGPath'] = r'C:/Program Files/MuseScore 4/bin/MuseScore4.exe'
env['musicxmlPath'] = r'C:/Program Files/MuseScore 4/bin/MuseScore4.exe'

# Create constants
# Root directory that is searched recursively for kern files.
KERN_DATASET_PATH = "deutschl/erk"
# Directory that receives one encoded text file per accepted song.
SAVE_DIR = "dataset"
# File that holds every encoded song joined into one string.
SINGLE_FILE_DATASET = "file_dataset"
# JSON file that maps each symbol of the dataset to an integer.
MAPPING_PATH = "mapping.json"
# Allowed note/rest durations, expressed in quarter lengths. Songs containing
# any other duration are skipped during preprocessing. Every value is a
# multiple of 0.25, the default time step used when encoding a song.
ACCEPTABLE_DURATIONS = [0.25, 
                        0.5, 
                        0.75,
                        1.0,
                        1.5,
                        2,
                        3,
                        4
                        ]
# Number of time steps in one training input sequence; also the number of
# "/" delimiter symbols written after each song in the single-file dataset.
SEQUENCE_LENGTH = 64

# Load all songs from the data set
def load_songs_in_kern(dataset_path):
    """Parse every kern file found under ``dataset_path``.

    The directory tree is walked recursively. A file is handed to the
    music21 parser only when its extension is ``.krn`` (compared
    case-insensitively), so a name that merely ends in the letters "krn"
    is ignored instead of being parsed.

    :param dataset_path (str): Root directory of the kern dataset
    :return songs (list): Parsed music21 scores, in the order they were visited
    """
    songs = []
    for path, _, files in os.walk(dataset_path):
        for file in files:
            extension = os.path.splitext(file)[1]
            if extension.lower() != ".krn":
                continue
            songs.append(m21.converter.parse(os.path.join(path, file)))
    return songs


# To check if the duration is in the acceptable duration
def has_acceptable_durations(song, acceptable_durations):
    """Tell whether every note and rest of ``song`` has an allowed duration.

    :param song: Score whose flattened notes and rests are inspected
    :param acceptable_durations (list): Allowed durations in quarter lengths
    :return (bool): True when all durations are allowed (also for a song
        without notes or rests), False as soon as one is not
    """
    return all(
        event.quarterLength in acceptable_durations
        for event in song.flat.notesAndRests  # flattened into a single list
    )

# Transpose songs to C maj and A minor because we don't need to know all different keys - just reduce everything to C major and A minor so the model doesn't have to generalize to more keys (Uses less data)
def transpose(song):
    """Transpose ``song`` to C major or A minor.

    The key is taken from the first ``Key`` object found in the first
    measure of the first part. When the score has no parts or measures,
    when no key is written there, or when the written key is neither major
    nor minor, the key is estimated with ``song.analyze("key")`` instead.

    :param song: music21 score to transpose
    :return transposed_song: New score moved to C (major keys) or A (minor keys)
    :raises ValueError: If the key mode is still neither major nor minor
        after estimation
    """
    # Tonic every song is moved to, per mode. E.g., Bmaj -> Cmaj.
    target_tonics = {"major": "C", "minor": "A"}

    # Locate the first measure of the first part; a score without parts or
    # measures simply has no written key to look at.
    try:
        first_part = song.getElementsByClass(m21.stream.Part)[0]
        first_measure = first_part.getElementsByClass(m21.stream.Measure)[0]
    except IndexError:
        first_measure = ()

    # Search the measure for a Key instead of relying on a fixed position.
    key = next(
        (element for element in first_measure
         if isinstance(element, m21.key.Key)),
        None,
    )

    # estimate key using music21
    if key is None or key.mode not in target_tonics:
        key = song.analyze("key")

    if key.mode not in target_tonics:
        raise ValueError(f"Cannot transpose a song in mode {key.mode!r}")

    # get interval for transposition
    interval = m21.interval.Interval(
        key.tonic, m21.pitch.Pitch(target_tonics[key.mode])
    )

    # transpose song by calculated interval
    return song.transpose(interval)

# Format musical data for LSTM
def encode_song(song, time_step=0.25):
    """Encode ``song`` as a space-separated time series string.

    Every note or rest is spread over ``duration / time_step`` steps: the
    first step carries the symbol and each following step carries "_",
    meaning "hold". E.g. pitch = 60, duration = 1.0 -> "60 _ _ _".

    Symbols:
      * a note is written as its MIDI pitch number,
      * a chord is written as the MIDI number of its highest pitch,
      * a rest, or any other event, is written as "r" so the timing of the
        song is kept and no stale symbol from an earlier event is reused.

    :param song: Score whose flattened notes and rests are encoded
    :param time_step (float): Length of one step in quarter lengths
    :return encoded_song (str): Encoded symbols joined by single spaces
    """
    encoded_song = []  # stores all notes and rests in the time series

    for event in song.flat.notesAndRests:
        # Classify through the isNote / isChord flags of the event rather
        # than through isinstance checks.
        if getattr(event, "isNote", False):
            symbol = event.pitch.midi
        elif getattr(event, "isChord", False) and event.pitches:
            symbol = max(pitch.midi for pitch in event.pitches)
        else:
            symbol = "r"

        # convert the note/rest into time series notation
        steps = int(event.duration.quarterLength / time_step)
        if steps > 0:
            encoded_song.append(symbol)
            encoded_song.extend(["_"] * (steps - 1))

    # cast encoded song to a str
    return " ".join(map(str, encoded_song))

# Start cleaning up data
def preprocess(dataset_path):
    """Load, filter, transpose and encode every song of the dataset.

    Each accepted song is written to its own text file inside ``SAVE_DIR``.
    The file name is the index of the song in the loaded list, so indices of
    rejected songs are simply missing. The directory is created when it does
    not exist yet; files left there by earlier runs are not removed.

    :param dataset_path (str): Root directory of the kern dataset
    :return:
    """
    print("Loading songs...")
    songs = load_songs_in_kern(dataset_path)
    print(f"Loaded {len(songs)} songs.")

    # Without the directory the first open() below would fail.
    os.makedirs(SAVE_DIR, exist_ok=True)

    for i, song in enumerate(songs):
        # Filter out songs that contain a non-acceptable duration
        if not has_acceptable_durations(song, ACCEPTABLE_DURATIONS):
            continue

        # transpose songs to c major or a minor
        song = transpose(song)

        # encode songs with music time series representation
        encoded_song = encode_song(song)

        # save songs to text file
        save_path = os.path.join(SAVE_DIR, str(i))
        with open(save_path, "w") as fp:
            fp.write(encoded_song)

# Open song file
def load(file_path):
    """Read a text file and hand back its whole content.

    :param file_path (str): Path of the file to read
    :return (str): Complete content of the file
    """
    with open(file_path, "r") as handle:
        return handle.read()

# Create a string to add all the different songs from the dataset folder
def create_single_file_dataset(dataset_path, file_dataset_path, sequence_length):
    """Join all encoded songs into one string and save it to a file.

    Every song is followed by a space and ``sequence_length`` "/" symbols
    that mark the end of the song. The trailing space of the final
    delimiter is dropped. Directories and files are visited in sorted name
    order so the resulting dataset is the same on every run.

    :param dataset_path (str): Directory that holds the encoded song files
    :param file_dataset_path (str): Path where to save the joined dataset
    :param sequence_length (int): Number of "/" symbols written after each song
    :return songs (str): String with all songs and delimiters
    """
    new_song_delimiter = "/ " * sequence_length
    pieces = []

    # load encoded songs and add delimiters
    for path, subdirs, files in os.walk(dataset_path):
        subdirs.sort()  # sorting in place fixes the order of the traversal
        for file in sorted(files):
            song = load(os.path.join(path, file))
            pieces.append(song + " " + new_song_delimiter)

    # Join once instead of growing a string inside the loop, then drop the
    # last character (the trailing space of the final delimiter).
    songs = "".join(pieces)[:-1]

    # save string that contains all dataset
    with open(file_dataset_path, "w") as fp:
        fp.write(songs)

    return songs

# Identify the vocabulary or all symbols identified in the dataset & create json file that maps the symbols in the song dataset onto integers or "i"
def create_mapping(songs, mapping_path):
    """Create a JSON file that maps every symbol of the dataset to an integer.

    The vocabulary is sorted before the integers are assigned. Iterating a
    plain set of strings can yield a different order on every interpreter
    run, which would give the same symbol a different integer each time the
    mapping is rebuilt.

    :param songs (str): String with all songs
    :param mapping_path (str): Path where to save mapping
    :return:
    """
    # Identify the vocabulary
    vocabulary = sorted(set(songs.split()))

    # create mappings
    mappings = {symbol: index for index, symbol in enumerate(vocabulary)}

    # save vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)

# Creating a simple lookup table by converting songs into a list of integers
def convert_songs_to_int(songs):
    """Translate the symbols of ``songs`` into their integer codes.

    The symbol table is read from the JSON file at ``MAPPING_PATH``.

    :param songs (str): Whitespace-separated symbols
    :return int_songs (list): Integer code of every symbol, in order
    """
    # load mappings
    with open(MAPPING_PATH, "r") as fp:
        symbol_to_int = json.load(fp)

    # split the string into symbols and look each one up
    return [symbol_to_int[token] for token in songs.split()]
    
# Supervised learning: use labeled data to make predictions. The training sequences are fixed-length windows (subsets) of the time series
def generate_training_sequences(sequence_length):
    """Build the input windows and next-symbol targets used for training.

    A window of ``sequence_length`` symbols slides over the integer-encoded
    dataset one step at a time; the symbol right after each window is its
    target. E.g. [11, 12, 13, 14, ...] with a length of 2 gives the input
    [11, 12] with target 13, then [12, 13] with target 14, and so on.

    :param sequence_length (int): Number of symbols in one input window
    :return inputs: One-hot encoded windows with dimensions
        (# of sequences, sequence length, vocabulary size)
    :return targets: Array with the integer target of every window
    """
    # load the single-file dataset and map its symbols to integers
    encoded = convert_songs_to_int(load(SINGLE_FILE_DATASET))

    # 100 symbols with a window of 64 leave 100 - 64 = 36 sequences
    window_starts = range(len(encoded) - sequence_length)
    inputs = [encoded[start:start + sequence_length] for start in window_starts]
    targets = [encoded[start + sequence_length] for start in window_starts]

    # one-hot encode the windows: each integer becomes a binary vector with
    # one slot per distinct symbol found in the dataset
    vocabulary_size = len(set(encoded))
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)
    targets = np.array(targets)

    return inputs, targets

# Run the main part of the program
def main():
    """Run the whole data preparation pipeline once.

    Encodes the kern dataset, joins the encoded songs into a single file,
    writes the symbol mapping and finally generates the training sequences.
    """
    preprocess(KERN_DATASET_PATH)
    songs = create_single_file_dataset(SAVE_DIR, SINGLE_FILE_DATASET, SEQUENCE_LENGTH)
    create_mapping(songs, MAPPING_PATH)
    # The returned arrays are not used here, so they are not bound to names.
    generate_training_sequences(SEQUENCE_LENGTH)
# Run the data preparation when this code is executed as the main module.
if __name__ == "__main__":
    main()
    # The triple-quoted string below is a bare expression and has no effect
    # at runtime. It holds disabled scratch code that loads the songs,
    # transposes the first one, writes it as MusicXML and opens it.
    '''
    # load songs
    songs = load_songs_in_kern(KERN_DATASET_PATH)
    print(f"Loaded {len(songs)} songs.")
    song = songs[0]
    filepath = 'output'

    preprocess(KERN_DATASET_PATH)

    # transpose song
    transposed_song = transpose(song)
    transposed_song.write('musicxml', filepath)
    song.show()
    transposed_song.show()
    os.startfile(filepath+".musicxml")
    '''

Loading songs...
Loaded 1700 songs.


  return self.iter().getElementsByClass(classFilterList)


In [None]:
# Training using the LSTM model
# Width of the one-hot input vectors and number of units of the softmax
# output layer. NOTE(review): this presumably has to equal the vocabulary
# size of the dataset (number of entries in the mapping file) - confirm
# after regenerating the dataset.
OUTPUT_UNITS = 38
# LSTM layer size(s) handed to build_model.
NUM_UNITS = [256]
# Sparse variant of the loss: the targets are integer class ids, not one-hot vectors.
LOSS = "sparse_categorical_crossentropy"
# Learning rate of the Adam optimizer.
LEARNING_RATE = 0.001
# Number of passes over the training sequences.
EPOCHS = 50
BATCH_SIZE = 64 # Number of samples the network sees before running backpropagation
# File the trained model is saved to.
SAVE_MODEL_PATH = "model.h5"

# Build a model using functional API approach
def build_model(output_units, num_units, loss, learning_rate):
    """Build and compile the LSTM network with the Keras functional API.

    Architecture: input -> one LSTM layer per entry of ``num_units`` ->
    dropout -> softmax output. With a single entry this is exactly one LSTM
    layer. With several entries the layers are stacked, and every layer but
    the last returns its full sequence so the next LSTM receives one vector
    per time step.

    :param output_units (int): Size of each one-hot input vector and number
        of units of the output layer
    :param num_units (list): Number of units of each LSTM layer, in order
    :param loss (str): Loss function used for training
    :param learning_rate (float): Learning rate of the Adam optimizer
    :return model: Compiled Keras model
    :raises ValueError: If ``num_units`` is empty
    """
    if not num_units:
        raise ValueError("num_units must contain at least one LSTM layer size")

    # create model architecture
    # None as the first dimension lets the network accept sequences with any
    # number of time steps.
    inputs = keras.layers.Input(shape=(None, output_units))

    *hidden_sizes, final_size = num_units
    x = inputs
    for units in hidden_sizes:
        x = keras.layers.LSTM(units, return_sequences=True)(x)
    x = keras.layers.LSTM(final_size)(x)
    x = keras.layers.Dropout(0.2)(x)  # avoids overfitting

    outputs = keras.layers.Dense(output_units, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    # compile model
    model.compile(loss=loss, optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])

    model.summary()

    return model

# Creating a model through Keras - trying tensor flow
def train(output_units=OUTPUT_UNITS, num_units=NUM_UNITS, loss=LOSS, learning_rate=LEARNING_RATE):
    """Generate the training data, then build, train and save the model.

    Before the network is built, the width of the one-hot encoded inputs is
    compared with ``output_units``. The input layer of the model is created
    with that width, so a mismatch is reported up front with a readable
    message.

    :param output_units (int): Size of the one-hot vectors and of the output layer
    :param num_units (list): Number of units of the LSTM layer(s)
    :param loss (str): Loss function used for training
    :param learning_rate (float): Learning rate of the optimizer
    :return:
    :raises ValueError: If the vocabulary size of the data differs from
        ``output_units``
    """
    # Generate the training sequences
    inputs, targets = generate_training_sequences(SEQUENCE_LENGTH)

    # The last axis of the one-hot inputs is the vocabulary size.
    vocabulary_size = inputs.shape[-1]
    if vocabulary_size != output_units:
        raise ValueError(
            f"output_units is {output_units} but the dataset has "
            f"{vocabulary_size} distinct symbols"
        )

    # Build the network
    model = build_model(output_units, num_units, loss, learning_rate)

    # Train model
    model.fit(inputs, targets, epochs=EPOCHS, batch_size=BATCH_SIZE)

    # Save the model
    model.save(SAVE_MODEL_PATH)

# Start the training with the default settings when this code is executed
# as the main module.
if __name__ == "__main__":
    train()

Epoch 1/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m590s[0m 104ms/step - accuracy: 0.7515 - loss: 0.9599
Epoch 2/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m592s[0m 104ms/step - accuracy: 0.7952 - loss: 0.6361
Epoch 3/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m568s[0m 100ms/step - accuracy: 0.8091 - loss: 0.5901
Epoch 4/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m553s[0m 98ms/step - accuracy: 0.8219 - loss: 0.5553
Epoch 5/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m557s[0m 98ms/step - accuracy: 0.8275 - loss: 0.5341
Epoch 6/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 99ms/step - accuracy: 0.8328 - loss: 0.5135
Epoch 7/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 99ms/step - accuracy: 0.8368 - loss: 0.5018
Epoch 8/50
[1m5663/5663[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m561s[0m 99ms/step - accuracy: 0.8413 - loss: 0.48



In [None]:
# Melody Generation
def __init__(self, model_path="model.h5"):
    self.model_path = model_path