# Preprocessing Data

In [8]:
import os
import music21
import json
import tensorflow.keras
import keras.utils
import numpy as np

# us = environment.UserSettings()
# us.create()
# us['musescoreDirectPNGPath'] = 'Applications/'
# environment.set('musescoreDirectPNGPath', "./")
# music21.converter.program.musescoreDirectPNGPath =  "./"

In [9]:
# Setup

# Root directory handed to preprocess(), which walks it for files ending in "krn".
DATASET_PATH = "deutschl/erk"
# Durations (in quarter lengths) that every note/rest of a song must have for the song to be kept.
ACCEPTABLE_DURATIONS = [.25, .5, .75, 1, 1.5, 2, 3, 4]
# Directory where preprocess() writes one encoded text file per song.
SAVE_DIR = "./convertedFile"
# Path of the single merged dataset file written by createSingleFileDataset().
SINGLE_FLE_DATASET = "./singleFileDataset"
# Symbol -> integer id table; filled in place by createMapping().
MAPPING = {}
# Length of each training input window, and the number of "/" symbols used to separate songs.
SEQUENCE_LENGTH = 64

In [10]:
def preprocess(dataset_pass):
    """Load, filter, transpose, encode and save every song under ``dataset_pass``.

    Each song that passes the duration filter is written as a text file
    inside ``SAVE_DIR``; the file name is the song's index in the loaded
    list, so rejected songs leave gaps in the numbering.

    Args:
        dataset_pass: Root directory that is searched for songs.
    """
    # load
    songs = loadSongs(dataset_pass)
    print(f"Number of loaded songs :{len(songs)}")

    # open(..., "w") does not create missing directories, so make sure the
    # output directory exists before the first song is written
    os.makedirs(SAVE_DIR, exist_ok=True)

    for index, song in enumerate(songs):
        # filter files by duration
        if not hasAcceptableDuration(song, ACCEPTABLE_DURATIONS):
            continue

        # transpose to the same scale
        song = transpose(song)

        # time series representation encoding
        encodedSong = encode(song)

        # save
        save_path = os.path.join(SAVE_DIR, str(index))
        with open(save_path, "w") as fp:
            fp.write(encodedSong)

def transpose(song):
    """Transpose ``song`` so its tonic becomes C (major keys) or A (otherwise).

    The key is first looked up at a fixed position in the score (fifth
    element of the first measure of the first part). When that position
    does not exist or does not hold a ``music21.key.Key``, the key is
    estimated with ``song.analyze("key")`` instead.

    Args:
        song: The music21 score to transpose.

    Returns:
        The transposed copy returned by ``song.transpose``.
    """
    # key
    key = None
    try:
        parts = song.getElementsByClass(music21.stream.Part)
        measure_part_zero = parts[0].getElementsByClass(music21.stream.Measure)
        key = measure_part_zero[0][4]
    except IndexError:
        # no parts, no measures, or fewer than five elements in the first
        # measure: fall through to key analysis below
        pass

    if not isinstance(key, music21.key.Key):
        key = song.analyze("key")

    # interval transposition
    if key.mode == 'major':
        target = music21.pitch.Pitch("C")
    else:
        target = music21.pitch.Pitch("A")
    interval = music21.interval.Interval(key.tonic, target)

    return song.transpose(interval)
    

def loadSongs(dataset_pass):
    """Parse every file whose name ends in "krn" below ``dataset_pass``.

    Args:
        dataset_pass: Root directory to walk recursively.

    Returns:
        A list with one parsed music21 object per matching file, in the
        order ``os.walk`` yields them.
    """
    krn_paths = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dataset_pass)
        for name in names
        if name.endswith("krn")
    )
    return [music21.converter.parse(krn_path) for krn_path in krn_paths]

def hasAcceptableDuration(song, acceptables):
    """Tell whether every note and rest in ``song`` has an allowed duration.

    Args:
        song: Object exposing ``flatten().notesAndRests``.
        acceptables: Collection of allowed durations in quarter lengths.

    Returns:
        True when all durations are in ``acceptables`` (also for a song
        without notes or rests), False as soon as one is not.
    """
    return all(
        event.duration.quarterLength in acceptables
        for event in song.flatten().notesAndRests
    )

def encode(song, timestep=.25):
    """Encode ``song`` as a space-separated time-series string.

    Every note becomes its MIDI pitch number and every rest becomes "r";
    each event is followed by one "_" per additional ``timestep`` it lasts.

    Args:
        song: The music21 score to encode.
        timestep: Length of one time step in quarter lengths.

    Returns:
        The encoded symbols joined by single spaces.
    """
    encodedSong = []
    for event in song.flatten().notesAndRests:
        if isinstance(event, music21.note.Note):
            symbol = event.pitch.midi
        elif isinstance(event, music21.note.Rest):
            symbol = "r"
        else:
            # Any other event type (for example a chord) has no symbol in
            # this encoding. Skip it instead of silently reusing the symbol
            # of the previous event or failing on an unassigned variable.
            continue

        # convert into timeseries notation: the symbol itself, then one hold
        # marker for each remaining step of the event's duration
        steps = int(event.duration.quarterLength / timestep)
        if steps > 0:
            encodedSong.append(symbol)
            encodedSong.extend(["_"] * (steps - 1))

    return " ".join(map(str, encodedSong))
    

In [11]:
def createSingleFileDataset(datasetPath, fileDatasetPath):
    """Merge all encoded song files into one delimiter-separated string.

    Every file below ``datasetPath`` is read and followed by a space and
    ``SEQUENCE_LENGTH`` copies of "/ ". The merged text, minus its last
    character, is written to ``fileDatasetPath``.

    Args:
        datasetPath: Directory containing the encoded song files.
        fileDatasetPath: Destination path of the merged file.

    Returns:
        The merged string that was written.
    """
    separator = " " + "/ " * SEQUENCE_LENGTH
    chunks = []

    for folder, _, names in os.walk(datasetPath):
        for name in names:
            with open(os.path.join(folder, name), "r") as handle:
                chunks.append(handle.read() + separator)

    # drop the last character (the trailing space left by the final delimiter)
    merged = "".join(chunks)[:-1]

    with open(fileDatasetPath, "w") as handle:
        handle.write(merged)

    return merged

def createMapping(songs):
    """Rebuild the global ``MAPPING`` from the symbols found in ``songs``.

    Args:
        songs: Whitespace-separated string of encoded symbols.

    The dictionary is updated in place (not rebound), so objects that
    already hold a reference to ``MAPPING`` see the new contents.
    """
    # start from an empty table so symbols from an earlier call do not linger
    MAPPING.clear()

    # Sort the vocabulary: iteration order of a set of strings can change
    # between interpreter sessions, which would give the same symbol a
    # different index each time and invalidate a previously trained model.
    for index, symbol in enumerate(sorted(set(songs.split()))):
        MAPPING[symbol] = index

In [12]:
def songsToIntConvertor(songs):
    """Translate an encoded song string into a list of integer ids.

    Args:
        songs: Whitespace-separated string of symbols, all present in
            ``MAPPING``.

    Returns:
        One integer from ``MAPPING`` per symbol, in the original order.

    Raises:
        KeyError: If a symbol is missing from ``MAPPING``.
    """
    return [MAPPING[token] for token in songs.split()]

def generatTrainingSequence(intSongs):
    """Build sliding-window training samples from an integer-encoded dataset.

    Each input is a window of ``SEQUENCE_LENGTH`` consecutive ids and its
    target is the id that immediately follows the window.

    Args:
        intSongs: List of integer symbol ids (values taken from ``MAPPING``).

    Returns:
        A tuple ``(inputs, targets)``: ``inputs`` is the one-hot encoding of
        the windows produced by ``keras.utils.to_categorical`` and
        ``targets`` is a NumPy array of the following ids.
    """
    numberOfSequences = len(intSongs) - SEQUENCE_LENGTH
    inputs = [intSongs[i:i + SEQUENCE_LENGTH] for i in range(numberOfSequences)]
    targets = [intSongs[i + SEQUENCE_LENGTH] for i in range(numberOfSequences)]

    # one-hot encode based on the MAPPING: use the size of the mapping itself
    # so the width always matches the model's input/output size, even when
    # some mapped symbol does not occur in intSongs
    vocabSize = len(MAPPING)
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabSize)
    targets = np.array(targets)

    return inputs, targets

In [14]:
if __name__ == "__main__":
    # Encode every accepted song into its own text file under SAVE_DIR.
    preprocess(DATASET_PATH)
    # Merge the per-song files into one string (also written to SINGLE_FLE_DATASET).
    songs = createSingleFileDataset(SAVE_DIR, SINGLE_FLE_DATASET)
    # Fill the global MAPPING from the symbols in the merged string.
    createMapping(songs)
    # Convert the symbol string into a list of integer ids.
    intSongs = songsToIntConvertor(songs)
    # Build the one-hot inputs and integer targets; later cells pass these to training().
    inputs, targets = generatTrainingSequence(intSongs)

Number of loaded songs :1700


# Train a Model: Building and Training

In [15]:
# Setup
# One unit per symbol in MAPPING; evaluated when this cell runs, so MAPPING must already be filled.
OUTPUT_UNITS = len(MAPPING)
# Layer sizes for the network; training() only reads the first entry.
NUM_UNITS = [256]
# Loss name passed to model.compile().
LOSS = "sparse_categorical_crossentropy"
# Learning rate given to the Adam optimizer.
LEARNING_RATE = 0.001
# Number of epochs passed to model.fit().
EPOCHS = 40

In [16]:
def training(inputs, targets, outputUnits=OUTPUT_UNITS, numUnits=NUM_UNITS, loss=LOSS, learningRate=LEARNING_RATE, epochs=EPOCHS):
    """Build, compile, fit and save the melody model.

    Args:
        inputs: Training inputs passed to ``model.fit``.
        targets: Training targets passed to ``model.fit``.
        outputUnits: Size of the last input dimension and of the softmax layer.
        numUnits: Sequence whose first entry is the LSTM size.
        loss: Loss passed to ``model.compile``.
        learningRate: Learning rate of the Adam optimizer.
        epochs: Number of training epochs.

    The fitted model is written to "trained_model.h5"; nothing is returned.
    """
    # architecture: variable-length sequence -> LSTM -> dropout -> softmax
    sequenceIn = keras.layers.Input(shape=(None, outputUnits))
    hidden = keras.layers.LSTM(numUnits[0])(sequenceIn)
    hidden = keras.layers.Dropout(0.2)(hidden)
    symbolProbs = keras.layers.Dense(outputUnits, activation="softmax")(hidden)

    model = keras.Model(sequenceIn, symbolProbs)
    optimizer = keras.optimizers.Adam(learning_rate=learningRate)
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    # train, then persist the result
    model.fit(inputs, targets, epochs=epochs, batch_size=64)
    model.save("trained_model.h5")

In [19]:
if __name__ == "__main__":
    # Train on the arrays built by the preprocessing cell; training() saves the model to "trained_model.h5".
    training(inputs, targets)

SyntaxError: incomplete input (2728436465.py, line 2)

# If you already have a .h5 file (trained model) 

In [None]:
import tensorflow.keras as keras
import numpy as np 

class MelodyGenerator:
    """Generate melodies with a trained Keras model and export them with music21.

    Melodies use the notebook's step encoding: a MIDI pitch number or "r"
    starts a note or rest, "_" holds the previous event for one more time
    step, and "/" separates songs.
    """

    def __init__(self, model_path="model.h5", mapping = MAPPING):
        """Load the model and remember the symbol table.

        Args:
            model_path: Path of the saved Keras model to load.
            mapping: Dict from symbol string to integer id.
        """
        self.model_path = model_path
        self.model = keras.models.load_model(model_path)
        self._mapping = mapping
        self._start_symbols = ["/"] * SEQUENCE_LENGTH

    def generate_melody(self, seed, num_steps, max_sequence_length, temperature):
        """Extend ``seed`` by sampling symbols from the model.

        Args:
            seed: Space-separated string of symbols that starts the melody.
            num_steps: Maximum number of symbols to generate.
            max_sequence_length: Number of most recent symbols fed to the
                model at each step.
            temperature: Sampling temperature passed to
                ``_sample_with_temperature``.

        Returns:
            List of symbols: the seed followed by the generated symbols.
            Generation stops early when the model emits "/".
        """
        seed = seed.split()
        melody = seed
        # a new list is built here, so ``melody`` keeps only the seed symbols
        seed = self._start_symbols + seed

        # map seed to integers
        seed = [self._mapping[symbol] for symbol in seed]

        # reverse lookup, built once; setdefault keeps the first symbol
        # should two symbols ever share an id
        symbol_of = {}
        for symbol, index in self._mapping.items():
            symbol_of.setdefault(index, symbol)

        for _ in range(num_steps):
            # keep only the most recent symbols as context (a slice, so the
            # seed stays a list)
            seed = seed[-max_sequence_length:]

            # one hot encoding, with a leading batch dimension of size 1
            onehot_seed = keras.utils.to_categorical(seed, num_classes=len(self._mapping))
            onehot_seed = onehot_seed[np.newaxis, ...]

            # prediction
            probabilities = self.model.predict(onehot_seed)[0]

            output_int = self._sample_with_temperature(probabilities, temperature)
            seed.append(output_int)

            output_symbol = symbol_of[output_int]

            # the song delimiter marks the end of the melody
            if output_symbol == "/":
                break

            melody.append(output_symbol)

        return melody

    def _sample_with_temperature(self, probabilities, temperature):
        """Draw an index from ``probabilities`` re-weighted by ``temperature``.

        Args:
            probabilities: 1-D array-like of probabilities.
            temperature: Positive number; the log-probabilities are divided
                by it before being renormalised.

        Returns:
            The sampled index.
        """
        # Work in float64: np.random.choice checks that p sums to 1 within a
        # tight tolerance, which renormalised float32 values can miss.
        with np.errstate(divide="ignore"):
            # log(0) is -inf here, which simply becomes probability 0 again
            predictions = np.log(np.asarray(probabilities, dtype=np.float64)) / temperature
        # shifting by the maximum leaves the softmax unchanged but avoids overflow
        weights = np.exp(predictions - np.max(predictions))
        probabilities = weights / np.sum(weights)

        choices = range(len(probabilities))
        index = np.random.choice(choices, p=probabilities)

        return index

    def save(self, melody, step_duration=.25 , format="midi", filename="new_melody.midi"):
        """Convert ``melody`` to a music21 stream and write it to disk.

        Args:
            melody: List of symbols in the step encoding.
            step_duration: Length of one step in quarter lengths.
            format: Output format passed to ``stream.write``.
            filename: Output path passed to ``stream.write``.
        """
        stream = music21.stream.Stream()

        # collapse the step encoding into [symbol, number_of_steps] pairs so
        # that every event, including the last one, gets its full duration
        events = []
        for symbol in melody:
            if symbol != "_":
                events.append([symbol, 1])
            elif events:
                events[-1][1] += 1
            # a hold marker with no preceding event has nothing to extend

        for start_symbol, step_counter in events:
            quarter_length_duration = step_duration * step_counter

            if start_symbol == "r":
                m21_event = music21.note.Rest(quarterLength = quarter_length_duration)
            else:
                m21_event = music21.note.Note(int(start_symbol), quarterLength = quarter_length_duration)

            stream.append(m21_event)

        stream.write(format, filename)
        
if __name__=="__main__":
    # NOTE(review): the default model_path is "model.h5", while training() saves "trained_model.h5" — confirm which file should be loaded.
    mg = MelodyGenerator()
    # Seed melody in the step encoding: five MIDI pitches, each followed by one "_" hold marker.
    seed = "64 _ 62 _ 60 _ 59 _ 57 _"
    # Arguments: num_steps=500, max_sequence_length=SEQUENCE_LENGTH, temperature=0.7.
    new_melody = mg.generate_melody(seed, 500, SEQUENCE_LENGTH, 0.7)
    print(new_melody)
    # Written with save()'s default format and filename.
    mg.save(new_melody)