In [22]:
import os
import music21 as m21
from music21 import environment
import json
import numpy as np
import tensorflow.keras as keras

In [9]:
m21.__version__

'8.3.0'

### What is Music21?

Simply, It is a package that we can used as a converting tool. <br>
E.g., kern, MIDI, MusicMXL ===> m21 ===> kern, MIDI, ....

[Music21 Documentation](https://web.mit.edu/music21/doc/)

<br><br>

In [10]:
PATH_TO_MUSESCORE_EXECUTABLE = r"C:\Program Files\MuseScore 4\bin\MuseScore4.exe"
KERN_DATASET_PATH = "deutschl/test"
SAVE_DIR = "dataset"
SINGLE_FILE_DATASET = "file_dataset"
SEQUENCE_LENGTH = 64
MAPPING_PATH = "mapping.json"

# durations are expressed in quarter length
ACCEPTABLE_DURATIONS = [
    0.25,  # 16th note
    0.5,  # 8th note
    0.75,  # dotted note
    1.0,  # quarter note
    1.5,  # dotted quater note
    2,  # half note
    3,  # quarter note (time-signature)
    4  # whole note
]

<br>

## Setup Music21 environment variable

In [11]:
environment.set("musicxmlPath", PATH_TO_MUSESCORE_EXECUTABLE)

<br>

## Load flok songs

In [12]:
def load_songs_in_kern(dataset_path):

    songs = []
    
    # go through all the files in dataset and load them with music21
    for path, sub_dirs, files in os.walk(dataset_path):
        for file in files:
            # filtering kern files
            if file[-3:] == "krn":
                song = m21.converter.parse(os.path.join(path, file))
                songs.append(song)

    return songs
    

<br>

## Filter out songs that have non-acceptable durations

In [13]:
def has_acceptable_durations(song, acceptable_duration):
    for note in song.flat.notesAndRests:
        if note.duration.quarterLength not in acceptable_duration:
            return False
    return True

<br>

## Transpose songs to C_maj / A_min

In [14]:
def transpose(song):
    # get key from the song
    parts = song.getElementsByClass(m21.stream.Part)
    measures_part0 = parts[0].getElementsByClass(m21.stream.Measure)
    key = measures_part0[0][4]
    
    # if key not exist, estimate key using Music21
    if not isinstance(key, m21.key.Key):
        key = song.analyze("key")

    # print(key)

    # get interval for transposition. E.g., Bmaj => Cmaj
    if key.mode == "major":
        interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch("C"))
    elif key.mode == "minor":
        interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch("A"))

    # transpose song by calculated interval
    transposed_song = song.transpose(interval)

    return transposed_song

<br>

## Encode Song

In [15]:
def encode_song(song, time_step=0.25):
    # e.g., p = 60, d = 1.0 => [60, "_", "_"]

    encoded_song = []

    for event in song.flat.notesAndRests:
        # handle notes
        if isinstance(event, m21.note.Note):
            symbol = event.pitch.midi  # in our case, its 60
        # handle rests
        elif isinstance(event, m21.note.Rest):
            symbol = "r"

        # convert the notes & rests into time-series notation
        steps = int(event.duration.quarterLength / time_step)
        
        for step in range(steps):
            if step == 0:
                encoded_song.append(symbol)
            else:
                encoded_song.append("_")

    # cast encoded song to a str
    encoded_song = " ".join(map(str, encoded_song))  # convert to string and join

    return encoded_song
        

<br>

## Data preprocessing

In [16]:
def preprocess(dataset_path):
    # load the folk songs
    print('Loading songs ...')
    songs = load_songs_in_kern(dataset_path)
    print(f"Loaded {len(songs)} songs.")

    for i, song in enumerate(songs):
        # filter out songs that have non-acceptable durations
        if not has_acceptable_durations(song, ACCEPTABLE_DURATIONS):
            continue
        
        # transpose songs to C_maj / A_min
        song = transpose(song)
    
        # encode songs with music time series representation
        encoded_song = encode_song(song)
    
        # save songs to text file
        save_path = os.path.join(SAVE_DIR, str(i))
        with open(save_path, "w") as fp:
            fp.write(encoded_song)
    

<br>

## Crate single dataset

In [17]:
def load(file_path):
    with open(file_path, "r") as fp:
        song = fp.read()

    return song
    

def create_single_file_dataset(dataset_path, file_dataset_path, sequence_length):
    new_song_delimiter = "/ " * sequence_length
    songs = ""
    
    # load encoded songs and add delimiters
    for path, _, files in os.walk(dataset_path):
        for file in files:
            file_path = os.path.join(path, file)
            song = load(file_path)
            songs = songs + song + " " + new_song_delimiter

    # remove unnecessary delimeter
    songs = songs[:-1]

    # save string that contains all the dataset
    with open(file_dataset_path, "w") as fp:
        fp.write(songs)

    return songs

<br>

## Create mapping

In [18]:
def create_mapping(songs, mapping_path):
    mappings = {}
    
    # identify the vocabulary
    songs = songs.split()  # string to list convertion
    vocabulary = list(set(songs))

    # create mappings
    for i, symbol in enumerate(vocabulary):
        mappings[symbol] = i

    # save vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)

<br>

## Convert songs to integers

In [27]:
def convert_songs_to_int(songs):
    int_songs = []
    
    # load the mappings
    with open(MAPPING_PATH, "r") as fp:
        mappings = json.load(fp)

    # cast songs string to a list
    songs = songs.split()

    # map songs to int
    for symbol in songs:
        int_songs.append(mappings[symbol])

    return int_songs

<br>

## Generate training sequences

In [31]:
def generate_training_sequences(sequence_length, file_dataset_path):
    # [11, 12, 13, 14, ...]  ==> inputs: [11, 12], targets: [13] | i: [12, 13], t: [14]
    
    # load songs and convert them to int
    songs = load(file_dataset_path)
    int_songs = convert_songs_to_int(songs)

    # generate the training sequences
    # if we have 100 symbols & seq. len. = 64  ==> we can generate 100 - 64 = 36 sequences
    # each sequence have 64 time steps
    inputs = []
    targets = []
    
    num_sequences = len(int_songs) - sequence_length
    for i in range(num_sequences):
        inputs.append(int_songs[i:i+sequence_length])
        targets.append(int_songs[i+sequence_length])

    # one-hot encode the sequences
    # shape of the inputs: (# of sequences, sequence length)
    # [[0, 1, 2], [1, 2, 3]] ==> [[[1, 0, 0], [0, 1, 0], [0, 0, 2]], [[], [], []]]
    vocabulary_size = len(set(int_songs))
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)    
    targets = np.array(targets)

    return inputs, targets

<br><br>

In [36]:
if __name__ == "__main__":
    preprocess(KERN_DATASET_PATH)
    songs = create_single_file_dataset(SAVE_DIR, SINGLE_FILE_DATASET, SEQUENCE_LENGTH)

    create_mapping(songs, MAPPING_PATH)

    inputs, targets = generate_training_sequences(SEQUENCE_LENGTH, SINGLE_FILE_DATASET)
    
    
    # transposed_song.show("musicxml")

Loading songs ...
Loaded 12 songs.


In [37]:
inputs

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.]],

       [[0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.]],

       [[0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.],
        ...,
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 1.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [40]:
print(targets)
len(targets)

[17 17 17 ... 14 14 14]


2512

In [41]:
inputs.shape

(2512, 64, 18)

In [42]:
targets.shape

(2512,)