In [1]:
'''
Welcome to Music SketchNet! 
--------------------------
This is the first file you click in this folder to start.
We will guide you in each process of the Music SketchNet, including:
    1. data processing
    2. model construction
    3. model training/inferring
    4. evaluation.
--------------------------
This file processes the dataset that is used in Music SketchNet.

Before going into the process, we would like you to know that we use a self-defined MIDI_Loader to process the Irish MIDI files.
One problem you should be aware of is that there is a timing bias (~1/960 sec) in the Irish MIDI files.
Therefore, we offset this bias in the MIDI_Loader implementation (c_bias = 1.0 / 960).
With our codes, you can process the irish dataset.

If you want to process other datasets, you have two choices:
1) replace the c_bias = 1.0/960 with c_bias = 0.0, and perhaps you should check more about the difference between your midi files and irish midi files. 
2) check our code, and write your own processing script (we recommend this)

But at least, with our codes, you can use the irish dataset to go through all the process, which will give you a strong example of how to use it.

Please ignore the "Nottingham" option that we define in the MIDI_Loader. 
Most of this code can also help you process Nottingham, another folk-song dataset, but there might be some problems with it.
'''
import os
import copy
import random
import numpy as np
import pretty_midi as pyd
from loader.dataloader import MIDI_Loader

s_dir = "" # folder address (left empty; not referenced by the cells visible in this section)
# NOTE(review): absolute, machine-specific path -- point this at your own copy of the Irish MIDI file before running.
midi_path = "/Users/wxn/Desktop/surf/Music-SketchNet-master/data/IrishFolkSong/session/sessiontune1.mid"


In [2]:
# Load the data from the MIDI file. Because bpm = 120, one beat lasts 60 / 120 = 0.5 sec.
# In 4/4 we divide the 4 beats of a measure into 24 steps/frames, so each step lasts 0.5 * 4 / 24 = 0.5 / 6 sec.
# Loading can take a while.
ml = MIDI_Loader("Irish",minStep = 0.5 / 6)
ml.load_single_midi(midi_path)

Dataset Name: Irish
start to load mid from /Users/wxn/Desktop/surf/Music-SketchNet-master/data/IrishFolkSong/session/sessiontune1.mid


In [3]:
# Process all loaded files into measure data for VAE training.
# The result is indexable; each entry printed below is a dict with 'name', 'raw' and 'notes'.
s = ml.processed_all()

start process Irish Folk Song dataset
start to get notes
find ahead notes in Irish Folk Song dataset in /Users/wxn/Desktop/surf/Music-SketchNet-master/data/IrishFolkSong/session/sessiontune1
get notes in 1 files
calc notes success! 1 files in total
processing succeed


In [4]:
# Now you should be able to see what a processed entry looks like
print(s[0])

{'name': '/Users/wxn/Desktop/surf/Music-SketchNet-master/data/IrishFolkSong/session/sessiontune1', 'raw': <pretty_midi.pretty_midi.PrettyMIDI object at 0x7fa312e9c250>, 'notes': [74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 72, 128, 128, 128, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 67, 128, 128, 66, 128, 128, 69, 128, 128, 67, 128, 128, 64, 128, 128, 60, 128, 128, 64, 128, 128, 67, 128, 128, 69, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 72, 128, 128, 128, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 71, 128, 128, 71, 128, 128, 69, 128, 128, 71, 128, 128, 74, 128, 128, 72, 128, 128, 69, 128, 128, 67, 128

In [5]:
# To save space we only keep the notes: the "raw" value of every processed
# entry is replaced by an empty string (the key itself stays in the dict).
for entry in s:
    entry["raw"] = ""

In [7]:
# Dump every processed entry (the captured output below shows 'raw' as an empty string)
print(s)

[{'name': '/Users/wxn/Desktop/surf/Music-SketchNet-master/data/IrishFolkSong/session/sessiontune1', 'raw': '', 'notes': [74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 72, 128, 128, 128, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 67, 128, 128, 66, 128, 128, 69, 128, 128, 67, 128, 128, 64, 128, 128, 60, 128, 128, 64, 128, 128, 67, 128, 128, 69, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 72, 128, 128, 128, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 71, 128, 128, 71, 128, 128, 69, 128, 128, 71, 128, 128, 74, 128, 128, 72, 128, 128, 69, 128, 128, 67, 128, 128, 64, 128, 128, 60, 128, 128, 64, 128, 128, 67, 128, 

In [6]:
# Save the processed entries to an .npy file.
# The entries are dicts, so the file has to be read back with allow_pickle=True (as the next cell does).
np.save("data/irish_single.npy", s)

In [15]:
# For testing: reload the file saved above, drop the 'raw' key, and compare with the training file.

# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load files you trust.
data = np.load("data/irish_single.npy", allow_pickle=True)
# print(data)

# Remove the 'raw' key from every entry in place; raises KeyError if an entry has no 'raw'.
for d in data:
    del d['raw']
    print(d)
    print(len(d))

print(data)

# Store the entries without 'raw' under a separate file name.
np.save("data/irish_single_correct.npy", data)

# Rebinds `data` to the training split for a visual comparison of the entry format.
# NOTE(review): "data/irish_train.npy" is not created by any cell shown here -- it must already exist.
data = np.load("data/irish_train.npy", allow_pickle=True)
print(data)

{'name': '/Users/wxn/Desktop/surf/Music-SketchNet-master/data/IrishFolkSong/session/sessiontune1', 'notes': [74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 72, 128, 128, 128, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 67, 128, 128, 66, 128, 128, 69, 128, 128, 67, 128, 128, 64, 128, 128, 60, 128, 128, 64, 128, 128, 67, 128, 128, 69, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 69, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 74, 128, 128, 62, 128, 128, 62, 128, 62, 128, 62, 128, 128, 72, 128, 128, 128, 128, 128, 71, 128, 128, 72, 128, 128, 74, 128, 128, 71, 128, 128, 71, 128, 128, 69, 128, 128, 71, 128, 128, 74, 128, 128, 72, 128, 128, 69, 128, 128, 67, 128, 128, 64, 128, 128, 60, 128, 128, 64, 128, 128, 67, 128, 128, 69, 128

[{'name': 'sessiontune39639', 'notes': [77, 128, 128, 128, 128, 128, 128, 128, 128, 75, 128, 128, 77, 128, 128, 128, 128, 128, 128, 128, 128, 75, 128, 128, 73, 128, 128, 77, 128, 128, 77, 128, 128, 75, 128, 128, 73, 128, 128, 70, 128, 128, 70, 128, 128, 128, 128, 128, 72, 128, 128, 75, 128, 128, 75, 128, 128, 73, 128, 128, 72, 128, 128, 68, 128, 128, 70, 128, 128, 72, 128, 128, 73, 128, 128, 70, 128, 128, 72, 128, 128, 68, 128, 128, 70, 128, 128, 68, 128, 128, 65, 128, 128, 68, 128, 128, 77, 128, 128, 128, 128, 128, 128, 128, 128, 75, 128, 128, 77, 128, 128, 78, 128, 128, 77, 128, 128, 75, 128, 128, 73, 128, 128, 77, 128, 128, 77, 128, 128, 75, 128, 128, 73, 128, 128, 70, 128, 128, 70, 128, 128, 128, 128, 128, 72, 128, 128, 75, 128, 128, 75, 128, 128, 73, 128, 128, 72, 128, 128, 68, 128, 128, 70, 128, 128, 72, 128, 128, 73, 128, 128, 70, 128, 128, 72, 128, 128, 68, 128, 128, 70, 128, 128, 128, 128, 128, 128, 128, 128, 75, 128, 128]}
 {'name': 'sessiontune22694', 'notes': [57, 128, 128,

In [7]:
'''
In this file
We do data processing for SketchVAE training
Since it requires rhythm and pitch tokens, we should process the previous data into the rhythm and pitch data first.
'''
from loader.dataloader import DataLoader
import numpy as np

# Input .npy files, addressed by index in the next cell:
#   0 = train, 1 = validate, 2 = test, 3 = the single-tune file
# NOTE(review): the train/validate/test files are not produced by the cells shown here -- they must already exist.
data_path = [
    "data/irish_train.npy",
    "data/irish_validate.npy",
    "data/irish_test.npy",
    "data/irish_single.npy"
]

In [8]:
# NOTE(review): `import data` looks like a leftover -- the module is not used by the visible cells and
# the name `data` is rebound as an ordinary variable elsewhere in this notebook. Confirm and remove.
import data
# Uncomment to work on the full splits instead of the single tune:
# train_x = np.load(data_path[0],allow_pickle = True)
# validate_x = np.load(data_path[1],allow_pickle = True)
# test_x = np.load(data_path[2],allow_pickle = True)

# Load the single-tune entries (index 3 of data_path); allow_pickle is needed for the pickled dicts.
single = np.load(data_path[3],allow_pickle = True)

In [9]:
# note extraction
hold_state = 128
rest_state = 129
def extract_note(x, pad_token = 128):
    """Pack the pitch tokens of ``x`` to the front and pad back to ``len(x)``.

    Every token smaller than 128 is treated as a pitch and kept in its
    original order; all other tokens are dropped.

    Args:
        x: iterable of integer tokens that also supports ``len()``.
        pad_token: value appended until the result is as long as ``x``.

    Returns:
        A tuple ``(padded, count)``: ``padded`` is a numpy array holding the
        pitches followed by the padding, ``count`` is the number of pitches.
    """
    pitches = [token for token in x if token < 128]
    pitch_count = len(pitches)
    padding = [pad_token] * (len(x) - pitch_count)
    return np.array(pitches + padding), pitch_count

def extract_rhythm(x, hold_token = 2, rest_token = 3):
    """Translate every token of ``x`` into a rhythm token.

    Tokens below 128 become 1, tokens equal to ``hold_state`` become
    ``hold_token``, and every remaining token becomes ``rest_token``.

    Args:
        x: iterable of integer tokens.
        hold_token: rhythm token emitted for ``hold_state``.
        rest_token: rhythm token emitted for anything else that is not a pitch.

    Returns:
        A numpy array with one rhythm token per input token.
    """
    def to_rhythm(token):
        if token < 128:
            return 1
        return hold_token if token == hold_state else rest_token

    return np.array([to_rhythm(token) for token in x])


In [10]:
# process rhythm and pitch tokens
split_size = 24
new_data = []
# change here to be train_x/validate_x/test_x
for song_idx, song in enumerate(single):
    song_notes = np.array(song["notes"])
    # cut the token sequence every `split_size` steps; the last chunk may be shorter
    cut_points = list(range(split_size, len(song_notes), split_size))
    song_measures = []
    for measure in np.split(song_notes, cut_points):
        # only complete measures are kept
        if len(measure) == split_size:
            pitch, pitch_len = extract_note(measure)
            # measures without a single pitch are dropped
            if pitch_len != 0:
                rhythm = extract_rhythm(measure)
                song_measures.append([measure, pitch, rhythm, pitch_len])
    new_data.append(song_measures)
    if song_idx % 1000 == 0:
        print("processed:", song_idx)

processed: 0


In [11]:
# Now you can see what a processed measure looks like: [gd, pitch, rhythm, pitch_len], i.e.
# the original 24-step measure, its pitches packed to the front and padded with 128,
# its rhythm tokens, and the number of pitches it contains.
print(new_data[0][0])

[array([ 74, 128, 128,  62, 128, 128,  62, 128,  62, 128,  62, 128, 128,
        69, 128, 128,  62, 128, 128,  62, 128,  62, 128,  62]), array([ 74,  62,  62,  62,  62,  69,  62,  62,  62,  62, 128, 128, 128,
       128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]), array([1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 1,
       2, 1]), 10]


In [12]:
# Flatten the per-song lists: every measure of every song becomes one entry
final_data = [measure for song in new_data for measure in song]
print(len(final_data))

71


In [13]:
# save the data named: irish_xxx_chord_rhythm.npy
# Every entry is [measure, pitch, rhythm, pitch_len] (three arrays plus an int), so the
# list is ragged. Handing it to np.save directly makes NumPy build the array implicitly,
# which emits a deprecation warning on older versions and raises ValueError on
# NumPy >= 1.24. Building the object array explicitly keeps the same layout.
np.save("data/irish_single_chord_rhythm.npy", np.array(final_data, dtype=object))

  arr = np.asanyarray(arr)
