In [1]:
import numpy as np
from tqdm import tqdm
import random

In [2]:
# Load the three arrays written by the formatting step. They are indexed with the same row
# numbers further down, so they are expected to agree on their first dimension.
# NOTE: allow_pickle=True unpickles file contents; only load files from a trusted source.
dataset, midi_dataset, meta_dataset = (
    np.load(path, allow_pickle=True)
    for path in (
        "../data/formatted/dataset.npy",
        "../data/formatted/midi_dataset.npy",
        "../data/formatted/meta_augmented.npy",
    )
)

# Quick shape check of what was loaded.
print(dataset.shape, midi_dataset.shape, meta_dataset.shape)

(48072, 2048) (48072, 2048, 8) (48072,)


In [16]:
# Peek at the first token sequence to eyeball the row layout.
print(dataset[0])

['<style>' 'Jazz' 'Tonality' ... '<pad>' '<pad>' '<pad>']


In [3]:
# Build the vocabulary: flatten every row into one long array and keep the distinct token
# strings (np.unique returns them sorted). The printed number is the vocabulary size.
tokens = np.unique(np.concatenate(dataset.tolist()))
print(len(tokens))

# Persist the vocabulary next to the formatted data.
np.save("../data/formatted/tokens.npy", list(tokens))

195


In [4]:
# Lookup tables between token strings and integer ids; a token's id is its position in `tokens`.
stoi = dict(zip(tokens, range(len(tokens))))
itos = dict(enumerate(tokens))

print(stoi)

{'.': 0, '/': 1, '0.3997395833333333': 2, '0.4440104166666667': 3, '0.5': 4, '0.5703125': 5, '0.6666666666666666': 6, '0.75': 7, '0.7994791666666666': 8, '0.8880208333333334': 9, '1.0': 10, '1.1419270833333333': 11, '1.3333333333333333': 12, '1.5': 13, '1.5989583333333333': 14, '1.7135416666666667': 15, '128 Feel': 16, '2.0': 17, '2.25': 18, '2.3997395833333335': 19, '2.6666666666666665': 20, '3.0': 21, '4.0': 22, ':|': 23, '<end>': 24, '<pad>': 25, '<start>': 26, '<style>': 27, 'A': 28, 'A major': 29, 'A minor': 30, 'A#': 31, 'A##': 32, 'Ab': 33, 'Ab major': 34, 'Ab minor': 35, 'Abb': 36, 'Afoxé': 37, 'Afro': 38, 'B': 39, 'B major': 40, 'B minor': 41, 'B#': 42, 'B##': 43, 'Baião': 44, 'Ballad': 45, 'Bb': 46, 'Bb major': 47, 'Bb minor': 48, 'Bbb': 49, 'Blues': 50, 'Bolero': 51, 'Bolero-Cha': 52, 'Bossa': 53, 'C': 54, 'C major': 55, 'C minor': 56, 'C#': 57, 'C##': 58, 'Calypso': 59, 'Cb': 60, 'Cbb': 61, 'Cha Cha': 62, 'Chacarera': 63, 'Choro': 64, 'Country Ballad': 65, 'D': 66, 'D major

In [5]:
# Pick the rows of a held-out test split by sampling whole songs, so that all rows of a
# chosen song end up in the test split together.
# Rows are treated as consecutive groups of 12 per original song (presumably the augmented
# versions of that song) -- TODO confirm against the formatting step.
VERSIONS_PER_SONG = 12
TEST_FRACTION = 0.1
SPLIT_SEED = 0  # fixed so that re-running the notebook reproduces the same split

# Number of original songs; integer division because this is a count.
realSongs = len(dataset) // VERSIONS_PER_SONG
tenPercent = int(TEST_FRACTION * realSongs)

# Draw distinct song numbers from a private generator (the global `random` state is left
# untouched), then convert each song number into the row index of that song's first row.
split_rng = random.Random(SPLIT_SEED)
randomList = [
    song * VERSIONS_PER_SONG
    for song in split_rng.sample(range(realSongs), tenPercent)
]

# Expand every first-row index into the full run of consecutive rows of that song.
final_random_list = [
    first_row + offset
    for first_row in randomList
    for offset in range(VERSIONS_PER_SONG)
]

# Sanity check: prints True when no row index was selected twice.
print(len(final_random_list) == len(set(final_random_list)))

True


In [6]:
# Persist the held-out row indices alongside the formatted data.
np.save('../data/formatted/final_random_list.npy', final_random_list)

In [7]:
# Test split: the held-out rows of each array, in the order listed in final_random_list.
dataset_test, midi_test, meta_test = (
    source[final_random_list]
    for source in (dataset, midi_dataset, meta_dataset)
)

# Train split: everything that remains once the held-out rows are removed.
# NOTE(review): no meta_train is derived here -- confirm that is intentional.
dataset_train, midi_train = (
    np.delete(source, final_random_list, axis=0)
    for source in (dataset, midi_dataset)
)

In [47]:
# Reload the saved vocabulary from disk; this rebinds `tokens` to the stored array.
# NOTE: allow_pickle=True unpickles file contents; only load files from a trusted source.
tokens = np.load("../data/formatted/tokens.npy", allow_pickle=True)
print(tokens)

['.' '/' '0.3997395833333333' '0.4440104166666667' '0.5' '0.5703125'
 '0.6666666666666666' '0.75' '0.7994791666666666' '0.8880208333333334'
 '1.0' '1.1419270833333333' '1.3333333333333333' '1.5'
 '1.5989583333333333' '1.7135416666666667' '128 Feel' '2.0' '2.25'
 '2.3997395833333335' '2.6666666666666665' '3.0' '4.0' ':|' '<end>'
 '<pad>' '<start>' '<style>' 'A' 'A major' 'A minor' 'A#' 'A##' 'Ab'
 'Ab major' 'Ab minor' 'Abb' 'Afoxé' 'Afro' 'B' 'B major' 'B minor' 'B#'
 'B##' 'Baião' 'Ballad' 'Bb' 'Bb major' 'Bb minor' 'Bbb' 'Blues' 'Bolero'
 'Bolero-Cha' 'Bossa' 'C' 'C major' 'C minor' 'C#' 'C##' 'Calypso' 'Cb'
 'Cbb' 'Cha Cha' 'Chacarera' 'Choro' 'Country Ballad' 'D' 'D major'
 'D minor' 'D#' 'D##' 'Db' 'Db major' 'Db minor' 'Dbb' 'Disco' 'Dreamlike'
 'E' 'E major' 'E minor' 'E#' 'E##' 'Eb' 'Eb major' 'Eb minor' 'Ebb'
 'Even 16ths' 'Even 8ths' 'F' 'F major' 'F minor' 'F#' 'F##' 'Fb' 'Fbb'
 'Folk' 'Form_A' 'Form_B' 'Form_C' 'Form_Coda' 'Form_D' 'Form_Segno'
 'Form_intro' 'Form_verse' 'F

In [72]:
import voicing as vc
import importlib
# Reload so that edits to the local voicing module are picked up without restarting the kernel.
importlib.reload(vc)
voicing = vc.Voicing()


# Spot-check one held-out song: print its tokens, convert them and export the result.
# Named `song_index` rather than `id` so the builtin id() is not shadowed in the kernel.
song_index = 49
test_this_song = dataset_test[song_index]
print(test_this_song)

# Only the first of the two returned values is used here.
midi, _ = voicing.convert_chords_to_voicing(test_this_song)
print(midi)

voicing.export_to_midi(midi, "test_"+ str(song_index))

['<style>' 'Bolero' 'Tonality' ... '<pad>' '<pad>' '<pad>']
[([0, 0, 0, 0, 0, 0, 0, 0], 0.0, '<style>'), ([0, 0, 0, 0, 0, 0, 0, 0], 0.0, 'Bolero'), ([0, 0, 0, 0, 0, 0, 0, 0], 0.0, 'Tonality'), ([0, 0, 0, 0, 0, 0, 0, 0], 0.0, 'Db major'), ([0, 0, 0, 0, 0, 0, 0, 0], 0.0, '<start>'), ([0, 0, 0, 0, 0, 0, 0, 0], 0.0, 'Form_A'), ([0, 0, 0, 0, 0, 0, 0, 0], 0.0, '|'), ([0, 0, 0, 0, 0, 0, 0, 0], 4.0, '.'), ([51, 0, 0, 0, 0, 0, 0, 0], 4.0, 'Eb'), ([51, 61, 65, 66, 0, 0, 0, 0], 4.0, 'm7'), ([0, 0, 0, 0, 0, 0, 0, 0], 4.0, '|'), ([51, 61, 65, 66, 0, 0, 0, 0], 1.0, '.'), ([0, 0, 0, 0, 0, 0, 0, 0], 1.0, '1.0'), ([54, 0, 0, 0, 0, 0, 0, 0], 1.0, 'F#'), ([54, 64, 68, 69, 0, 0, 0, 0], 1.0, 'm7'), ([54, 64, 68, 69, 0, 0, 0, 0], 3.0, '.'), ([47, 0, 0, 0, 0, 0, 0, 0], 3.0, 'B'), ([47, 57, 61, 63, 66, 0, 0, 0], 3.0, 'dom7'), ([47, 57, 63, 66, 60, 0, 0, 0], 3.0, 'add b9'), ([0, 0, 0, 0, 0, 0, 0, 0], 3.0, '|'), ([47, 57, 63, 66, 60, 0, 0, 0], 4.0, '.'), ([52, 0, 0, 0, 0, 0, 0, 0], 4.0, 'E'), ([52, 63, 68, 71, 

In [22]:
# Write both splits next to the formatted data.
# The test split is saved with its metadata array; the train split has token and midi arrays only.
for out_path, split_array in (
    # test
    ('../data/formatted/dataset_test.npy', dataset_test),
    ('../data/formatted/midi_test.npy', midi_test),
    ('../data/formatted/meta_test.npy', meta_test),
    # train
    ('../data/formatted/dataset_train.npy', dataset_train),
    ('../data/formatted/midi_train.npy', midi_train),
):
    np.save(out_path, split_array)

In [23]:
import random
#create a file with shuffled reference index
def createWindowedShuffleReference(type, size, window, save = False):
    """Build a row-index list that shuffles whole windows of consecutive rows.

    The range [0, size) is cut into ``size // window`` windows of ``window``
    consecutive indices. The windows are placed in random order while the
    indices inside each window stay in ascending order. When ``size`` is not a
    multiple of ``window`` the windows are aligned to the end of the range, so
    the first ``size % window`` indices are left out of the result.

    Args:
        type: Label used in the output file name (``shuffle_<type>.txt``).
            Shadows the builtin of the same name; kept for existing callers.
        size: Number of rows to index.
        window: Number of consecutive rows that must stay together.
        save: When true, also write the indices to
            ``../data/shuffle_<type>.txt``, preceded by an ``Array shape``
            header line recording how many indices were written.

    Returns:
        A list of ints with ``(size // window) * window`` entries; the list is
        empty when ``size < window``.
    """
    n_windows = int(size // window)
    # Rows that do not fill a whole window; every window is shifted up by this amount so
    # that the last window ends exactly at size - 1.
    leftover = size - n_windows * window

    # Window start positions 0, window, 2 * window, ... in random order.
    starts = np.array(random.sample(range(n_windows), n_windows), dtype=int) * window

    # max() of an empty array raises, so report None when there is no complete window.
    m = int(starts.max()) if n_windows else None
    l_ref = size - window
    print('real:', size, 'max:', m, 'length_ref:', l_ref)

    starts = starts + leftover
    if n_windows and leftover == 0:
        # Row 0 is part of the result only when nothing had to be shifted.
        print("OK")

    # Broadcast each start against 0..window-1 and flatten window by window.
    ref = (starts[:, None] + np.arange(window)).ravel().tolist()

    if save:
        # The header records the number of indices actually written, which is smaller
        # than `size` when leading rows were dropped; readers reshape using this value.
        np.savetxt("../data/shuffle_" + type + ".txt", ref, fmt='%i', delimiter=" ", header='Array shape: ('+str(len(ref))+', 1)')
    return ref

In [24]:
def getData(folder, name):
    """Load a saved index file together with the shape recorded in its header.

    The first line of the file is expected to look like
    ``# Array shape: (<n>, <m>)``; the remaining lines hold the numbers.

    Args:
        folder: Directory containing the file (joined to ``name`` with '/').
        name: File name inside ``folder``.

    Returns:
        A ``(data, shape)`` tuple: ``data`` is whatever ``np.loadtxt`` parses
        from the file, ``shape`` is an integer array with the numbers found in
        the header line.

    Raises:
        ValueError: if the header line does not contain integers in the
            expected format.
    """
    data_path = folder + '/' + name
    data = np.loadtxt(data_path)

    # The context manager closes the file even if parsing the header fails below.
    with open(data_path, "r") as f:
        header = f.readline()

    shape_text = header.replace('# Array shape: (', '').replace('\n', '').replace(')', '')
    shape = np.array(shape_text.split(', ')).astype(int)
    return data, shape

In [25]:
# Reload the saved train/test arrays from disk.
# NOTE: allow_pickle=True unpickles file contents; only load files from a trusted source.
train_dataset, test_dataset, train_midi, test_midi = (
    np.load(path, allow_pickle=True)
    for path in (
        '../data/formatted/dataset_train.npy',
        '../data/formatted/dataset_test.npy',
        '../data/formatted/midi_train.npy',
        '../data/formatted/midi_test.npy',
    )
)

print(train_dataset.shape, train_midi.shape, test_dataset.shape, test_midi.shape)

# Window of 1: every row is shuffled on its own.
BATCH_SHUFFLE_SIZE = 1
# Each call also writes ../data/shuffle_<name>.txt, which is read back below.
ref = createWindowedShuffleReference("train", len(train_dataset), BATCH_SHUFFLE_SIZE, True)
ref_test = createWindowedShuffleReference("test", len(test_dataset), BATCH_SHUFFLE_SIZE, True)

# Train split: read the saved order back and apply it to both train arrays.
# NOTE(review): this rebinds `dataset`, the name the full data was loaded under earlier.
shuffle_train, format_train = getData('../data', 'shuffle_train.txt')
shuffle_train = shuffle_train.reshape(format_train[0]).astype(int).tolist()
dataset, midiDataset = train_dataset[shuffle_train], train_midi[shuffle_train]

# Test split: same procedure with its own saved order.
shuffle_test, format_test = getData('../data', 'shuffle_test.txt')
shuffle_test = shuffle_test.reshape(format_test[0]).astype(int).tolist()
validation, midi_validation = test_dataset[shuffle_test], test_midi[shuffle_test]

(43272, 2048) (43272, 2048, 8) (4800, 2048) (4800, 2048, 8)
real: 43272 max: 43271 length_ref: 43271
OK
real: 4800 max: 4799 length_ref: 4799
OK


In [26]:
# Write the shuffled train and test arrays to the shuffled/ folder.
for out_path, shuffled_array in (
    ('../data/shuffled/dataset_train.npy', dataset),
    ('../data/shuffled/midi_train.npy', midiDataset),
    ('../data/shuffled/dataset_test.npy', validation),
    ('../data/shuffled/midi_test.npy', midi_validation),
):
    np.save(out_path, shuffled_array)