In [21]:
import tensorflow as tf
import numpy as np
import pickle
import os

In [22]:
# loads the embeddings variable from the tensorflow checkpoint
def load_embeddings(checkpoint_dir='./data/processed/', meta_file='model.ckpt-1.meta'):
    """Load the value of the ``embedding:0`` tensor from a TensorFlow checkpoint.

    Args:
        checkpoint_dir: directory that holds the checkpoint files.
        meta_file: name of the meta-graph file inside ``checkpoint_dir``.

    Returns:
        Whatever ``session.run('embedding:0')`` yields after the latest
        checkpoint in ``checkpoint_dir`` has been restored.

    Raises:
        FileNotFoundError: if ``checkpoint_dir`` does not exist, is empty, or
            contains no checkpoint that TensorFlow can locate.
    """
    # os.listdir itself raises FileNotFoundError for a missing directory.
    if not os.listdir(checkpoint_dir):
        raise FileNotFoundError(
            'no checkpoint files found in {!r}'.format(checkpoint_dir))

    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is None:
        # Fail with a clear message instead of handing None to saver.restore.
        raise FileNotFoundError(
            'no TensorFlow checkpoint found in {!r}'.format(checkpoint_dir))

    # Import into a private graph so that re-running this cell does not keep
    # adding copies of the model to the default graph.
    with tf.Graph().as_default():
        with tf.Session() as session:
            saver = tf.train.import_meta_graph(os.path.join(checkpoint_dir, meta_file))
            saver.restore(session, latest)
            return session.run('embedding:0')

In [23]:
def get_vocabulary(path):
    """Read the vocabulary description stored in ``<path>/vocabulary``.

    The first line of the file must hold the vocabulary size as an integer.
    Every following non-blank line must consist of two whitespace-separated
    fields: a word and its count.

    Args:
        path: directory that contains the ``vocabulary`` file.

    Returns:
        A tuple ``(vocabulary_size, count, dictionary, reverse_dictionary)``:
        ``vocabulary_size`` is the integer from the first line; ``count`` is a
        list of ``(word, n)`` pairs in file order, with ``n`` kept as the
        string read from the file; ``dictionary`` maps each word to its
        position; ``reverse_dictionary`` lists the words by position.

    Raises:
        ValueError: if the first line is not an integer or a non-blank line
            does not have exactly two fields.
    """
    vocabulary_path = os.path.join(path, 'vocabulary')
    count = []
    dictionary = {}
    reverse_dictionary = []
    with open(vocabulary_path, 'r') as f:
        vocabulary_size = int(f.readline())
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at the end of the file)
                # carry no entry; skipping them avoids an unpacking error.
                continue
            word, n = fields
            # Index by number of entries read so far, so that skipped blank
            # lines never leave gaps between dictionary and reverse_dictionary.
            dictionary[word] = len(reverse_dictionary)
            count.append((word, n))
            reverse_dictionary.append(word)
    return vocabulary_size, count, dictionary, reverse_dictionary

In [24]:
def closest(x, take=6):
    """Return the indices of the rows of ``embeddings`` nearest to ``x``.

    Distance is the squared Euclidean distance, summed along axis 1 of the
    notebook-level ``embeddings`` array. The result holds at most ``take``
    indices, ordered from the smallest distance upwards.
    """
    offsets = x - embeddings
    squared_distance = np.square(offsets).sum(axis=1)
    ranking = np.argsort(squared_distance)
    return ranking[:take]

In [25]:
def closest_by_name(chord):
    """Return the names of the chords whose embeddings are nearest to ``chord``.

    ``chord`` is looked up in the notebook-level ``dictionary``; the indices
    returned by ``closest`` are translated back through ``reverse_dictionary``.
    """
    anchor = embeddings[dictionary[chord]]
    return [reverse_dictionary[idx] for idx in closest(anchor)]

In [26]:
def compare(a, b, c):
    """Return the chord names nearest to the vector ``a - b + c``.

    This is the usual embedding analogy: if x1 - y1 = x2 - y2, then
    x2 = x1 - y1 + y2, so the chords closest to ``a - b + c`` are the
    candidates for the missing x2.
    """
    vec_a, vec_b, vec_c = (embeddings[dictionary[name]] for name in (a, b, c))
    target = vec_a - vec_b + vec_c
    return [reverse_dictionary[idx] for idx in closest(target)]

In [27]:
# Load the embedding values once; closest(), closest_by_name(), compare() and
# random_walk() all read this notebook-level `embeddings` variable.
embeddings = load_embeddings()

INFO:tensorflow:Restoring parameters from ./data/processed/model.ckpt-1


In [34]:
# Show the vocabulary index of the 'UNK' entry.
# NOTE: `dictionary` is assigned by the get_vocabulary('data') cell, which
# appears further down in this notebook; that cell must be executed first.
dictionary['UNK']

0

In [16]:
# Read data/vocabulary. `dictionary` and `reverse_dictionary` are used as
# notebook-level globals by closest_by_name() and compare().
vocabulary_size, count, dictionary, reverse_dictionary = get_vocabulary('data')

In [17]:
print('most common chords are')
# Entry 0 of `count` is skipped; entries 1..25 are listed as "<chord> : <count>".
for chord, n in count[1:26]:
    print('{} : {}'.format(chord, n))

most common chords are
Gb : 1015
B : 813
E : 789
EAbB : 703
CEG : 698
GbB : 637
A : 624
DGb : 581
DGbB : 564
D : 548
DbEA : 545
AbB : 487
Ab : 475
Db : 475
GbA : 439
DEAbBbB : 427
Bb : 426
EAb : 402
DGbGB : 377
DbA : 364
Eb : 362
EG : 362
GbGB : 356
DGbA : 351
EGb : 347


In [20]:
# For entries 1..49 of `count`, list the neighbours reported by
# closest_by_name(), dropping the first result of each lookup.
for chord, n in count[1:50]:
    print('closest to {} : {}'.format(chord, closest_by_name(chord)[1:]))

closest to Gb : ['GbB', 'UNK', 'DGbB', 'DGb', 'DbEbAB']
closest to B : ['GbB', 'D', 'DGbAB', 'CEbEB', 'EbB']
closest to E : ['DEbAbB', 'DbDGb', 'D', 'EbE', 'DbEbE']
closest to EAbB : ['EFAbB', 'EbEAbB', 'AbB', 'EbEB', 'CEB']
closest to CEG : ['CEGB', 'EbGbGB', 'CEGBbB', 'CEGbGB', 'DbDEbFGbGBb']
closest to GbB : ['Gb', 'B', 'EGbGAB', 'DbEbAB', 'UNK']
closest to A : ['DEbGbA', 'DAB', 'EA', 'GbA', 'DEA']
closest to DGb : ['Gb', 'D', 'GbB', 'DFGb', 'FGbBb']
closest to DGbB : ['Gb', 'CDGbB', 'GbB', 'DbEbAB', 'DB']
closest to D : ['DGb', 'B', 'DBbB', 'GbGA', 'DEAb']
closest to DbEA : ['DEAB', 'Db', 'DbDEGA', 'CEbEA', 'DbE']
closest to AbB : ['EAbB', 'EbAbB', 'CDbEFAb', 'CEbEAbB', 'EbEAbB']
closest to Ab : ['EAbA', 'UNK', 'GbAb', 'EbAbB', 'AbA']
closest to Db : ['DbEbA', 'DbE', 'A', 'UNK', 'DbEA']
closest to GbA : ['A', 'DbEbGbAbA', 'DGbA', 'CGbGA', 'DbEGbB']
closest to DEAbBbB : ['DEBb', 'DEAbABbB', 'DEAbB', 'DbD', 'DEAb']
closest to Bb : ['GbGA', 'DEAbBb', 'CEbEB', 'CDbEFAb', 'C']
closest t

In [90]:
def random_walk(init, length):
    """Walk randomly through the embedding space, starting at index ``init``.

    At every step the current index is recorded, then the next index is drawn
    with ``np.random.choice`` from the 12 indices that ``closest`` returns for
    the current embedding.

    Returns:
        A list with ``length`` vocabulary indices, beginning with ``init``.
    """
    visited = []
    current = init
    for _step in range(length):
        visited.append(current)
        candidates = closest(embeddings[current], take=12)
        current = np.random.choice(candidates)
    return visited

In [91]:
def to_freq(chord, unknown_token='UNK'):
    """Translate a chord name such as ``'DbEA'`` into a list of note numbers.

    Each capital letter C, D, E, F, G, A, B maps to 60, 62, 64, 65, 67, 69, 71
    respectively; a ``'b'`` directly after a letter lowers that note by one.
    The numbers are what make_midi() passes as the ``note`` value of its
    MIDI messages.

    Args:
        chord: chord name made of note letters, each optionally followed by 'b'.
        unknown_token: vocabulary placeholder that does not name any notes.
            The vocabulary contains such an entry ('UNK'), and a random walk
            can reach it; it is translated to an empty list instead of
            failing on its letters.

    Returns:
        The note numbers in the order the notes appear in ``chord``; an empty
        list for ``unknown_token`` or an empty string.

    Raises:
        KeyError: if ``chord`` contains a character that is neither a note
            letter nor 'b'.
    """
    if chord == unknown_token:
        return []

    notes = {'C': 60, 'D': 62, 'E': 64, 'F': 65, 'G': 67, 'A': 69, 'B': 71}
    flat = 0
    freqs = []
    # Scan right-to-left so that a 'b' is seen before the letter it modifies.
    for x in reversed(chord):
        if x == 'b':
            flat = -1
        else:
            freqs.append(notes[x] + flat)
            flat = 0
    return list(reversed(freqs))

In [92]:
import mido

In [93]:
def make_midi(sequence, path='random.mid', delta=400, velocity=64):
    """Write a sequence of chords to a single-track, type-0 MIDI file.

    Every chord is emitted as simultaneous ``note_on`` messages followed by
    ``note_off`` messages, the first of which carries a delta time of
    ``delta``, so each chord sounds for ``delta`` time units.

    Args:
        sequence: iterable of chords, each a list of note numbers (as produced
            by to_freq()). An empty chord is treated as a rest of length
            ``delta``; a rest at the very end of the sequence is dropped
            because no later event can carry its delay.
        path: output file name; defaults to the previously hard-coded
            'random.mid'.
        delta: delta time given to each chord (MIDI ticks in mido's track
            message convention).
        velocity: velocity used for every note_on / note_off message.
    """
    mid = mido.MidiFile()
    mid.type = 0
    track = mido.MidiTrack()
    mid.tracks.append(track)

    pending = 0  # delay accumulated from empty chords, applied to the next onset
    for freqs in sequence:
        if len(freqs) == 0:
            pending += delta
            continue
        for position, freq in enumerate(freqs):
            # Only the first note_on carries the accumulated rest; the others
            # start at the same instant.
            onset = pending if position == 0 else 0
            track.append(mido.Message('note_on', note=freq, velocity=velocity, time=onset))
        pending = 0
        for position, freq in enumerate(freqs):
            # Only the first note_off advances time; the rest end together.
            release = delta if position == 0 else 0
            track.append(mido.Message('note_off', note=freq, velocity=velocity, time=release))
    mid.save(path)

In [94]:
# Take a 100-step random walk starting at vocabulary index 40, convert every
# visited chord name to note numbers and hand the sequence to make_midi().
make_midi([to_freq(reverse_dictionary[x]) for x in random_walk(40,100)])