In [42]:
import music21
import numpy as np
import scipy as sp
import json

In [2]:
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of length ``n``.

    Slices are produced in order from the start of ``l``; the last one is
    shorter when ``len(l)`` is not a multiple of ``n``. Nothing is yielded
    for an empty ``l``.
    """
    total = len(l)
    for start in range(0, total, n):
        stop = start + n
        yield l[start:stop]

def transpose_to_all(c):
    """Return 12 successive transpositions of ``c``.

    Each element is obtained by calling ``transpose(7)`` on the previous
    one, starting from ``c``. The untransposed input itself is not part of
    the result, so the list always has exactly 12 entries.
    """
    results = []
    current = c
    while len(results) < 12:
        # Every step builds on the previous transposition, not on the input.
        current = current.transpose(7)
        results.append(current)
    return results

def transpose_to_some(c, degree):
    """Return ``c`` followed by ``degree`` successive transpositions of it.

    The first element is ``c`` itself; each later element is the result of
    calling ``transpose(7)`` on the element before it, so the returned list
    has ``degree + 1`` entries.
    """
    results = [c]
    for _ in range(degree):
        # Chain from the most recent entry rather than from the original.
        results.append(results[-1].transpose(7))
    return results
        
def c_to_strep(c):
    """Encode the pitch classes of ``c`` as a 12-character bit string.

    Position ``i`` of the result is ``"1"`` when ``i`` is in
    ``c.pitchClasses`` and ``"0"`` otherwise, for ``i`` from 0 to 11.
    """
    return "".join(
        "1" if pitch_class in c.pitchClasses else "0"
        for pitch_class in range(12)
    )

def strep_to_c(strep):
    """Build a music21 chord from a pitch-class bit string.

    Every index of ``strep`` whose character is ``'1'`` is collected, in
    order, and the resulting list is passed to ``music21.chord.Chord``.
    """
    pitch_classes = [
        position for position, flag in enumerate(strep) if flag == '1'
    ]
    return music21.chord.Chord(pitch_classes)

def strep_to_symbol(strep):
    """Return the chord-symbol figure for a pitch-class bit string.

    The bit string is first turned into a chord with ``strep_to_c`` and the
    chord is then handed to ``music21.harmony.chordSymbolFigureFromChord``.
    """
    return music21.harmony.chordSymbolFigureFromChord(strep_to_c(strep))

The idea here is from Arora et al. (2016).

Use SVD on known relation pairs

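1. Construct a list of chord pairs (a, b) that stand in the relationship of interest
2. Look up their embedding vectors and compute the difference v_a - v_b for each pair
3. Calculate the top k singular vectors of the matrix of difference vectors
4. Look at how each difference vector correlates with the top k singular vectors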

Let's look at dominant sevenths going to tonics

In [134]:
# Seed chords for the relation pairs. In music21 pitch names a trailing '-'
# denotes a flat, so 'B-' is B-flat and 'A-' is A-flat.
a = music21.chord.Chord('C E G B-')  # C dominant seventh
b = music21.chord.Chord('F A C')  # F major triad
c = music21.chord.Chord('F A- C')  # F minor triad

In [135]:
# Expand each seed chord into its 12 successive transpositions; the three
# lists stay index-aligned because the same helper is applied to each seed.
all_a, all_b, all_c = map(transpose_to_all, (a, b, c))

In [136]:
# Encode every transposed chord as a 12-character pitch-class bit string.
# The comprehension variable is called `chord`, not `c`, so it cannot shadow
# (or, under Python 2 comprehension scoping, overwrite) the seed chord `c`.
all_a_str = [c_to_strep(chord) for chord in all_a]
all_b_str = [c_to_strep(chord) for chord in all_b]
all_c_str = [c_to_strep(chord) for chord in all_c]

In [10]:
# Load the precomputed embedding matrix from disk.
embeddings = np.load('./embeddings_lite.np.npy')

# Read the accompanying metadata. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open for the kernel's
# lifetime.
with open('./metadata_lite.json') as metadata_file:
    metadata = json.load(metadata_file)

In [15]:
# One label per metadata entry after the first: take the entry's first field
# and strip any double-quote characters from it.
# NOTE(review): the first metadata entry is skipped, presumably a header row - confirm.
labels = [
    entry[0].replace("\"", "")
    for entry in metadata[1:]
]

In [187]:
# Pair every encoded `a` chord with the `b` chord at the same index, then
# with the `c` chord at the same index (all `b` pairs first, `c` pairs after).
pairs = []
for target_strs in (all_b_str, all_c_str):
    pairs.extend(zip(all_a_str, target_strs))

In [188]:
# Sanity check: rows of `embeddings` are looked up by position in `labels`,
# so the row count and the label count should agree.
embeddings.shape, len(labels)

((1426, 100), 1426)

In [189]:
# Build one embedding-difference vector per (first, second) chord pair.
differences = []

# The loop variables are deliberately not named `a`/`b`: those names hold the
# seed music21 chords, and rebinding them to strings here would break any
# later re-run of the cells that transpose them.
for label_a, label_b in pairs:
    # `labels` and `embeddings` are index-aligned, so a label's position in
    # `labels` is the row of its embedding vector.
    v_a = embeddings[labels.index(label_a)]
    v_b = embeddings[labels.index(label_b)]
    difference = v_a - v_b
    differences.append(difference)

In [190]:
# Stack the per-pair difference vectors into a single array, one row per pair.
dmat = np.array(differences)

In [191]:
# Show the dimensions of the difference matrix.
dmat.shape

(24, 100)

In [198]:
from scipy.spatial import distance

In [200]:
# Import the submodule explicitly: `import scipy as sp` on its own does not
# guarantee that `sp.sparse.linalg` has been loaded, so the attribute access
# below would otherwise depend on some other import having pulled it in.
import scipy.sparse.linalg

# Truncated SVD of the difference matrix, keeping k=6 singular triplets.
# The third return value holds the right singular vectors as rows.
# NOTE(review): svds does not promise the descending singular-value order of a
# dense SVD - check `s` before reading a row index of `V` as a rank.
U, s, V = sp.sparse.linalg.svds(dmat, k=6)
U.shape, s.shape, V.shape

((24, 6), (6,), (6, 100))

In [201]:
# Rows of `V` and rows of `dmat` must have the same length to be compared.
V.shape, dmat.shape

((6, 100), (24, 100))

In [209]:
# Cosine distance between the singular vector in row 3 of `V` and every
# difference vector. Iterating over the rows of `dmat` directly avoids
# hard-coding the number of pairs, so the cell still works if `pairs` changes.
for diff_row in dmat:
    print(distance.cosine(V[3], diff_row))

0.878224849568
1.0106194718
0.738154559143
0.794753170696
0.954322005279
0.757469672714
0.322186673442
1.44031271426
0.858912043257
0.597902911819
0.923239117229
0.856307738041
0.81945090257
1.04372492962
0.923558594134
0.732038594518
0.854454458546
0.765536390633
0.389563351519
1.1743918713
0.982853044165
0.433756342486
0.896422284691
1.00013230295
