In [54]:
import gensim
import glob
import json
import music21
import pprint

In [55]:
# glob returns matches in arbitrary, filesystem-dependent order. Sort them so
# the corpus (which is built by iterating this list) has a stable order on
# every run and every machine.
paths_to_processed = sorted(glob.glob('data/json/*.json'))

In [56]:
def load_corpus(paths=None):
    """Load the ``'rows'`` entry of each processed JSON file.

    Args:
        paths: Optional iterable of JSON file paths to read. When omitted,
            the notebook-level ``paths_to_processed`` list is used, so
            existing zero-argument calls behave as before.

    Returns:
        A list with one element per file, in iteration order. Each element is
        whatever the file stores under its top-level ``'rows'`` key.

    Raises:
        KeyError: If a file's top-level object has no ``'rows'`` key.
        Errors raised by ``open`` or ``json.load`` propagate unchanged.
    """
    if paths is None:
        # Resolved at call time, so the global only has to exist when the
        # caller relies on the default.
        paths = paths_to_processed
    corpus = []
    for fp in paths:
        with open(fp, 'r') as f:
            obj = json.load(f)
        corpus.append(obj['rows'])
    return corpus

In [68]:
def transpose_to_all(c):
    """Return twelve successive transpositions of ``c``.

    Each result is produced by calling ``transpose(7)`` on the previous one,
    starting from ``c``. The untransposed input is not part of the output.
    (Presumably 7 means seven semitones, i.e. stepping round the circle of
    fifths -- confirm against the music21 ``transpose`` documentation.)

    Args:
        c: Any object exposing ``transpose(interval)`` that returns a new
            object of the same kind.

    Returns:
        A list of 12 transposed objects, in the order they were generated.
    """
    results = []
    current = c
    for _ in range(12):
        current = current.transpose(7)
        results.append(current)
    return results

def transpose_to_some(c, degree):
    """Return ``c`` followed by ``degree`` successive transpositions of it.

    Each transposition is obtained by calling ``transpose(7)`` on the
    previous element, so the result has ``degree + 1`` entries and the first
    entry is the original object itself.

    Args:
        c: Any object exposing ``transpose(interval)``.
        degree: How many times to apply ``transpose(7)``.

    Returns:
        A list starting with ``c`` and continuing with each transposed copy.
    """
    results = [c]
    for _ in range(degree):
        # Always transpose the most recently added element.
        results.append(results[-1].transpose(7))
    return results
        
def c_to_strep(c):
    """Encode a chord's pitch classes as a 12-character bit string.

    Position ``i`` of the result (``i`` from 0 to 11) is ``"1"`` when ``i``
    is contained in ``c.pitchClasses`` and ``"0"`` otherwise.

    Args:
        c: Any object with a ``pitchClasses`` attribute supporting ``in``.

    Returns:
        A ``str`` of length 12 made of ``"0"`` and ``"1"`` characters.
    """
    # Read the attribute once: it does not change during the loop, and on a
    # property-backed object every access may rebuild the collection.
    pitch_classes = c.pitchClasses
    return "".join("1" if pc in pitch_classes else "0" for pc in range(12))

def strep_to_c(strep):
    """Build a ``music21.chord.Chord`` from a bit-string representation.

    The index of every ``'1'`` character in ``strep`` is collected, and the
    resulting list of indices is passed to the ``Chord`` constructor.

    Args:
        strep: Iterable of characters; only positions equal to ``'1'`` count.

    Returns:
        The object returned by ``music21.chord.Chord(<list of indices>)``.
    """
    pitch_classes = [idx for idx, flag in enumerate(strep) if flag == '1']
    return music21.chord.Chord(pitch_classes)

def strep_to_symbol(strep):
    """Return the chord-symbol figure for a bit-string chord representation.

    The string is first turned into a chord with ``strep_to_c`` and that
    chord is handed to ``music21.harmony.chordSymbolFigureFromChord``.

    Args:
        strep: Bit-string accepted by ``strep_to_c``.

    Returns:
        Whatever ``chordSymbolFigureFromChord`` returns for the chord.
    """
    return music21.harmony.chordSymbolFigureFromChord(strep_to_c(strep))

def strep_to_root(strep):
    """Return the root of the chord encoded by ``strep``.

    Args:
        strep: Bit-string accepted by ``strep_to_c``.

    Returns:
        The value of ``root()`` on the chord built by ``strep_to_c``, or the
        sentinel string ``"wat"`` when ``root()`` raises an ordinary
        exception (e.g. no root can be determined).

    Raises:
        Exceptions from ``strep_to_c`` itself propagate, as do
        ``KeyboardInterrupt`` / ``SystemExit`` raised during ``root()``.
    """
    c = strep_to_c(strep)
    try:
        return c.root()
    except Exception:
        # Best-effort fallback for chords with no computable root. A bare
        # ``except:`` would also trap KeyboardInterrupt/SystemExit and make
        # the cell impossible to interrupt cleanly.
        return "wat"

def pprint_topic(trained_model, dictionary, n):
    """Describe topic ``n`` as a list of ``(chord root, weight)`` pairs.

    Despite the name, nothing is printed here; the list is returned.

    Args:
        trained_model: Object whose ``show_topic(n)`` yields ``(term, weight)``
            pairs where ``term`` can be converted with ``int``.
        dictionary: Mapping from integer term id to the bit-string token.
        n: Index of the topic to describe.

    Returns:
        A list of ``(strep_to_root(token), weight)`` tuples, in the order
        ``show_topic`` produced them.
    """
    labelled = []
    for term_id, weight in trained_model.show_topic(n):
        token = dictionary[int(term_id)]
        labelled.append((strep_to_root(token), weight))
    return labelled

In [65]:
# One entry per processed JSON file: the value stored under its 'rows' key.
corpus = load_corpus()

In [59]:
# Vocabulary built over every document in the corpus. Later cells index it
# with an integer id (d[int(...)]) to get the token back.
d = gensim.corpora.Dictionary(corpus)

In [60]:
# Convert each document to its bag-of-words form using the dictionary `d`.
bow_corpus = [d.doc2bow(document) for document in corpus]

In [66]:
# LDA training is stochastic: pin the seed so Restart & Run All reproduces the
# same topics. id2word is left unset because pprint_topic expects show_topic()
# to return term ids that it can pass to int().
lda = gensim.models.LdaModel(corpus=bow_corpus, num_topics=24, random_state=0)

In [69]:
# Take the topic count from the model rather than repeating the literal 24,
# so this cell stays correct if num_topics is changed where the model is built.
for k in range(lda.num_topics):
    pprint.pprint(pprint_topic(lda, d, k))

[(<music21.pitch.Pitch G>, 0.12465196659873333),
 (<music21.pitch.Pitch D>, 0.059763669182571984),
 (<music21.pitch.Pitch D>, 0.054710818845493975),
 (<music21.pitch.Pitch G>, 0.043675586269370993),
 (<music21.pitch.Pitch G>, 0.043003869638753502),
 (<music21.pitch.Pitch D>, 0.03827953990867667),
 (<music21.pitch.Pitch G>, 0.034745733400039176),
 ('wat', 0.027723857319371006),
 (<music21.pitch.Pitch G>, 0.025506784301301193),
 (<music21.pitch.Pitch D>, 0.017979990641461437)]
[(<music21.pitch.Pitch B->, 0.054371357267690275),
 (<music21.pitch.Pitch E->, 0.044417846441861905),
 (<music21.pitch.Pitch C>, 0.03010742806236243),
 (<music21.pitch.Pitch C>, 0.02402922065708446),
 (<music21.pitch.Pitch B->, 0.020020327330933754),
 (<music21.pitch.Pitch B->, 0.019337243744485057),
 ('wat', 0.018340822549283649),
 (<music21.pitch.Pitch F>, 0.017457371755761286),
 (<music21.pitch.Pitch G>, 0.016823151769859896),
 (<music21.pitch.Pitch F>, 0.016710514392821004)]
[(<music21.pitch.Pitch E>, 0.0793253