In [30]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.cluster import KMeans
import sys, re, itertools, random, copy, pickle

In [35]:
!pip3 install MIDIUtil

Collecting MIDIUtil
  Using cached MIDIUtil-1.1.3.tar.gz
Building wheels for collected packages: MIDIUtil
  Running setup.py bdist_wheel for MIDIUtil: started
  Running setup.py bdist_wheel for MIDIUtil: finished with status 'done'
  Stored in directory: C:\Users\Jonathan\AppData\Local\pip\Cache\wheels\1b\3f\74\ed962cae95850ac6d38c6ebd52703caec027b6db8e0d751bdd
Successfully built MIDIUtil
Installing collected packages: MIDIUtil
Successfully installed MIDIUtil-1.1.3


In [1]:
import MIDIUtil

ModuleNotFoundError: No module named 'MIDIUtil'

In [2]:
# Read in the data and generate the offsets.
# It's okay to generate offsets here since are trigram notes.
# Recall: len = how long note lasts, offset = when it's hit (e.g. stopwatch).
numberofitems = 12345678910112 # some huge placeholder
data = pd.read_csv('./oscar2ngrams.txt', names=['Note','Len'])

# toggle if want to limit to first k items
# 1078 notes in original MIDI file
# numberofitems = 1078
# data = data[:numberofitems]

# Re-add offsets
totaloffset = 0
offsets = []
for i in data['Len']:
    offsets.append(totaloffset)
    totaloffset += i
data["Offset"] = pd.Series(offsets)
data.head()

Unnamed: 0,Note,Len,Offset
0,D5,0.5,0.0
1,C#5,0.25,0.5
2,A5,0.25,0.75
3,D5,0.25,1.0
4,B-4,0.5,1.25


In [3]:
""" Visualization #1: Initial plotting of length over offset. """

# Plot the length over offset.
# *args is some (n, 2) array you want to plot
def plotTiming(data, labels=None, clustercenters=None):
    numberofitems = len(data)
    
    # generate colors
    clusterCodes = dict()
    if labels is not None:
        for i in labels:
            r = lambda: random.randint(0,255)
            clusterCodes[i] = ('#%02X%02X%02X' % (r(),r(),r())).lower()
    
    # Initialize the graph
    dx = data['Offset']
    dy = data['Len']
    dn = data['Note']
    plt.plot(dx, dy, 'm.--', linewidth=1.5)
    for ix, (x, y) in enumerate(zip(dx, dy)):
        color = 'ko'
        if labels is not None:
            color = clusterCodes[labels[ix]]
            plt.plot(x, y, 'x', ms=15, mew=1.5, color=color)
            continue
        plt.plot(x, y, color)

    # plot the cluster centers if available
    if clustercenters is not None:
        for currColorIx, i in enumerate(clustercenters):
            cx = i[0]
            cy = i[1]
            color = clusterCodes[currColorIx]
            plt.plot(cx, cy, 'ko', mew=0, ms=7.5) # plot black. same color: color=color
                
    # plot the ticks if under certain # of points
    if numberofitems <= 100:
        plt.xticks(range(0, int(max(dx)) + 1))

    # Annotate with note data only if under certain # of points
    # (Otherwise, it gets too messy!)
    if numberofitems <= 100 and labels is None:
        for note, offset, length in izip(dn, dx, dy):
            plt.annotate(note, xy=(offset, length), color='g')

    # Enter title
    plt.title('Generated N-Grams', fontsize=20, horizontalalignment='center')
        
    # set fig limits, size, and other display things
    fig = plt.gcf()
    ax = plt.gca()
    plt.ylim([0, max(dy)+ 0.25])
    plt.xlim([min(dx) - 1, max(dx) + 1])
    plt.ylabel('Duration', fontsize=16)
    plt.xlabel('Offset', fontsize=16)
    plt.grid()
    fig = plt.gcf()
    fig.set_size_inches(18, 6)
    # plt.xkcd()
    ax.xaxis.grid(False)

In [5]:
""" Visualization #2: K-Means Clustering. """

notesX = data["Offset"].values.reshape(-1, 1)
notesY = data["Len"].values.reshape(-1, 1)
notesXY = np.concatenate((notesX, notesY), axis=1)
notenames = np.array([i for i in data["Note"]])
km = KMeans(n_clusters=int(np.sqrt(len(notesX) / 2)))
km.fit(notesXY)
kmlabels = km.labels_
# plotTiming(data, labels=kmlabels, clustercenters=km.cluster_centers_)

In [9]:
""" Collect the clusters for use later. """

# Iterate over notes and labels, aggregating clusters indicated by the unique labels.
# Can't use groupby() since you might have multiple labels.
allclusters = [] # this will be a list of (list of notes)s
currlabel = kmlabels[0]
currcluster = []
for ix, note, label in zip(range(len(notenames)), notenames, kmlabels):
    if currlabel == label:
        currcluster.append(note)
    else:
        currlabel = label
        allclusters.append(currcluster)
        currcluster = []
        currcluster.append(note)

In [18]:
""" Read in the chord data. """

# Import the chord data.
allchords = pd.read_csv('oscar2chords.txt', skiprows=2)[:].sort_values("Offset")
allchords.index = range(1, len(allchords) + 1)
with open('oscar2chords.txt', 'r') as f:
    metmark = float(f.readline())
    tsig_num, tsig_den = [i for i in f.readline().replace(' /', '').split()]
    
print("Metronome, Timesig Numerator, Timesig Denominator, # chords played")
print(metmark, tsig_num, tsig_den, len(allchords))
allchords.sort_values("Offset", ascending=True)[:10]
allchords.head()

Metronome, Timesig Numerator, Timesig Denominator, # chords played
176.0 4 4 297


Unnamed: 0,FullName,CommonName,Len,Offset
1,Chord {D in octave 5 | C in octave 4 | E in oc...,A6-perfect-fourth minor tetrachord,1.125,8.0
2,Chord {A in octave 3 | G in octave 3 | E in oc...,A3-incomplete dominant-seventh chord,1.25,8.0
3,Chord {E in octave 6 | E in octave 4 | D in oc...,D6-quartal trichord,1.375,9.625
4,Chord {C in octave 4 | A in octave 5} Dotted Q...,A5-interval class 3,1.5,9.625
5,Chord {G in octave 3 | A in octave 3} Quarter ...,A3-interval class 2,1.666667,9.625


In [25]:
""" Parse chords into form suitable for analysis. """

# Iterate over a list in chunks of size n. Return tuples (for dict).
def chunks(iterable, n):
    for ix, item in enumerate(iterable):
        if ix == len(iterable) - (n-1): return
        yield tuple(iterable[ix:ix+n])
        
# Convert music21 note to mingus note.
# This version (different from that in 3. Play Notes)
# doesn't return a Note object: returns a string.
def mingifytext(note):
    accidental = re.compile("[A-Z](-|#)[0-9]")
    if accidental.match(note):
        if '-' not in note: note = "%s%s-%s" % (note[0], note[1], note[2])
        else: note = note.replace('-', 'b-')
    else: note = "%s-%s" % (note[0], note[1])
    return note

# Given a MUSIC21 note, such as C5 or D#7, convert it
# into a note on the keyboard between 0 and 87 inclusive.
# Don't convert it for mingus; try to use music21 note style
# as much as possible for all this stuff.
def quantify(note):
    notevals = {
        'C' : 0,
        'D' : 2,
        'E' : 4,
        'F' : 5,
        'G' : 7,
        'A' : 9,
        'B' : 11
    }
    quantized = 0
    octave = int(note[-1]) - 1
    for i in note[:-1]:
        if i in notevals: quantized += notevals[i]
        if i == '-': quantized -= 1
        if i == '#': quantized += 1
    quantized += 12 * octave
    return quantized

# Extract notes in chords.
# Shorter single-note chords: lowest prob of being played
def getChords(allchords, mingify=True, minNoteCount=3):
    chords_poss = []
    for chordname in allchords['FullName']:
        notenames = re.findall("[CDEFGAB]+[-]*[sharp|flat]*[in octave]*[1-9]", chordname)
        for ix in range(len(notenames)):
            notenames[ix] = notenames[ix].replace(" in octave ", '').replace("-sharp","#").replace("-flat","-")
        if mingify==True:
            notenames = [mingifytext(note) for note in notenames]
        else:
            notenames = [note for note in notenames]
        toDel = [ix for ix in range(len(notenames)) if "6" in notenames[ix] 
                 or "5" in notenames[ix]] # rm chords with notes too high, e.g. oct == 6 or 5
        notenames = [i for ix, i in enumerate(notenames) if ix not in toDel]
        
        # Prune and add the chord, which is a list of notes
        # 1. Does # of notes > min threshold for # of notes needed for a chord?
        # 2. Skip chord if it has half-notes in it (simplify chord comping).
        minReq = True if len(notenames) >= minNoteCount else False
        noHalfNote = True
        for a, b in chunks(notenames, 2):
            if np.abs(quantify(a) - quantify(b)) % 12 == 1:
                noHalfNote = False
                break
        if minReq and noHalfNote:
            chords_poss.append(sorted(notenames))
    result = sorted(list(chords_poss for chords_poss,_ in itertools.groupby(chords_poss)))
    result = list(result for result,_ in itertools.groupby(result))
    return result

oscarchords = getChords(allchords) # the chordbank
len(oscarchords) # should be same as in (7)

40

In [32]:
allclusters

[['D5',
  'C#5',
  'A5',
  'D5',
  'B-4',
  'F4',
  'D5',
  'F5',
  'A5',
  'D5',
  'F5',
  'E-5',
  'C#5',
  'C5',
  'B-4',
  'G#4'],
 ['A4',
  'F4',
  'C4',
  'D5',
  'A5',
  'F#5',
  'F5',
  'E-5',
  'D5',
  'C5',
  'A4',
  'F4',
  'C4',
  'C#4',
  'D4',
  'E-4',
  'E4',
  'G4',
  'F4'],
 ['D4',
  'C4',
  'G4',
  'E4',
  'F4',
  'D4',
  'C4',
  'G4',
  'E4',
  'F4',
  'G4',
  'G#4',
  'A4',
  'E4',
  'F4'],
 ['D4',
  'E5',
  'F5',
  'C#6',
  'E6',
  'D6',
  'E5',
  'C5',
  'G#5',
  'E5',
  'B-5',
  'F#5',
  'D5',
  'E5',
  'C5',
  'D5',
  'F5',
  'B-4',
  'A4',
  'F5',
  'G5',
  'F5',
  'E5',
  'C5',
  'D5',
  'E5',
  'D5',
  'C5',
  'D5',
  'C5',
  'B-4',
  'A4',
  'F5'],
 ['C5',
  'E-6',
  'D6',
  'B5',
  'G#5',
  'D5',
  'B-5',
  'E4',
  'C4',
  'C5',
  'F5',
  'C5',
  'C#5',
  'D5',
  'F5',
  'E-5',
  'E5',
  'G5'],
 ['A4',
  'B-4',
  'D5',
  'C5',
  'B-4',
  'G4',
  'F4',
  'F5',
  'A5',
  'F5',
  'F6',
  'E-6',
  'C6',
  'F5',
  'G#5',
  'F5',
  'C5',
  'E5',
  'G5',
  'F5',
 

In [31]:
""" The K-Means clustering function for this program. """

# Read in notes and convert into bitwise frame. But might be expensive.
# note sequence is a list in music 21 style, (D/D-)
# note that chordbank and notesequence should be in same format (mingus/m21)
# Returns default dict with notes in whatever music21/mingus style chordbank is already in.

def comp(notesequence, chordbank, limitChords=True, minClustDist=3):
    
    # Load the ML classifier from disk
    with open('part7clf.pkl', 'rb') as fid:
        clf = pickle.load(fid)
        
    # Load the chord default dictionary from disk
    with open('part7cdict.pkl', 'rb') as fid:
        cdict = pickle.load(fid)
        
    # Cluster notes into chunks
    quantizednotes = np.array([quantify(note) for note in notesequence]).reshape(-1, 1)
    km = KMeans(n_clusters=random.randrange(2, 4))
    km.fit(quantizednotes)
    
    # get each cluster with its notes
    firstixs = [0]
    clusters = [] # list of (list of notes)s
    currLabel = km.labels_[0]
    currnotes = []
    for ix, (label, note) in enumerate(zip(km.labels_, notesequence)):
        if note == notesequence[-1]:
            currnotes.append(note)
            clusters.append(currnotes)
            break
        if label == currLabel:
            currnotes.append(note)
        else:
            clusters.append(currnotes)
            firstixs.append(ix)
            currLabel = label
            currnotes = []
            currnotes.append(note)

    # Prune clusters (with firstixs): min dist between clusters
    # For example: if clusters at 2 and 3, remove cluster 3 (front load clusters)
    firstIxsDel = []
    for ix in xrange(len(firstixs)):
        if ix == len(firstixs) - 1:
            break
        diff = firstixs[ix + 1] - firstixs[ix]
        if diff <= minClustDist:
            firstIxsDel.append(ix)
    firstixs = [i for ix, i in enumerate(firstixs) if ix not in firstIxsDel]
    clusters = [i for ix, i in enumerate(clusters) if ix not in firstIxsDel]
    
    # for each cluster, find chord that matches
    allmatches = defaultdict()
    for ix, (cluster, firstix) in enumerate(zip(clusters, firstixs)):
        quantized = map(lambda x: quantify(x), cluster)
        npvect = np.zeros((1, 88))
        for q in quantized:
            npvect[0, q] = 1
        matchchordID = clf.predict(npvect)[0]
        allmatches[firstix] = cdict[matchchordID]
            
    # Prune the default dict
    # If # of things > some threshold, remove random items until threshold
    if limitChords == True:
        threshold = random.choice((0, 2))
        if len(allmatches) > threshold:
            for i in xrange(len(allmatches) - threshold):
                allmatches.pop(random.choice(allmatches.keys()))
            
    
    return allmatches

testnotes = ['D5','F5','C6','B5','A-5','A5','C6','B5','A-5','G5','A-5','A5','C6','B5','F5','E5']
comp(testnotes, oscarchords)

ModuleNotFoundError: No module named 'copy_reg\r'