In [1]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.cluster import KMeans
import sys, re, itertools, random, copy

In [2]:
# Read in the data and generate the offsets.
# It's okay to generate offsets here since are trigram notes.
# Recall: len = how long note lasts, offset = when it's hit (e.g. stopwatch).
numberofitems = 12345678910112 # some huge placeholder
data = pd.read_csv('./oscar2ngrams.txt', names=['Note','Len'])

# toggle if want to limit to first k items
# 1078 notes in original MIDI file
# numberofitems = 1078
# data = data[:numberofitems]

# Re-add offsets
totaloffset = 0
offsets = []
for i in data['Len']:
    offsets.append(totaloffset)
    totaloffset += i
data["Offset"] = pd.Series(offsets)
data.head()

Unnamed: 0,Note,Len,Offset
0,D5,0.5,0.0
1,C#5,0.25,0.5
2,A5,0.25,0.75
3,D5,0.25,1.0
4,B-4,0.5,1.25


In [3]:
""" Visualization #1: Initial plotting of length over offset. """

# Plot the length over offset.
# *args is some (n, 2) array you want to plot
def plotTiming(data, labels=None, clustercenters=None):
    numberofitems = len(data)
    
    # generate colors
    clusterCodes = dict()
    if labels is not None:
        for i in labels:
            r = lambda: random.randint(0,255)
            clusterCodes[i] = ('#%02X%02X%02X' % (r(),r(),r())).lower()
    
    # Initialize the graph
    dx = data['Offset']
    dy = data['Len']
    dn = data['Note']
    plt.plot(dx, dy, 'm.--', linewidth=1.5)
    for ix, (x, y) in enumerate(zip(dx, dy)):
        color = 'ko'
        if labels is not None:
            color = clusterCodes[labels[ix]]
            plt.plot(x, y, 'x', ms=15, mew=1.5, color=color)
            continue
        plt.plot(x, y, color)

    # plot the cluster centers if available
    if clustercenters is not None:
        for currColorIx, i in enumerate(clustercenters):
            cx = i[0]
            cy = i[1]
            color = clusterCodes[currColorIx]
            plt.plot(cx, cy, 'ko', mew=0, ms=7.5) # plot black. same color: color=color
                
    # plot the ticks if under certain # of points
    if numberofitems <= 100:
        plt.xticks(range(0, int(max(dx)) + 1))

    # Annotate with note data only if under certain # of points
    # (Otherwise, it gets too messy!)
    if numberofitems <= 100 and labels is None:
        for note, offset, length in izip(dn, dx, dy):
            plt.annotate(note, xy=(offset, length), color='g')

    # Enter title
    plt.title('Generated N-Grams', fontsize=20, horizontalalignment='center')
        
    # set fig limits, size, and other display things
    fig = plt.gcf()
    ax = plt.gca()
    plt.ylim([0, max(dy)+ 0.25])
    plt.xlim([min(dx) - 1, max(dx) + 1])
    plt.ylabel('Duration', fontsize=16)
    plt.xlabel('Offset', fontsize=16)
    plt.grid()
    fig = plt.gcf()
    fig.set_size_inches(18, 6)
    # plt.xkcd()
    ax.xaxis.grid(False)

In [5]:
""" Visualization #2: K-Means Clustering. """

notesX = data["Offset"].values.reshape(-1, 1)
notesY = data["Len"].values.reshape(-1, 1)
notesXY = np.concatenate((notesX, notesY), axis=1)
notenames = np.array([i for i in data["Note"]])
km = KMeans(n_clusters=int(np.sqrt(len(notesX) / 2)))
km.fit(notesXY)
kmlabels = km.labels_
# plotTiming(data, labels=kmlabels, clustercenters=km.cluster_centers_)

In [9]:
""" Collect the clusters for use later. """

# Iterate over notes and labels, aggregating clusters indicated by the unique labels.
# Can't use groupby() since you might have multiple labels.
allclusters = [] # this will be a list of (list of notes)s
currlabel = kmlabels[0]
currcluster = []
for ix, note, label in zip(range(len(notenames)), notenames, kmlabels):
    if currlabel == label:
        currcluster.append(note)
    else:
        currlabel = label
        allclusters.append(currcluster)
        currcluster = []
        currcluster.append(note)

In [17]:
""" Read in the chord data. """

# Import the chord data.
allchords = pd.read_csv('oscar2chords.txt', skiprows=2)[:].sort_values("Offset")
allchords.index = range(1, len(allchords) + 1)
with open('oscar2chords.txt', 'r') as f:
    metmark = float(f.readline())
    tsig_num, tsig_den = [i for i in f.readline().replace(' /', '').split()]
    
print("Metronome, Timesig Numerator, Timesig Denominator, # chords played")
print(metmark, tsig_num, tsig_den, len(allchords))
allchords.sort_values(columns="Offset", ascending=True)[:10]
allchords.head()

Metronome, Timesig Numerator, Timesig Denominator, # chords played
176.0 4 4 297


TypeError: sort_values() got an unexpected keyword argument 'columns'