In [None]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import pretty_midi
import midi
import matplotlib.patches
import matplotlib
import djitw
import scipy.spatial
import librosa
import sys
import os
import csv
import glob
import tabulate
import ujson as json
import collections
%matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')
sns.set_style({'grid.linewidth': 1.3})
matplotlib.rc('font',**{'size':13, 'family':'Open Sans'})

In [None]:
# Colour palette and default figure sizes shared by every figure below,
# followed by a quick swatch plot to eyeball the colours.
BLUE = '#1a6396'
GREEN = '#59dd97'
ORANGE = '#E8B71A'
GREY = '#DFDFDF'
RED = '#DB3340'
TAN = '#F7EAC8'
FIGSIZE = (9, 6)
FIGSIZE_FLAT = (9, 2)
plt.figure(figsize=(8, 3))
swatch_ax = plt.gca()
# Tan backdrop behind the swatches
swatch_ax.add_patch(plt.Rectangle((-1, -1), 8, 1.5, fc=TAN, lw=0))
# One unit square per colour, left to right (the TAN swatch used to be
# added twice at the same position; once is enough).
for column, color in enumerate([BLUE, GREEN, GREY, RED, ORANGE, TAN]):
    swatch_ax.add_patch(plt.Rectangle((column, 0), 1, 1, fc=color, lw=0))
plt.xlim([-1, 7])
plt.ylim([-1, 2])
#plt.axis('off')

# Chapter 1

In [None]:
# Figure '1-example_sequences.pdf': five rows of different kinds of sequences
# (text, a sampled signal, spectra, DNA bases, video frames) drawn over nine
# shared time steps.
plt.figure(figsize=(FIGSIZE[0], FIGSIZE[1]*2))
ax = plt.gca()
# x positions of the nine time steps, shared by every row
t = np.linspace(.1, .9, 9)
plt.vlines(t, 0, 1., linestyles='dashed', alpha=.3, zorder=-1)

# Row 1: one word per time step
vc = .9
words = 'The quick brown fox jumps over the lazy dog'.split(' ')
for x, word in zip(t, words):
    ax.text(x, vc, word, {'family': 'monospace', 'size': 12}, ha='center', va='center')

# Row 2: stem plot of a sampled sinusoid centred on vc
vc = .7
signal = .08*np.sin(2.4*t*np.pi) + vc
plt.plot(t, signal, 'k.', ms=10)
plt.vlines(t, [s if s > vc else vc for s in signal],
           [s if s < vc else vc for s in signal], lw=1.2)

# Row 3: magnitude spectrum of a window of audio around each time step,
# drawn sideways (magnitude along x, frequency along y)
vc = .5
a, _ = librosa.load('data/1_A.wav')
N = a.shape[0]
for x in t:
    # Slice bounds must be integers; float indices raise on current NumPy
    frame = a[int((x - .1)*N):int((x + .1)*N)]
    spectrum = np.abs(np.fft.rfft(frame))
    # Keep the lowest third of the bins (// so the bound stays an int)
    spectrum = spectrum[:spectrum.shape[0]//3]
    spectrum = spectrum/3000.
    plt.plot(x + spectrum, np.linspace(vc - .08, vc + .08, spectrum.shape[0]), 'k')

# Row 4: a random one-hot column (one of four DNA bases) per time step.
# imshow changes the axis limits, so save them here and restore afterwards.
axis = plt.axis()
vc = .3
w = .02
h = .08
dna_names = ['T', 'A', 'C', 'G']
# Seed so the drawn bases are the same on every run
np.random.seed(0)
for x in t:
    dna = np.zeros((4, 1))
    n = np.random.randint(0, 4)
    dna[n] = 1
    plt.imshow(dna, interpolation='nearest', extent=(x - w, x + w, vc - h/2, vc + h), cmap=plt.cm.gray)
    # Black outline around the one-hot column
    plt.plot((x - w, x - w, x + w, x + w, x - w), (vc - h/2, vc + h, vc + h, vc - h/2, vc - h/2), 'k')
    ax.text(x, vc - h/2 - .01, dna_names[n], {'family': 'monospace', 'size': 16}, ha='center', va='top')
plt.axis(axis)

# Row 5: one video frame image per time step (files are numbered from 1)
vc = .1
for n, x in enumerate(t):
    im = plt.imread('data/1_video/{}.png'.format(n + 1))
    plt.imshow(im, interpolation='nearest', extent=(x - .03, x + .03, vc - .08, vc + .08))

plt.xlim([0.05, 0.95])
plt.ylim([0, 1])
plt.yticks([])
plt.axis('off')

# Time-step labels t_0 ... t_8 underneath everything
vc = 0.
for n, x in enumerate(t):
    ax.text(x, vc, '$t_{}$'.format(n), {'family': 'monospace', 'size': 16}, ha='center', va='top')

plt.savefig('1-example_sequences.pdf', bbox_inches='tight', pad_inches=0.)

In [None]:
# Fixed seed so the synthetic sequences (and the figures made from them)
# are reproducible.
np.random.seed(7)
match_length = 100
# Number of query samples cropped in total (half from each end).
# Floor division keeps this an int on both Python 2 and Python 3.
crop = match_length//5
def random_walk(N):
    """Return an N-step random walk with steps drawn from {-1, 0, 1}.

    The cumulative sum is divided by log(N).  Uses np.random.randint with an
    exclusive upper bound of 2, which replaces the deprecated
    np.random.random_integers(-1, 1, N).
    """
    return np.cumsum(np.random.randint(-1, 2, N))/np.log(N)
def random_sine(N):
    """Return N samples of a sinusoid with random frequency scale and phase.

    The base argument sweeps 0..5*pi, is scaled by a factor drawn uniformly
    from [.9, 1.1) and offset by a phase drawn uniformly from [0, 2*pi).
    """
    return np.sin(np.linspace(0, 5*np.pi, N)*np.random.uniform(.9, 1.1) + np.random.uniform(0, 2*np.pi))

# Build two noisy versions of one random sinusoid.  The sinusoid is generated
# with `extra` additional samples: `query` takes its first match_length
# samples (plus random-walk noise), `match` drops its first `extra` samples
# (plus independent random-walk noise).  Floor division keeps every length
# and slice bound an integer on Python 3 as well.
extra = match_length//10
match = random_sine(match_length + extra)
query = np.interp(np.arange(match_length),
                  np.arange(match_length + extra),
                  match + .5*random_walk(match_length + extra))
match = match[extra:] + .5*random_walk(match_length)
# Standardise both sequences to zero mean and unit variance
match = (match - match.mean())/match.std()
query = (query - query.mean())/query.std()

# Squared-difference distance matrix between `match` and the query with
# crop/2 samples removed from each end, then the DTW path through it.
half_crop = int(crop)//2
D = np.subtract.outer(match, query[half_crop:-half_crop])**2
p, q, score = djitw.dtw(D, inplace=False)

In [None]:
# Draw a correspondence line for every ds-th sample / path step
ds = 3

# Figure '1-example_distance_unwarped.pdf': the two sequences stacked
# vertically with sample n of one connected to sample n of the other.
plt.figure(figsize=FIGSIZE)
plt.plot(match - match.min(), GREEN, lw=2)
plt.plot(query - query.max(), BLUE, lw=2)

# list(...) is required on Python 3, where range() cannot be concatenated
# with a list; the last sample is always connected.
for n in list(range(0, match_length, ds)) + [match_length - 1]:
    plt.plot([n, n], [match[n] - match.min(), query[n] - query.max()], 'k:', lw=2)

plt.xlim(-1, plt.axis()[1])
plt.axis('off')
plt.savefig('1-example_distance_unwarped.pdf', bbox_inches='tight', pad_inches=0.1)

# Figure '1-example_distance_warped.pdf': same layout, but the query is the
# cropped version used for DTW and the connections follow the DTW path (p, q).
half_crop = int(crop)//2
query_cropped = query[half_crop:-half_crop]
query_offset = query_cropped.max()

plt.figure(figsize=FIGSIZE)
plt.plot(match - match.min(), GREEN, lw=2)
plt.plot(np.arange(half_crop, match_length - half_crop),
         query_cropped - query_offset, BLUE, lw=2)

for p_n, q_n in zip(p[::ds], q[::ds]):
    # q indexes the cropped query, so shift by half_crop to get its x position
    plt.plot([p_n, q_n + half_crop],
             [match[p_n] - match.min(), query_cropped[q_n] - query_offset],
             'k:', lw=2)

# Always draw the final step of the path
plt.plot([p[-1], q[-1] + half_crop],
         [match[p[-1]] - match.min(), query_cropped[q[-1]] - query_offset],
         'k:', lw=2)

plt.xlim(-1, plt.axis()[1])
plt.axis('off')
plt.savefig('1-example_distance_warped.pdf', bbox_inches='tight', pad_inches=0.1)

# Chapter 2

In [None]:
def draw_neural_net(ax, left, right, bottom, top, layer_sizes):
    '''
    Draw a neural network cartoon using matplotlib.

    Every layer is drawn as a vertically centred column of circles and every
    pair of consecutive layers is fully connected with straight lines.

    :usage:
        >>> fig = plt.figure(figsize=(12, 12))
        >>> draw_neural_net(fig.gca(), .1, .9, .1, .9, [4, 7, 2])

    :parameters:
        - ax : matplotlib.axes.AxesSubplot
            The axes on which to plot the cartoon (get e.g. by plt.gca())
        - left : float
            The center of the leftmost node(s) will be placed here
        - right : float
            The center of the rightmost node(s) will be placed here
        - bottom : float
            The center of the bottommost node(s) will be placed here
        - top : float
            The center of the topmost node(s) will be placed here
        - layer_sizes : list of int
            List of layer sizes, including input and output dimensionality.
            An empty list draws nothing; a single layer is drawn at ``left``.
    '''
    n_layers = len(layer_sizes)
    if n_layers == 0:
        return
    v_spacing = (top - bottom)/float(max(layer_sizes))
    # A single layer has no horizontal extent (avoids dividing by zero)
    h_spacing = (right - left)/float(n_layers - 1) if n_layers > 1 else 0.
    v_center = (top + bottom)/2.

    def layer_top(layer_size):
        # y coordinate of the topmost node of a vertically centred layer
        return v_spacing*(layer_size - 1)/2. + v_center

    # Nodes
    for n, layer_size in enumerate(layer_sizes):
        for m in range(layer_size):
            circle = plt.Circle((n*h_spacing + left, layer_top(layer_size) - m*v_spacing),
                                v_spacing/4., color='w', ec='k', zorder=4)
            ax.add_artist(circle)
    # Edges
    for n, (layer_size_a, layer_size_b) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        layer_top_a = layer_top(layer_size_a)
        layer_top_b = layer_top(layer_size_b)
        for m in range(layer_size_a):
            for o in range(layer_size_b):
                line = plt.Line2D([n*h_spacing + left, (n + 1)*h_spacing + left],
                                  [layer_top_a - m*v_spacing, layer_top_b - o*v_spacing], c='k')
                ax.add_artist(line)

def label(ax, x, y, text, label_size=30, **kwargs):
    """Draw centred text at (x, y) on `ax`, on top of other artists.

    The text is drawn on the axes that was passed in (previously `ax` was
    ignored and the pyplot "current axes" was used instead).

    :parameters:
        - ax : matplotlib axes
            Axes to draw the text on
        - x, y : float
            Position of the text centre, in data coordinates
        - text : str
            Text to draw
        - label_size : int
            Font size
        - kwargs
            Passed through to ``ax.text`` (e.g. ``rotation``)
    """
    ax.text(x, y, text, {'size': label_size},
            va='center', ha='center', zorder=10, **kwargs)

def heaviside(ax, left, right, bottom, top):
    """Draw a step-function glyph on `ax` inside the given bounding box.

    Three line segments are added: flat at `bottom` from `left` to the
    horizontal midpoint, vertical at the midpoint, then flat at `top` from
    the midpoint to `right`.
    """
    step_x = left + (right - left)/2.
    corner_xs = [left, step_x, step_x, right]
    corner_ys = [bottom, bottom, top, top]
    for k in range(3):
        segment = plt.Line2D(corner_xs[k:k + 2], corner_ys[k:k + 2],
                             c='k', lw=2, zorder=10)
        ax.add_artist(segment)

# Figure '2-perceptron.pdf': a 3-input, 1-output network cartoon with
# labelled inputs and weights, an extra bias node and a step-function glyph
# drawn on the output node.
plt.figure(figsize=(FIGSIZE[0], (1 - .16)*FIGSIZE[0]))
ax = plt.gca()
draw_neural_net(ax, .1, .9, .0, 1., [3, 1])

# Input labels, one per input node (x = .1 is the left column of nodes)
label(ax, .1, .83333, '$x[1]$')
label(ax, .1, .5, '$x[2]$')
label(ax, .1, .16666, '$x[3]$')

# Weight labels, placed along the three edges; the outer two are rotated
# to follow the slope of their edge
label(ax, .5, .71, '$w[1]$', rotation=-24)
label(ax, .5, .535, '$w[2]$')
label(ax, .5, .375, '$w[3]$', rotation=24)

# Bias node: same radius as the network's nodes (v_spacing/4 with
# v_spacing = 1/3), connected to the output node at (.9, .5)
b_coords = (.6, .2)
circle = plt.Circle(b_coords, 1./3./4.,
                    color='w', ec='k', zorder=4)
plt.gca().add_artist(circle)
label(ax, b_coords[0], b_coords[1], '$b$', 36)
line = plt.Line2D([b_coords[0], .9], [b_coords[1], .5], c='k')
ax.add_artist(line)

# Step-function glyph centred on the output node
heaviside(ax, .842, .958, .46, .54)

plt.ylim([.08, .92])
plt.axis('off')
plt.savefig('2-perceptron.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Figure '2-convolution.pdf': a 6x6 input grid of circles, a 4x4 output grid,
# and a highlighted 3x3 input patch connected by lines to one highlighted
# output unit.
plt.figure(figsize=(FIGSIZE[0], .71*FIGSIZE[0]))
ax = plt.gca()

# Geometry shared by every grid (read as globals by circle_grid)
circle_radius = .04
circle_spacing = .09

def circle_grid(ax, size, left, bottom, color='w', zorder=4, **kwargs):
    """Add a size[0] x size[1] grid of circles to `ax`.

    (left, bottom) is the centre of the grid's first circle; circles are
    circle_spacing apart with radius circle_radius (both module-level).
    The z-order decreases by one per step in x and in y, starting from
    `zorder`.  Extra kwargs are forwarded to plt.Circle.
    """
    for x in range(size[0]):
        for y in range(size[1]):
            circle = plt.Circle((x*circle_spacing + left, y*circle_spacing + bottom),
                                circle_radius, color=color, ec='k', zorder=zorder - x - y, **kwargs)
            ax.add_artist(circle)

input_left, input_bottom = .05, .05
output_left, output_bottom = .65, input_bottom + 3.85*circle_spacing
# Full input grid, translucent output grid, then the highlighted 3x3 input
# patch and the single highlighted output unit
circle_grid(ax, (6, 6), input_left, input_bottom)
circle_grid(ax, (4, 4), output_left, output_bottom, color='none', zorder=35, alpha=.3)
circle_grid(ax, (3, 3), input_left + 2*circle_spacing, input_bottom + 2*circle_spacing, GREEN, zorder=30)
circle_grid(ax, (1, 1), output_left + 2*circle_spacing, output_bottom + 2*circle_spacing, GREEN, zorder=40)

# NOTE(review): ffdeg is not used anywhere else in this cell
ffdeg = circle_radius*np.sqrt(2)/2
# Connect every circle of the highlighted 3x3 patch to the highlighted output
# unit; each line's z-order is one below that of the circle it starts from
for x in range(3):
    for y in range(3):
        line = plt.Line2D([input_left + 2*circle_spacing + x*circle_spacing,
                           output_left + 2*circle_spacing],
                          [input_bottom + 2*circle_spacing + y*circle_spacing,
                           output_bottom + 2*circle_spacing], c='k', zorder=30 - x - y - 1)
        ax.add_artist(line)
plt.axis([0, .97, 0, .71])
plt.axis('off')
plt.savefig('2-convolution.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Figure '2-learning_rate.pdf': five steps of gradient descent on f(x) = x^2
# from the same starting point, for four different learning rates (one
# subplot each).
plt.figure(figsize=FIGSIZE)
def f(x):
    """Objective being minimised: x squared."""
    return x**2
def df(x):
    """Derivative of f: 2x."""
    return 2*x
x = np.linspace(-1, 1, 100)
x_init = .7

for n, lr in enumerate([1.035, .9, .4, .08]):
    plt.subplot(2, 2, n + 1)
    plt.plot(x, f(x), BLUE, lw=2)
    x_current = x_init
    for i in range(5):
        # Plain gradient-descent update
        x_new = x_current - lr*df(x_current)
        # Dotted segment between consecutive iterates, with a dot at each end
        plt.plot([x_current, x_new], [f(x_current), f(x_new)], 'k:')
        plt.plot([x_current, x_new], [f(x_current), f(x_new)], 'k.', ms=10)
        x_current = x_new
    plt.ylim(-.01, 1.)
    plt.axis('off')
plt.savefig('2-learning_rate.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
import bayes_opt
import matplotlib.gridspec

def target(x):
    """Toy 1-D objective: a downward parabola centred at .7 plus a sine ripple."""
    parabola = -3*(x - .7)**2
    ripple = .3*(np.sin(10*x**2) + 3)
    return parabola + ripple

# Dense evaluation of the objective on [0, 1], used for plotting
x = np.linspace(0, 1, 1000)
y = target(x)

# Optimiser over the single parameter x in (0, 1).
# NOTE(review): the third positional argument (False) is presumably the
# verbosity flag of this bayes_opt version -- confirm against the installed API.
bo = bayes_opt.BayesianOptimization(target, {'x': (0, 1)}, False)

gp_params = {'corr': 'squared_exponential'}
# Seed the optimiser with four known (objective value -> parameters) pairs
bo.initialize(dict((target(n), {'x': n}) for n in [.05, .4, .7, .97]))
# Zero random initial points and zero iterations are requested, so no new
# points are asked for here; acq='ei' selects the acquisition function
bo.maximize(init_points=0, n_iter=0, acq='ei', **gp_params)

def posterior(bo, xmin=0, xmax=1, n_points=1000):
    """Fit the optimiser's GP to its observations and evaluate the posterior.

    Previously ``xmin``/``xmax`` were accepted but immediately overwritten
    with 0 and 1.  They are now honoured, and their defaults are 0 and 1 so
    that ``posterior(bo)`` returns exactly what it did before.

    :parameters:
        - bo : object
            Must expose ``gp`` (with ``fit`` and ``predict``), ``X`` and ``Y``
        - xmin, xmax : float
            Range over which the posterior is evaluated
        - n_points : int
            Number of evenly spaced evaluation points

    :returns:
        - mu : posterior mean as returned by ``bo.gp.predict``
        - sigma : square root of the second value returned by
          ``bo.gp.predict(..., eval_MSE=True)``
    """
    bo.gp.fit(bo.X, bo.Y)
    grid = np.linspace(xmin, xmax, n_points).reshape(-1, 1)
    mu, sigma2 = bo.gp.predict(grid, eval_MSE=True)
    return mu, np.sqrt(sigma2)

# Figure '2-bayesian_optimization.pdf': top panel shows the objective, the
# observed points and a +/- one-sigma band of the GP posterior; bottom panel
# shows the acquisition function.
fig = plt.figure(figsize=FIGSIZE)

# Two stacked panels, the top one three times as tall as the bottom one
gs = matplotlib.gridspec.GridSpec(2, 1, height_ratios=[3, 1])
ax = plt.subplot(gs[0])

mu, sigma = posterior(bo)
ax.plot(x, y, BLUE, linewidth=2)
ax.plot(bo.X.flatten(), bo.Y, 'k.', markersize=20)

# Closed polygon: lower bound left-to-right, then upper bound right-to-left
ax.fill(np.concatenate([x, x[::-1]]),
          np.concatenate([mu - sigma, (mu + sigma)[::-1]]),
          fc=GREY, ec='None')
ax.axis('off')
ax.set_title('Objective')
ax.set_ylim([y.min(), y.max()*1.1])

ax.set_xlim((0, 1))

ax = plt.subplot(gs[1])
# NOTE(review): the last argument (0) is presumably the current best
# objective value expected by this bayes_opt version's utility() -- confirm.
utility = bo.util.utility(x.reshape((-1, 1)), bo.gp, 0)
ax.fill_between(x, utility, facecolor=GREEN, edgecolor='none')
ax.set_xlim((0, 1))
ax.axis('off')
ax.set_title('Expected Improvement')
plt.savefig('2-bayesian_optimization.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Figure '2-audio_signal.pdf': waveform of a 5-second excerpt.
# NOTE: `a` and `fs` defined here are reused by the spectrum/spectrogram
# cell further down.
duration = 5
a, fs = librosa.load('data/2_song.mp3', offset=.25, duration=duration)
plt.figure(figsize=FIGSIZE)
# Resample to plot_fs Hz for plotting only; the time axis is built with
# duration*plot_fs points to match
plot_fs = 1000
plt.plot(np.linspace(0, duration, duration*plot_fs), librosa.resample(a, fs, plot_fs), BLUE, lw=.2)
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.ylim([-1, 1])
plt.savefig('2-audio_signal.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
import scipy.interpolate

# Per-frequency tables used by iso226_spl_contour.  All four arrays have one
# entry per frequency in tbl_f (Hz).
tbl_f = np.asarray(
    [20, 25, 31.5, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315, 400,
     500, 630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300,
     8000, 10000, 12500])
tbl_alpha_f = np.asarray(
    [0.532, 0.506, 0.480, 0.455, 0.432, 0.409, 0.387, 0.367, 0.349, 0.330,
     0.315, 0.301, 0.288, 0.276, 0.267, 0.259, 0.253, 0.250, 0.246, 0.244,
     0.243, 0.243, 0.243, 0.242, 0.242, 0.245, 0.254, 0.271, 0.301])
tbl_L_U = np.asarray(
    [-31.6, -27.2, -23.0, -19.1, -15.9, -13.0, -10.3, -8.1, -6.2, -4.5,
     -3.1, -2.0, -1.1, -0.4, 0.0, 0.3, 0.5, 0.0, -2.7, -4.1, -1.0, 1.7,
     2.5, 1.2, -2.1, -7.1, -11.2, -10.7, -3.1])
tbl_T_f = np.asarray(
    [78.5, 68.7, 59.5, 51.1, 44.0, 37.5, 31.5, 26.5, 22.1, 17.9, 14.4,
     11.4, 8.6, 6.2, 4.4, 3.0, 2.2, 2.4, 3.5, 1.7, -1.3, -4.2, -6.0, -5.4,
     -1.5, 6.0, 12.6, 13.9, 12.3])

def iso226_spl_contour(f, L_N=40):
    """Sound pressure level (dB) of the equal-loudness contour at `f`.

    The level is computed at the tabulated frequencies from the tables above
    and interpolated to `f` with a cubic spline.

    :parameters:
        - f : float or np.ndarray
            Frequency or frequencies (same unit as tbl_f) to evaluate at
        - L_N : float
            Loudness level of the contour

    :returns:
        - Interpolated sound pressure level(s), one per entry of `f`
    """
    loudness_term = 4.47E-3*(10.0**(0.025*L_N)-1.15)
    threshold_term = (0.4*10.0**((tbl_T_f+tbl_L_U)/10.0-9.0))**tbl_alpha_f
    A_f = loudness_term + threshold_term
    spl_at_tabulated_f = (10.0/tbl_alpha_f)*np.log10(A_f) - tbl_L_U + 94.0
    contour = scipy.interpolate.InterpolatedUnivariateSpline(
        tbl_f, spl_at_tabulated_f, k=3)
    return contour(f)

# Figure '2-equal_loudness.pdf': the default (L_N=40) equal-loudness contour
# on a logarithmic frequency axis.
# 100 log-spaced frequencies spanning the tabulated range (20 .. 12500)
f = np.logspace(np.log10(20), np.log10(12500), 100, base=10)
l = iso226_spl_contour(f)
plt.figure(figsize=FIGSIZE)
plt.semilogx(f, l, BLUE, lw=2)
plt.xlim([15, 15000])
# Plain-number tick labels instead of the default powers of ten
plt.xticks([30, 100, 300, 1000, 3000, 10000],
           [30, 100, 300, 1000, 3000, 10000])
plt.xlabel('Frequency (Hz)')
plt.ylabel('Sound Pressure Level (dB)')
plt.savefig('2-equal_loudness.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Figure '2-timbres.pdf': 200 samples of the same note (pitch 48, velocity
# 100, one second long) synthesized with two different MIDI programs at
# 8000 Hz.
piano = pretty_midi.PrettyMIDI()
i = pretty_midi.Instrument(0, False)
i.notes.append(pretty_midi.Note(100, 48, 0., 1.))
piano.instruments.append(i)

plt.figure(figsize=FIGSIZE)
plt.plot(piano.fluidsynth(8000)[400:600], BLUE, lw=2)

# Second timbre.  The instrument used to be appended to `piano` (and `piano`
# re-synthesized), so the second curve was a mix of both programs and
# `guitar` was never used; it now gets its own PrettyMIDI object.
# NOTE(review): the variable is called `guitar`, but program 41 is not a
# guitar program in General MIDI -- confirm the intended program number.
guitar = pretty_midi.PrettyMIDI()
i = pretty_midi.Instrument(41, False)
i.notes.append(pretty_midi.Note(100, 48, 0., 1.))
guitar.instruments.append(i)
plt.plot(guitar.fluidsynth(8000)[400:600], GREEN, lw=2)
sns.despine()
plt.xlabel('Time (ms)')
plt.ylabel('Amplitude')
plt.ylim([-1, 1])
# At 8000 Hz there are 8 samples per millisecond
plt.xticks(np.arange(0, 201, 50), np.arange(0, 201, 50)/8.)
plt.savefig('2-timbres.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Figures '2-spectrum.pdf', '2-spectrogram.pdf', '2-log_spectrogram.pdf' and
# '2-cqt.pdf'.  Uses `a` (audio samples) and `fs` (sample rate) from the cell
# that loads 'data/2_song.mp3'.
#
# Frequency tick labels: bin k of an n-point real FFT lies at k*fs/n Hz.  The
# labels were previously computed as if the bins spanned 0..fs instead of
# 0..fs/2, which made every label twice the true frequency.

def _hz_tick_labels(bins, bin_hz, resolution=50):
    """Frequency (Hz) of each bin index, rounded to the nearest `resolution`."""
    return (resolution*np.round(np.asarray(bins)*bin_hz/resolution)).astype(int)

def _show_gram(gram, vmax_percentile):
    """Open a flat figure and show `gram` as an image with a 0-5 s time axis."""
    plt.figure(figsize=FIGSIZE_FLAT)
    plt.imshow(gram, aspect='auto', origin='lower', cmap=plt.cm.hot, interpolation='none',
               vmin=np.percentile(gram, 10), vmax=np.percentile(gram, vmax_percentile))
    # Six ticks labelled 0..5 (seconds); // keeps the step an integer
    plt.xticks(np.arange(0, gram.shape[1], gram.shape[1]//5), np.arange(6))
    plt.xlabel('Time (s)')

plt.figure(figsize=FIGSIZE)
A = np.abs(np.fft.rfft(a))
N = A.shape[0]
#A /= N
# Only the lowest 5001 bins are shown
A = A[:5001]
spectrum_ticks = np.arange(0, A.shape[0], A.shape[0]//4)
plt.xticks(spectrum_ticks, _hz_tick_labels(spectrum_ticks, float(fs)/a.shape[0]))
plt.plot(A, BLUE, lw=.5)
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude')
plt.savefig('2-spectrum.pdf', bbox_inches='tight', pad_inches=0.1)

# FFT size used for the spectrograms (librosa.stft's default, made explicit
# because the tick labels depend on it)
stft_n_fft = 2048
stft_bin_hz = float(fs)/stft_n_fft

with sns.axes_style('white'):
    # Magnitude spectrogram, lowest 93 bins only
    A = np.abs(librosa.stft(a, n_fft=stft_n_fft))[:93]
    _show_gram(A, 99.5)
    stft_ticks = np.arange(0, A.shape[0], A.shape[0]//4)
    plt.yticks(stft_ticks, _hz_tick_labels(stft_ticks, stft_bin_hz))
    plt.ylabel('Frequency (Hz)')
    plt.savefig('2-spectrogram.pdf', bbox_inches='tight', pad_inches=0.1)

    # Same spectrogram on a log-amplitude scale
    A = librosa.logamplitude(A)
    _show_gram(A, 99.5)
    plt.yticks(stft_ticks, _hz_tick_labels(stft_ticks, stft_bin_hz))
    plt.ylabel('Frequency (Hz)')
    plt.savefig('2-log_spectrogram.pdf', bbox_inches='tight', pad_inches=0.1)

    # Log-amplitude constant-Q transform: 48 bins starting at MIDI note 36,
    # labelled with the note name every 12 bins
    A = librosa.logamplitude(np.abs(librosa.cqt(a, fmin=librosa.midi_to_hz(36), n_bins=48, real=False)))
    _show_gram(A, 99.9)
    plt.yticks(range(0, 48, 12), [librosa.midi_to_note(n) for n in range(36, 36 + 48, 12)])
    plt.ylabel('Note')
    plt.savefig('2-cqt.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Two hand-written 19-sample sequences and a hand-written alignment path
# (p indexes signal1, q indexes signal2; both lists have the same length).
signal1 = np.array([509, 113, -229, 253, -96, -195, 180, -303, -361, 17,
                    -13, 242, 14, -230, 300, 89, -112, -236, -298])
signal2 = np.array([543, 401, 122, -288, 62, 259, 180, -72, -336, 10,
                    223, 263, 35, -345, 68, 400, 38, -109, -301])
q = [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18]
p = [0, 0, 1, 2, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 18]
# Figure '2-example_dtw_sequences.pdf': both sequences (signal2 shifted up by
# 1000 so they do not overlap) with a dotted line per aligned pair.
with sns.axes_style('ticks'):
    plt.figure(figsize=FIGSIZE)
    plt.plot(signal1, lw=2, c=BLUE)
    plt.plot(signal1, '.', ms=10, c=BLUE)
    plt.plot(signal2 + 1000, lw=2, c=GREEN)
    plt.plot(signal2 + 1000, '.', ms=10, c=GREEN)
    for p_n, q_n in zip(p, q):
        plt.plot([p_n, q_n],  [signal1[p_n], signal2[q_n] + 1000], 'k:', lw=2)
    ax = plt.gca()
    ax.get_yaxis().set_visible(False)
    sns.despine(left=True)
    # Label samples starting from 1 rather than 0
    plt.xticks(range(0, 19, 3), range(1, 20, 3))
    plt.xlim([-.1, 18.1])
    plt.savefig('2-example_dtw_sequences.pdf', bbox_inches='tight', pad_inches=0.1)

# Figure '2-example_dtw_matrix.pdf': pairwise distance matrix (rows index
# signal1, columns index signal2) with a white square around every cell on
# the alignment path.
with sns.axes_style('white'):
    plt.figure(figsize=FIGSIZE)
    dist = scipy.spatial.distance.cdist(signal1.reshape(-1, 1), signal2.reshape(-1, 1))
    plt.imshow(dist, cmap=plt.cm.hot, interpolation='nearest')
    # Plotting the squares changes the axis limits; save and restore them
    axis = plt.axis()
    for x, y in zip(q, p):
        plt.plot([x - .5, x - .5, x + .5, x + .5, x - .5], [y - .5, y + .5, y + .5, y - .5, y - .5], 'w')
    plt.axis(axis)
    plt.xticks(range(0, 19, 3), range(1, 20, 3))
    plt.yticks(range(0, 19, 3), range(1, 20, 3))
    plt.savefig('2-example_dtw_matrix.pdf', bbox_inches='tight', pad_inches=0.1)

# Chapter 3

In [None]:
# Location of the alignment-search checkout (code, data and results used in
# this chapter).  Set the ALIGNMENT_SEARCH_PATH environment variable to point
# somewhere else; the original hard-coded path remains the default.
ALIGNMENT_SEARCH_PATH = os.environ.get(
    'ALIGNMENT_SEARCH_PATH',
    '/Users/craffel/Documents/projects/alignment-search/')
# Make the modules in that checkout importable
sys.path.append(ALIGNMENT_SEARCH_PATH)
import corrupt_midi
import create_data
import find_best_aligners
import db_utils

In [None]:
# Some utility functions
def compute_cqt(audio_data):
    """ Compute the log-magnitude, normalized, transposed CQT of audio_data

    Returns the transposed normalized log-magnitude CQT together with the
    times returned by create_data.extract_cqt.
    """
    magnitude, frame_times = create_data.extract_cqt(audio_data)
    # Log amplitude relative to the largest CQT value
    log_magnitude = librosa.logamplitude(magnitude, ref_power=magnitude.max())
    # 2 is passed positionally as the norm order
    normalized = librosa.util.normalize(log_magnitude, 2)
    return normalized.T, frame_times
def display_cqt(cqt):
    """ Show a CQT (transposed for display) on the current axes

    The colour range is clipped to the 1st-99th percentile of the values and
    the y axis is labelled with a note name every 12 bins, starting at MIDI
    note 36.
    """
    color_min = np.percentile(cqt, 1)
    color_max = np.percentile(cqt, 99)
    plt.imshow(cqt.T, aspect='auto', interpolation='nearest',
               origin='lower', cmap=plt.cm.hot,
               vmin=color_min, vmax=color_max)
    tick_bins = range(0, 48, 12)
    tick_names = [librosa.midi_to_note(note) for note in range(36, 36 + 48, 12)]
    plt.yticks(tick_bins, tick_names)

In [None]:
# Demonstration of the MIDI corruption process: CQT before corruption, CQT
# after corruption, and the timing offset introduced by the warping.
# Seeded so that the corruption is the same on every run.
np.random.seed(2)

# Grab a MIDI file from the clean MIDIs we used in this experiment
midi_file = os.path.join(ALIGNMENT_SEARCH_PATH, 'data/mid/Come as You Are.mid')
# Parse the MIDI file with pretty_midi
midi_object = pretty_midi.PrettyMIDI(midi_file)

with sns.axes_style('white'):
    # For illustration, we'll plot a CQT of the MIDI object
    # before and after corruptions.
    plt.figure(figsize=FIGSIZE_FLAT)
    original_cqt, original_times = compute_cqt(midi_object.fluidsynth(22050))
    display_cqt(original_cqt)
    #plt.title('Original MIDI CQT')
    plt.savefig('3-original_cqt.pdf', bbox_inches='tight', pad_inches=0.1)

# This is the wrapper function to apply all of the corruptions 
# defined in corrupt_midi
# NOTE(review): midi_object is synthesized again below to obtain the
# corrupted CQT, so corrupt_midi presumably modifies it in place -- confirm.
adjusted_times,  diagnostics = corrupt_midi.corrupt_midi(
    midi_object, original_times,
    # This defines the extent to which time will be warped
    warp_std=20,
    # These define how likely we are to crop out sections
    # We'll set them to 1 and 0 here for illustration; in the 
    # paper they are adjusted according to the desired corruption level
    start_crop_prob=0., end_crop_prob=0., middle_crop_prob=1.,
    # The likelihood that each instrument is removed
    remove_inst_prob=.5,
    # The likelihood that an instrument's program number is changed
    change_inst_prob=1.,
    # The standard deviation of velocity adjustment
    velocity_std=1.)

with sns.axes_style('white'):
    # Now, we can plot the CQT after corruptions.
    plt.figure(figsize=FIGSIZE_FLAT)
    corrupted_cqt, corrupted_times = compute_cqt(midi_object.fluidsynth(22050))
    display_cqt(corrupted_cqt)
    plt.xlabel('Frame')
    #plt.title('After corruption')
    plt.savefig('3-corrupted_cqt.pdf', bbox_inches='tight', pad_inches=0.1)

# We can also plot the timing offset, which we will try to reverse
plt.figure(figsize=FIGSIZE)
plt.plot(original_times, original_times - adjusted_times, BLUE, lw=2)
plt.xlim([0, original_times.max()])
plt.xlabel('Original time')
plt.ylabel('Offset from original time')
plt.savefig('3-warping.pdf', bbox_inches='tight', pad_inches=0.1)
# NOTE(review): despine() runs after savefig(), so it only affects the inline
# display and not '3-warping.pdf' -- confirm whether that is intended.
sns.despine()

In [None]:
# Figure '3-correction_error.pdf': align the corrupted CQT back to the
# original with DTW and plot the remaining timing error along the path.
# Compute a pairwise distance matrix of the original and corrupted CQTs
distance_matrix = scipy.spatial.distance.cdist(
    original_cqt, corrupted_cqt, 'sqeuclidean')
# Compute the lowest-cost path via DTW with "gold standard" parameters
# (.96 and the median distance are passed positionally to djitw.dtw)
p, q, score = djitw.dtw(
    distance_matrix, .96, np.median(distance_matrix), inplace=0)

# Compute the absolute error, clipped to within .5 seconds
plt.figure(figsize=FIGSIZE)
error = np.abs(np.clip(
    corrupted_times[q] - adjusted_times[p], -.5, .5))
plt.plot(original_times[p], error, BLUE, lw=2)
plt.xlabel('Time')
plt.ylabel('Correction error')
plt.xlim([0, original_times.max()])
plt.ylim([-0.01, .51])
plt.savefig('3-correction_error.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Load in the results from the parameter search experiment
params, objectives = db_utils.get_experiment_results(
    os.path.join(ALIGNMENT_SEARCH_PATH, 'results/parameter_experiment_gp/*.json'))
# Truncate to the 10 results with the lowest objective, best first
# (the comment here used to say 20, but the code has always kept 10)
good = np.argsort(objectives)[:10]
params = [params[n] for n in good]
objectives = [objectives[n] for n in good]
# Store each objective alongside its parameters so both end up in one table row
for param, objective in zip(params, objectives):
    param['objective'] = objective
# Column key -> column header, in display order.  The header containing
# LaTeX is a raw string so that '\p' is not treated as an (invalid) escape.
header_names = collections.OrderedDict([
    ('add_pen', r'$\phi$ Median Scale'),
    ('standardize', 'Standardize?'),
    ('gully', 'Gully $g$'),
    ('objective', 'Mean Error')])
def yes_no(x):
    """Map True/False to 'Yes'/'No'; return any non-bool value unchanged."""
    if not isinstance(x, bool):
        return x
    return 'Yes' if x else 'No'
# Pretty-print the best parameter settings as a LaTeX (booktabs) table.
# print(...) with a single argument behaves the same on Python 2 and 3.  The
# comprehension variable is not called `p`, because on Python 2 it would leak
# and overwrite the DTW path `p` computed earlier.
table_rows = [collections.OrderedDict([(k, yes_no(row[k])) for k in header_names])
              for row in params]
print(tabulate.tabulate(table_rows,
                        headers=header_names, tablefmt='latex_booktabs'))

In [None]:
# Figure '3-correlation.pdf': alignment error against normalized DTW distance
# for the best-performing system.
# Load in all confidence reporting experiment trials
trials = []
for trial_file in glob.glob(os.path.join(ALIGNMENT_SEARCH_PATH, 'results/confidence_experiment/*.json')):
    with open(trial_file) as f:
        trials.append(json.load(f))
# Retrieve the lowest-achieved mean absolute error
# (`objectives` was sorted best-first in the parameter-search cell above)
best_easy_error = objectives[0]
# Retrieve the confidence reporting trial for this system: the first trial
# whose mean 'easy' error matches; raises IndexError if no trial matches
best_trial = [t for t in trials
               if np.allclose(np.mean(t['results']['easy_errors']),
                              best_easy_error)][0]
# Retrieve the results from this trial
best_result = best_trial['results']

# Plot a scatter plot of mean alignment error vs. confidence score
# ('hard' and 'easy' results are concatenated, in the same order for both)
errors = np.array(best_result['hard_errors'] + best_result['easy_errors'])
scores = np.array(best_result['hard_penalty_len_norm_mean_norm_scores'] +
                  best_result['easy_penalty_len_norm_mean_norm_scores'])
plt.figure(figsize=FIGSIZE)
plt.scatter(errors, scores, marker='+', c='black', alpha=.3, s=40)
plt.gca().set_xscale('log')
plt.ylim(0., 1.1)
plt.xlim(.9*np.min(errors), np.max(errors)*1.1)
plt.xlabel('Alignment error')
plt.ylabel('Normalized DTW distance')
# Plain-number tick labels on the log axis
plt.xticks([.01, .025, .05, .1, .25, .5], [.01, .025, .05, .1, .25, .5])
plt.savefig('3-correlation.pdf', bbox_inches='tight', pad_inches=0.1)

In [None]:
# Load the manual alignment ratings.  Each CSV row is
# (alignment id, rating, score, note).
with open(os.path.join(ALIGNMENT_SEARCH_PATH, 'results/alignment_ratings.csv')) as f:
    reader = csv.reader(f)
    # Cast each entry in each row to the correct type; the raw score is
    # mapped to a confidence score 2*(1 - score), clipped to [0, 1]
    ratings = [[int(alignment_id), int(rating), np.clip(2*(1 - float(score)), 0, 1), note]
               for alignment_id, rating, score, note in reader]

# We made notes about each alignment, too.
# Here are all the alignments where a transcription was matched to a remix
# (the alignment id is dropped, leaving [rating, confidence score, note])
remixes = [r[1:] for r in ratings if ('remix' in r[-1].lower())]
# Highest confidence score first
remixes.sort(key = lambda x: -x[1])
# print(...) with a single argument behaves the same on Python 2 and 3
print(tabulate.tabulate(remixes, headers=['Rating', 'Confidence Score', 'Note'], tablefmt='latex_booktabs'))

In [None]:
# Plot a histogram for each rating
plt.figure(figsize=FIGSIZE)
data = [np.array([r[2] for r in ratings if r[1] == n]) for n in [1, 2, 3, 4, 5]]
violins = plt.violinplot(
    data, showextrema=False, showmeans=False,
    widths=[float(len(d))/max(len(d) for d in data) for d in data])
patches = plt.boxplot(data, showmeans=False, showcaps=False, showfliers=False, 
                      patch_artist=True, widths=.1)
for line in patches['whiskers']:
    line.set_visible(False)
for box in patches['boxes']:
    box.set_facecolor('None')
    box.set_alpha(.5)
    box.set_joinstyle('round')
    box.set_facecolor('w')
    box.set_edgecolor('k')
for line in patches['medians']:
    line.set_color('black')
for body in violins['bodies']:
    body.set_alpha(.8)
for n in [0, 1]:
    violins['bodies'][n].set_facecolor(BLUE)
for n in [2, 3, 4]:
    violins['bodies'][n].set_facecolor(GREEN)
plt.xticks(
    [1, 2, 3, 4, 5],
    [1, 2, 3, 4, 5])
    #['Wrong song', 'Bad alignment', 'Sloppy', 'Embellishments', 'Perfect'],
    #rotation=20)
plt.xlim([.5, 5.5])
plt.xlabel('Rating')
plt.ylabel('Confidence score')
plt.legend(handles=[matplotlib.patches.Patch(color=BLUE, label='Incorrect'),
                    matplotlib.patches.Patch(color=GREEN, label='Correct')],
           loc='upper left')
plt.ylim(-.03, 1.03)
plt.savefig('3-violin.pdf',  bbox_inches='tight', pad_inches=0.1)

# Chapter 4

In [None]:
def pretty_cqt(audio_data, fs=feature_extraction.AUDIO_FS):
    """Log-amplitude constant-Q spectrogram of audio_data, for display.

    Hop length, lowest note and number of bins are taken from the
    feature_extraction module.  The result is transposed so that rows are
    frames and is returned as float32; no per-frame normalization is applied.
    """
    cqt_magnitude = np.abs(librosa.cqt(
        audio_data, sr=fs, hop_length=feature_extraction.AUDIO_HOP,
        fmin=librosa.midi_to_hz(feature_extraction.NOTE_START),
        n_bins=feature_extraction.N_NOTES, real=False))
    # Log amplitude of the squared magnitude, relative to the largest magnitude
    log_power = librosa.logamplitude(cqt_magnitude**2, ref_power=cqt_magnitude.max())
    # One row per frame, in single precision
    return log_power.T.astype(np.float32)
# Load a recording and synthesize the MIDI file that accompanies it, then
# compute a display CQT for each.
# NOTE(review): `feature_extraction` is not imported anywhere in the visible
# notebook; this cell needs it to be imported first.
audio, fs = librosa.load('data/4-mmt.wav', sr=feature_extraction.AUDIO_FS)
audio_gram = pretty_cqt(audio)
m = pretty_midi.PrettyMIDI('data/4-mmt.mid')
midi_audio_aligned = m.fluidsynth(feature_extraction.AUDIO_FS)
# Adjust to the same size as audio
if midi_audio_aligned.shape[0] > audio.shape[0]:
    # Synthesized audio is longer: truncate it
    midi_audio_aligned = midi_audio_aligned[:audio.shape[0]]
else:
    # Synthesized audio is shorter (or equal): pad the end with zeros.
    # Despite its name, trim_amount is the number of samples appended.
    trim_amount = audio.shape[0] - midi_audio_aligned.shape[0]
    midi_audio_aligned = np.append(midi_audio_aligned,
                                   np.zeros(trim_amount))
midi_aligned_gram = pretty_cqt(midi_audio_aligned)

In [None]:
def draw_brace(ax, left, right, bottom, top, beta=10., fliplr=False, flipud=False):
    """Draw a curly brace on `ax` as a single black curve of 200 points.

    The curve is the sum of two sigmoids over the first half of the span,
    mirrored for the second half.  `beta` sets the sigmoid steepness, `fliplr`
    negates the profile, and `flipud` swaps the roles of the horizontal and
    vertical arguments and plots the curve with x and y exchanged.
    """
    if flipud:
        left, bottom = bottom, left
        top, right = right, top
    middle = (top + bottom)/2.
    first_half = np.linspace(bottom, middle, 100)
    sigmoid_at_bottom = 1/(1. + np.exp(beta*(first_half - bottom)))
    sigmoid_at_middle = 1/(1. + np.exp(beta*(first_half - middle)))
    half_profile = sigmoid_at_bottom + sigmoid_at_middle
    # Mirror the first half to obtain the full, symmetric profile
    profile = np.concatenate((half_profile, half_profile[::-1]))
    profile = (-profile + 1) if fliplr else (profile - 1)
    # Scale and shift the profile into the [left, right] range
    profile = profile*(right - left) + (left + right)/2.
    span = np.linspace(bottom, top, 200)
    first, second = (span, profile) if flipud else (profile, span)
    ax.plot(first, second, 'k', lw=2)

# Figure '4-hashing_schematic.pdf': two CQTs (top and bottom strips), each
# feeding a window of frames through its own network (f and g) to produce a
# row of binary values.
fig = plt.figure(figsize=(FIGSIZE[0], FIGSIZE[0]))
ax = plt.gca()
# The two networks, stacked vertically, each with an italic name in a corner
draw_neural_net(ax, .3, .6, .51, .79, [4, 7, 5, 3])
ax.text(.32, .76, '$f$', ha='center', va='center', fontdict={'size': 30})
draw_neural_net(ax, .3, .6, .21, .49, [4, 6, 7, 3])
ax.text(.32, .46, '$g$', ha='center', va='center', fontdict={'size': 30})

# Arrow from the highlighted window of the top CQT to the input of f,
# ending at a brace that spans the input layer
ax.annotate('', xy=(.15, .8),  xycoords='data',
            xytext=(.25, .65), textcoords=None,
            arrowprops=dict(arrowstyle="<-",
                            connectionstyle="angle,angleA=0,angleB=90,rad=10",
                            lw=2),
            size=40)
draw_brace(ax, .255, .285, .58, .72, 1000.)

# Same for the bottom CQT and the input of g
ax.annotate('', xy=(.15, .2),  xycoords='data',
            xytext=(.25, .35), textcoords=None,
            arrowprops=dict(arrowstyle="<-",
                            connectionstyle="angle,angleA=0,angleB=90,rad=10",
                            lw=2),
            size=40)
draw_brace(ax, .255, .285, .28, .42, 1000.)

# Arrow from the output of f towards its binary row, with a left-right
# flipped brace spanning the output layer
ax.annotate('', xy=(.65, .65),  xycoords='data',
            xytext=(.8, .55), textcoords=None,
            arrowprops=dict(arrowstyle="<-",
                            connectionstyle="angle,angleA=90,angleB=0,rad=10",
                            lw=2),
            size=40)
draw_brace(ax, .615, .645, .6, .7, 1000., 1)

# Same for the output of g
ax.annotate('', xy=(.65, .35),  xycoords='data',
            xytext=(.8, .45), textcoords=None,
            arrowprops=dict(arrowstyle="<-",
                            connectionstyle="angle,angleA=90,angleB=0,rad=10",
                            lw=2),
            size=40)
draw_brace(ax, .615, .645, .3, .4, 1000., 1)

# Frames 500-700 of each CQT: recording on top, synthesized MIDI at the bottom
ax.imshow(audio_gram[500:700].T, interpolation='nearest', aspect='auto', cmap=plt.cm.hot,
          origin='lower', extent=(0, 1, .8, 1), vmin=np.percentile(audio_gram, 15))
ax.imshow(midi_aligned_gram[500:700].T, interpolation='nearest', aspect='auto', cmap=plt.cm.hot,
          origin='lower', extent=(0, 1, 0, .2), vmin=np.percentile(midi_aligned_gram, 15))

# Two hand-written example binary rows (they differ in a single position)
ax.imshow(np.array([[1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0]]), cmap=plt.cm.hot,
          extent=(.6, 1., .46, .485), interpolation='nearest')

ax.imshow(np.array([[1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0]]), cmap=plt.cm.hot,
          extent=(.6, 1., .51, .535), interpolation='nearest')

# Translucent white rectangles marking the window of frames on each CQT
ax.add_patch(matplotlib.patches.Rectangle((0.13, 0.001), 0.04, 0.2, alpha=.6, fc='w'))
ax.add_patch(matplotlib.patches.Rectangle((0.13, 0.8), 0.04, 0.2, alpha=.6, fc='w'))

# Black outlines around the two binary rows
ax.add_patch(matplotlib.patches.Rectangle((0.6, 0.46), 0.4, 0.025, fc='None', ec='k'))
ax.add_patch(matplotlib.patches.Rectangle((0.6, 0.51), 0.4, 0.025, fc='None', ec='k'))

# imshow changes the limits; reset to the unit square and hide the axes
ax.axis([0, 1, 0, 1])
ax.axis('off')
plt.savefig('4-hashing_schematic.pdf',  bbox_inches='tight', pad_inches=0.1)

In [None]:
# Figure '4-popcnt.pdf': two 16-bit hashes, their bitwise XOR, and the number
# of set bits in the XOR, laid out like a written-out calculation.
hash_a = np.array([1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0], dtype=bool)
hash_b = np.array([1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0], dtype=bool)
xor = np.bitwise_xor(hash_a, hash_b)
# Number of positions in which the two hashes differ
popcnt = np.sum(xor)

plt.figure(figsize=FIGSIZE)
eps = .002
ax = plt.gca()

# Horizontal extent of the hash rows
hash_l = .24
hash_r = .995
# NOTE: vheight is negative (hash_l < hash_r), so every `1 - k*vheight`
# below is greater than 1 and grows with k.  The y axis set at the end of
# this cell runs from 1 - 4*vheight + .01 to 1, i.e. it is inverted, which
# puts the first row at the top of the figure.
vheight = (hash_l - hash_r)/16

# Row 1: hash_a
plt.imshow(hash_a[np.newaxis], interpolation='nearest',
           extent=(hash_l, hash_r, 1., 1 - vheight + eps), cmap=plt.cm.hot)
ax.add_patch(plt.Rectangle((hash_l, 1 - vheight), hash_r - hash_l, vheight, fc='none'))

# Row 2: hash_b
plt.imshow(hash_b[np.newaxis], interpolation='nearest',
           extent=(hash_l, hash_r, 1 - 3*vheight/2 + eps, 1 - 5*vheight/2), cmap=plt.cm.hot)
ax.add_patch(plt.Rectangle((hash_l, 1 - 5*vheight/2), hash_r - hash_l, vheight, fc='none'))

# Row 3: XOR of the two hashes
plt.imshow(xor[np.newaxis], interpolation='nearest',
           extent=(hash_l, hash_r, 1 - 3*vheight, 1 - 4*vheight), cmap=plt.cm.hot)
ax.add_patch(plt.Rectangle((hash_l, 1 - 4*vheight), hash_r - hash_l, vheight, fc='none'))

# Horizontal rule between the operands and the result
plt.plot([hash_l*.7, hash_r], [1 - 11*vheight/4, 1 - 11*vheight/4], 'k', lw=2)

# XOR symbol (a circle with a plus through it) to the left of hash_b
ax.add_patch(plt.Circle((hash_l*.8, 1 - 2*vheight), vheight/2, fc='none', lw=2))
plt.plot((hash_l*.8, hash_l*.8), (1 - 3*vheight/2, 1 - 5*vheight/2), 'k', lw=2)
plt.plot((hash_l*.8 - vheight/2, hash_l*.8 + vheight/2), (1 - 2*vheight, 1 - 2*vheight), 'k', lw=2)

# 'POPCNT(' before the XOR row and ') = <count>' after it
plt.text(hash_l/2, 1 - 7*vheight/2 + .003, 'POPCNT(', va='center', ha='center',
         fontdict={'size': 28, 'family': 'monospace'})
plt.text(hash_r + (1 - hash_r)/2., 1 - 7*vheight/2 + .003, ')$\;$=$\;${}'.format(popcnt),
         va='center', ha='left', fontdict={'size': 28, 'family': 'monospace'})

plt.axis([0, 1, 1 - 4*vheight + .01, 1])
plt.axis('off')

plt.savefig('4-popcnt.pdf',  bbox_inches='tight', pad_inches=0.1)

In [None]:
# How many false positives/negatives do we get from thresholding the alignment scores at .5?
with open(os.path.join(ALIGNMENT_SEARCH_PATH, 'results/alignment_ratings.csv')) as f:
    reader = csv.reader(f)
    # Cast each entry in each row to the correct type; the raw score is
    # mapped to a confidence score 2*(1 - score), clipped to [0, 1]
    ratings = [[int(alignment_id), int(rating), np.clip(2*(1 - float(score)), 0, 1), note]
               for alignment_id, rating, score, note in reader]
# Each row is [alignment id, rating, confidence score, note].
# print(...) with a single argument behaves the same on Python 2 and 3.
# Rated 2 or lower but confidence at or above the threshold
print(np.sum([r[1] <= 2 and r[2] >= .5 for r in ratings]))
# Rated 3 or higher but confidence at or below the threshold
print(np.sum([r[1] >= 3 and r[2] <= .5 for r in ratings]))
# Rated 3 or higher and confidence at or above the threshold
print(np.sum([r[1] >= 3 and r[2] >= .5 for r in ratings]))