This notebook contains code to run evaluations on everything inside the `work` directory. The `work` directory contains four types of files:
- performance midi
- performance wav
- score midi
- score wav

In [11]:
import os,sys,time
import numpy as np
from scipy.io import wavfile
import sklearn
from IPython.display import Audio
import musiclib, database
%load_ext cython

The cython extension is already loaded. To reload it, use:
  %reload_ext cython


In [12]:
%%cython
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport sqrt

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef align(float[:,:] sig1,float[:,:] sig2):
    """Fill the dynamic-programming tables that align two feature sequences.

    The local cost of pairing row ``j`` of ``sig1`` with row ``k`` of ``sig2``
    is the Euclidean distance between the two rows. Each cell accumulates that
    local cost on top of the cheapest of its up / left / up-left neighbours.

    Parameters
    ----------
    sig1, sig2 : float32 2-D memoryviews
        One row per frame, one column per feature. Both must have the same
        number of columns.

    Returns
    -------
    (npL, npP) : tuple of float32 arrays, each of shape (len(sig1), len(sig2))
        ``npL[j, k]`` is the accumulated cost of the cell.
        ``npP[j, k]`` records which neighbour the cell was reached from:
        1 = up ``(j-1, k)``, 2 = left ``(j, k-1)``, 3 = up-left ``(j-1, k-1)``.
        The origin cell ``(0, 0)`` is marked 3.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of feature columns.
    """
    cdef int d = sig1.shape[1]
    cdef int len1 = sig1.shape[0]
    cdef int len2 = sig2.shape[0]
    cdef np.ndarray[np.float32_t, ndim=2] npL
    cdef np.ndarray[np.float32_t, ndim=2] npP
    cdef float[:,:] L
    cdef float[:,:] P
    cdef float cost,tmp
    cdef int j,k,i

    # Bounds checking is switched off for this function, so a column-count
    # mismatch would silently read past the end of sig2's rows.
    if sig2.shape[1] != d:
        raise ValueError('sig1 and sig2 must have the same number of features '
                         '(got %d and %d)' % (d, sig2.shape[1]))

    npL = np.empty((len1,len2), dtype=np.float32)
    npP = np.empty((len1,len2), dtype=np.float32)
    L = npL
    P = npP

    for j in range(0,len1):
        for k in range(0,len2):
            # Euclidean distance between frame j of sig1 and frame k of sig2.
            cost = 0
            for i in range(d):
                tmp = sig1[j,i] - sig2[k,i]
                cost += tmp * tmp
            cost = sqrt(cost)

            if j == 0 and k == 0:
                L[j,k] = cost
                P[j,k] = 3
            elif k == 0:
                # First column: the only predecessor is the cell above.
                L[j,k] = cost + L[j-1,k]
                P[j,k] = 1
            elif j == 0:
                # First row: the only predecessor is the cell to the left.
                L[j,k] = cost + L[j,k-1]
                P[j,k] = 2
            else: # j, k > 0
                if L[j-1,k] < L[j,k-1] and L[j-1,k] < L[j-1,k-1]: # insertion (up)
                    P[j,k] = 1
                    L[j,k] = cost + L[j-1,k]
                elif L[j,k-1] < L[j-1,k-1]: # deletion (left)
                    P[j,k] = 2
                    L[j,k] = cost + L[j,k-1]
                else: # match (up left); also wins every tie
                    P[j,k] = 3
                    L[j,k] = cost + L[j-1,k-1]

    return npL,npP

def traceback_loss(float[:,:] sig1,float[:,:] sig2, float[:,:] L):
    """Walk the accumulated-cost table back from its last cell to (0, 0).

    At every cell the local cost (Euclidean distance between the two frames)
    is recomputed, and the predecessor whose accumulated cost plus that local
    cost reproduces ``L[j, k]`` exactly is taken. Candidates are tried in the
    order up-left, left, up.

    Parameters
    ----------
    sig1, sig2 : float32 2-D memoryviews
        The feature sequences (one row per frame) that ``L`` was built from.
    L : float32 2-D memoryview
        Accumulated-cost table indexed as ``L[j, k]`` with ``j`` over ``sig1``
        rows and ``k`` over ``sig2`` rows.

    Returns
    -------
    (path, costs) : tuple of lists
        ``path`` is the list of ``(j, k)`` index pairs from ``(0, 0)`` to the
        last cell; ``costs`` holds ``L[j, k]`` for each pair, in the same order.

    Raises
    ------
    AssertionError
        If no predecessor reproduces ``L[j, k]``, i.e. ``L`` is not consistent
        with the given signals.
    """
    cdef int j = sig1.shape[0]-1
    cdef int k = sig2.shape[0]-1
    cdef int d = sig1.shape[1]
    cdef int i
    cdef float cost,tmp
    A = []
    C = []
    while True:
        if j == 0 and k == 0:
            A.append((0,0))
            C.append(L[0,0])
            break # got back to the beginning

        # Local cost of the current cell, computed the same way the table was filled.
        cost = 0
        for i in range(d):
            tmp = sig1[j,i] - sig2[k,i]
            cost += tmp * tmp
        cost = sqrt(cost)

        A.append((j,k))
        C.append(L[j,k])

        # Exact float comparison is intentional: the predecessor is identified
        # by reproducing the very same float32 sum that produced L[j,k].
        if j>0 and k>0 and L[j,k] == L[j-1,k-1] + cost: # progress
            j -= 1
            k -= 1
        elif k>0 and L[j,k] == L[j,k-1] + cost: # stay sig2
            k -= 1
        elif j>0 and L[j,k] == L[j-1,k] + cost: # stay sig1
            j -= 1
        else:
            raise AssertionError(
                'traceback failed at (j=%d, k=%d): L[j,k]=%r is not reachable '
                'from any neighbour with local cost %r' % (j, k, L[j,k], cost))

    return list(reversed(A)),list(reversed(C))

In [34]:
def _load_padded_audio(wav_path, length_seconds):
    """Read a wav file, optionally trim it, and prepend one second of silence.

    Returns (sample_rate, pad_length_in_samples, padded_samples).
    """
    fs, samples = wavfile.read(wav_path)
    left_pad = 1*fs
    if length_seconds is not None:
        samples = samples[0:int(length_seconds*fs)]
    # The silence takes its channel layout from the data, so mono files work too.
    silence = np.zeros((left_pad,) + samples.shape[1:])
    return fs, left_pad, np.concatenate((silence, samples), axis=0)


def _midi_notes_in_frames(midi_path, fs, left_pad, stride):
    """Load (note, onset, offset) entries from a midi file, with times converted from seconds to frames."""
    return [(e[0], (e[1]*fs + left_pad)/stride, (e[2]*fs + left_pad)/stride)
            for e in musiclib.load_midi(midi_path)]


def _piano_roll(notes, num_samples, stride):
    """Build a (frames x 128) boolean piano roll from (note, onset_frame, offset_frame) entries.

    Frame f is active for a note when sample f*stride lies in
    [onset*stride, offset*stride). Notes reaching past num_samples are
    reported and left out.
    """
    roll = np.zeros((len(range(0, num_samples, stride)), 128), dtype=bool)
    for note, onset, offset in notes:
        first_sample = int(onset*stride)
        end_sample = int(offset*stride)
        if first_sample >= end_sample:
            continue
        if first_sample > num_samples or end_sample > num_samples:
            print("onset : " + str(first_sample) + " offset " + str(end_sample))
            continue
        # Ceiling division: first frame whose starting sample is >= the bound.
        first_frame = max(0, -(-first_sample // stride))
        end_frame = max(0, -(-end_sample // stride))
        roll[first_frame:end_frame, int(note)] = True
    return roll


def alignAndEvaluate(score_wav, score_midi, performance_wav, performance_midi, score_length=None, performance_length=None,
                     window_size=2048, stride=512):
    """Align a score to a performance in the audio domain and score the alignment.

    Both recordings are featurized, aligned with `align` / `traceback_loss`,
    and the score's midi notes are mapped through the alignment path onto the
    performance's time axis. The mapped notes are then compared, frame by
    frame and pitch by pitch, with the notes of the performance midi.

    Parameters
    ----------
    score_wav : path of the kern score wav file
    score_midi : path of the kern score midi file
    performance_wav : path of the maestro wav file
    performance_midi : path of the maestro midi file
    score_length : length (in seconds) the score is trimmed to before alignment; None keeps it all
    performance_length : length (in seconds) the performance is trimmed to before alignment; None keeps it all
    window_size : analysis window passed to `database.featurize`
    stride : hop (in samples) between frames; also the resolution of the comparison

    Returns
    -------
    dict with keys
        'name'             : `performance_wav`
        'alignment_error'  : number of (frame, pitch) cells where exactly one of the
                             two piano rolls is active
        'error_per_second' : 'alignment_error' divided by the frame rate (fs / stride)
        'error_per_frame'  : 'alignment_error' divided by the number of frames
    """
    cutoff=int(50*(window_size/2048.)) # only consider first 50 bins of spectrum for analysis

    # Read maestro (performance) file
    perf_fs, perf_pad, data1 = _load_padded_audio(performance_wav, performance_length)
    frep1 = database.featurize(data1,perf_fs,musiclib.feature,window_size,stride=stride,normalize=False)

    # Read score file
    score_fs, score_pad, data2 = _load_padded_audio(score_wav, score_length)
    frep2 = database.featurize(data2,score_fs,musiclib.feature,window_size,stride=stride,normalize=False)

    # Transpose / cast once; both the alignment and the traceback use the same inputs.
    sig1 = frep1[0:cutoff].T.astype(np.float32)
    sig2 = frep2[0:cutoff].T.astype(np.float32)

    # Align signals
    start = time.time()
    L,P = align(sig1,sig2)
    end = time.time()
    print('Elapsed time: ' + str(end - start))

    # Find optimal path
    path,costs = traceback_loss(sig1,sig2,L)
    path1 = np.array([step[0] for step in path])
    path2 = np.array([step[1] for step in path])

    # Notes starting or ending beyond the longer of the two sequences are dropped.
    longer_piece_length = max(path1[-1], path2[-1])

    # Map every score note through the alignment path onto performance frames
    aligned_notes = []
    for note, onset, offset in _midi_notes_in_frames(score_midi, score_fs, score_pad, stride):
        if onset > longer_piece_length or offset > longer_piece_length:
            break
        # First path step at which the score has reached the note's frame
        onset_index = np.argmax(path2>=int(onset))
        offset_index = np.argmax(path2>=int(offset))
        aligned_notes.append((note, path1[onset_index], path1[offset_index]))

    # Ground-truth notes from the performance midi file
    reference_notes = []
    for note, onset, offset in _midi_notes_in_frames(performance_midi, perf_fs, perf_pad, stride):
        if onset > longer_piece_length or offset > longer_piece_length:
            break
        # Truncate to whole frames, just like the score notes were during alignment
        reference_notes.append((int(note), int(onset), int(offset)))

    num_samples = max(data1.shape[0], data2.shape[0])
    x = _piano_roll(reference_notes, num_samples, stride) # maestro
    y = _piano_roll(aligned_notes, num_samples, stride)   # aligned score

    # Error conditions :
    # 1) Something is playing in maestro and nothing is playing in alignment
    # 2) Something is playing in alignment and nothing is playing in maestro
    # This can be efficiently calculated using xor.
    z = np.logical_xor(x, y)
    alignment_error = np.sum(z)
    error_per_second = alignment_error/(perf_fs/float(stride))
    error_per_frame = alignment_error/len(z)

    print("Alignment Error: ", alignment_error) # total number of errors
    print("Errors per second: ", error_per_second) # errors divided by the frame rate
    print("Errors per frame: ", error_per_frame) # errors divided by the number of frames
    return {'name': performance_wav, 'alignment_error': alignment_error, 'error_per_second': error_per_second, 'error_per_frame': error_per_frame}
    
    
# Quick check on a short excerpt: first 20 s of the score against the first 16 s of the performance.
alignAndEvaluate(
    './test/bwv854_prelude_score.wav',
    './test/bwv854_prelude_score.midi',
    './test/066_bwv854.wav',
    './test/066_bwv854.mid',
    score_length=20,
    performance_length=16,
)



Elapsed time: 0.13527822494506836
Alignment Error:  1456
Errors per second:  16.904126984126986
Errors per frame:  0.8048645660585959


{'name': './test/066_bwv854.wav',
 'alignment_error': 1456,
 'error_per_second': 16.904126984126986,
 'error_per_frame': 0.8048645660585959}

In [15]:
# Full-length run (no trimming) on a single piece from the work directory.
alignAndEvaluate(
    score_wav='./work/bwv857_fugue_score.wav',
    score_midi='./work/bwv857_fugue_score.midi',
    performance_wav='./work/080_bwv857_fugue.wav',
    performance_midi='./work/080_bwv857_fugue.midi',
)



Elapsed time: 31.1730899810791
Alignment Error:  23709
Errors per second:  275.2609523809524
Errors per frame:  1.060378371125721


In [16]:
# Get the data-structure which categorizes files in a way
# that we can use to start evaluation.
# It is keyed by piece name; each value is a dict. The evaluation loop reads
# its 'kern' entry as a flat list of score file paths (wav + midi, in either
# order) and its 'maestro' entry as a list of such lists, one per performance.
# NOTE(review): the call appears to print each entry as a side effect — confirm in musiclib.
piece_to_files = musiclib.get_piece_to_files_map()

bwv866_prelude {'kern': ['./work/bwv866_prelude_score.wav', './work/bwv866_prelude_score.midi']}
bwv861_prelude {'kern': ['./work/bwv861_prelude_score.wav', './work/bwv861_prelude_score.midi']}
bwv872_fugue {'kern': ['./work/bwv872_fugue_score.wav', './work/bwv872_fugue_score.midi']}
bwv878_prelude {'kern': ['./work/bwv878_prelude_score.wav', './work/bwv878_prelude_score.midi']}
bwv882_fugue {'kern': ['./work/bwv882_fugue_score.midi', './work/bwv882_fugue_score.wav']}
bwv879_prelude {'kern': ['./work/bwv879_prelude_score.wav', './work/bwv879_prelude_score.midi']}
bwv866_fugue {'kern': ['./work/bwv866_fugue_score.wav', './work/bwv866_fugue_score.midi']}
bwv869_prelude {'kern': ['./work/bwv869_prelude_score.wav', './work/bwv869_prelude_score.midi']}
bwv869_fugue {'kern': ['./work/bwv869_fugue_score.midi', './work/bwv869_fugue_score.wav']}
bwv859_prelude {'kern': ['./work/bwv859_prelude_score.midi', './work/bwv859_prelude_score.wav']}
bwv878_fugue {'kern': ['./work/bwv878_fugue_score.wav'

In [36]:
# Evaluate every performance of every piece against that piece's score and
# collect the result dicts returned by alignAndEvaluate in evaluation_results.
evaluation_results = []
for piece in piece_to_files:
    all_files = piece_to_files[piece]
    # Pieces that lack a score or have no performances are skipped.
    kern_files = all_files.get('kern', [])
    maestro_list_of_lists = all_files.get('maestro', [])
    # The guard must test maestro_list_of_lists: `maestro_files` is only bound
    # by the inner loop, so reading it here fails on a fresh kernel.
    if len(kern_files) > 0 and len(maestro_list_of_lists) > 0:
        kern_wav_file = list(filter(lambda x : '.wav' in x,kern_files))[0]
        # '.mid' also matches the '.midi' extension
        kern_score_file = list(filter(lambda x : '.mid' in x,kern_files))[0]
        print(kern_wav_file)
        print(kern_score_file)
        for maestro_files in maestro_list_of_lists:
            maestro_wav_file = list(filter(lambda x : '.wav' in x,maestro_files))[0]
            maestro_midi_file = list(filter(lambda x : '.mid' in x,maestro_files))[0]
            result = alignAndEvaluate(kern_wav_file, kern_score_file, maestro_wav_file, maestro_midi_file)
            evaluation_results.append(result)
            print('----------------------------------------------')

./work/bwv863_prelude_score.wav
./work/bwv863_prelude_score.midi




Elapsed time: 1.8123540878295898
Alignment Error:  4934
Errors per second:  57.283628117913835
Errors per frame:  0.5436315557514323
----------------------------------------------
Elapsed time: 1.7922728061676025
Alignment Error:  40785
Errors per second:  473.512925170068
Errors per frame:  4.413005842891149
----------------------------------------------
Elapsed time: 1.9891717433929443
Alignment Error:  9364
Errors per second:  108.71582766439909
Errors per frame:  0.9926852538958973
----------------------------------------------
./work/bwv874_fugue_score.wav
./work/bwv874_fugue_score.midi
Elapsed time: 13.699948072433472
Alignment Error:  16699
Errors per second:  193.87501133786847
Errors per frame:  0.984959301639731
----------------------------------------------
Elapsed time: 2.33571195602417
Alignment Error:  15461
Errors per second:  179.50185941043082
Errors per frame:  1.3468943287742834
----------------------------------------------
Elapsed time: 9.55035924911499
Alignment E

Elapsed time: 22.563745975494385
Alignment Error:  20921
Errors per second:  242.89233560090702
Errors per frame:  0.9356858535712689
----------------------------------------------
Elapsed time: 23.061392784118652
Alignment Error:  94552
Errors per second:  1097.746575963719
Errors per frame:  4.2274881516587675
----------------------------------------------
Elapsed time: 20.382210969924927
Alignment Error:  23709
Errors per second:  275.2609523809524
Errors per frame:  1.060378371125721
----------------------------------------------
Elapsed time: 0.4397590160369873
Alignment Error:  727
Errors per second:  8.440453514739229
Errors per frame:  0.03251487096918467
----------------------------------------------
./work/bwv853_prelude_score.wav
./work/bwv853_prelude_score.midi
Elapsed time: 12.990885972976685
Alignment Error:  103395
Errors per second:  1200.413605442177
Errors per frame:  4.936264680607276
----------------------------------------------
Elapsed time: 12.749129056930542
Ali

Elapsed time: 4.953943729400635
Alignment Error:  8309
Errors per second:  96.46730158730159
Errors per frame:  0.7624334740319325
----------------------------------------------
./work/bwv848_fugue_score.wav
./work/bwv848_fugue_score.midi
Elapsed time: 5.766820192337036
Alignment Error:  13653
Errors per second:  158.51102040816326
Errors per frame:  1.2415204146585432
----------------------------------------------
Elapsed time: 7.376244068145752
Alignment Error:  51509
Errors per second:  598.0183219954648
Errors per frame:  3.683423913043478
----------------------------------------------
Elapsed time: 6.2549169063568115
Alignment Error:  42009
Errors per second:  487.72353741496596
Errors per frame:  3.5573714963163687
----------------------------------------------
Elapsed time: 6.303350925445557
Alignment Error:  13865
Errors per second:  160.97233560090703
Errors per frame:  1.156283879576349
----------------------------------------------
Elapsed time: 5.863526821136475
Alignment E

Elapsed time: 3.1537232398986816
Alignment Error:  9555
Errors per second:  110.93333333333334
Errors per frame:  1.1029666397321944
----------------------------------------------
Elapsed time: 3.5075459480285645
Alignment Error:  9108
Errors per second:  105.74367346938776
Errors per frame:  1.0513678864134826
----------------------------------------------
./work/bwv875_prelude_score.wav
./work/bwv875_prelude_score.midi
Elapsed time: 2.9636857509613037
Alignment Error:  9241
Errors per second:  107.28780045351473
Errors per frame:  1.1412868963813758
----------------------------------------------
Elapsed time: 3.557593822479248
Alignment Error:  23041
Errors per second:  267.50548752834464
Errors per frame:  2.7145381715362866
----------------------------------------------
Elapsed time: 2.964660167694092
Alignment Error:  7744
Errors per second:  89.90766439909297
Errors per frame:  0.9564036062739286
----------------------------------------------
Elapsed time: 3.3345820903778076
Alig

Alignment Error:  29342
Errors per second:  340.6599546485261
Errors per frame:  3.5343290773307636
----------------------------------------------
Elapsed time: 2.837383985519409
Alignment Error:  27429
Errors per second:  318.4500680272109
Errors per frame:  3.4251998001998003
----------------------------------------------
Elapsed time: 2.979818105697632
Alignment Error:  28299
Errors per second:  328.5507482993197
Errors per frame:  3.344640113461766
----------------------------------------------
Elapsed time: 2.943068742752075
Alignment Error:  28118
Errors per second:  326.4493424036281
Errors per frame:  3.3775375375375374
----------------------------------------------
Elapsed time: 3.766573905944824
Alignment Error:  34383
Errors per second:  399.185850340136
Errors per frame:  3.343674025089954
----------------------------------------------
Elapsed time: 2.8749399185180664
Alignment Error:  25833
Errors per second:  299.92054421768705
Errors per frame:  3.172418027753899
-------

In [38]:
# Preview: the first five result dicts collected by the evaluation loop.
first_few_results = evaluation_results[:5]
print(first_few_results)

[{'name': './work/105_bwv863_prelude.wav', 'alignment_error': 4934, 'error_per_second': 57.283628117913835, 'error_per_frame': 0.5436315557514323}, {'name': './work/106_bwv863_prelude.wav', 'alignment_error': 40785, 'error_per_second': 473.512925170068, 'error_per_frame': 4.413005842891149}, {'name': './work/104_bwv863_prelude.wav', 'alignment_error': 9364, 'error_per_second': 108.71582766439909, 'error_per_frame': 0.9926852538958973}, {'name': './work/049_bwv874_fugue.wav', 'alignment_error': 16699, 'error_per_second': 193.87501133786847, 'error_per_frame': 0.984959301639731}, {'name': './work/050_bwv874_fugue.wav', 'alignment_error': 15461, 'error_per_second': 179.50185941043082, 'error_per_frame': 1.3468943287742834}]
