In [1]:
import os,sys,time
import numpy as np
from scipy.io import wavfile
import sklearn

from IPython.display import Audio

import musiclib, database

%load_ext cython

# Dynamic time warping (DTW)

In [2]:
%%cython
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport sqrt

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef align(float[:,:] sig1,float[:,:] sig2):
    """Fill the dynamic-time-warping cost table for two feature sequences.

    Each row of ``sig1`` / ``sig2`` is one feature vector. The local cost of
    pairing row ``j`` of ``sig1`` with row ``k`` of ``sig2`` is the Euclidean
    distance between the two rows, and

        L[j,k] = cost(j,k) + min(L[j-1,k], L[j,k-1], L[j-1,k-1])

    with only the existing neighbours considered on the first row/column.

    Parameters
    ----------
    sig1, sig2 : 2-D float32 buffers with the same number of columns.

    Returns
    -------
    (L, P) : two float32 arrays of shape (sig1.shape[0], sig2.shape[0]).
        ``L`` is the accumulated cost. ``P`` records the step taken into each
        cell: 1 = from (j-1,k) ("up"), 2 = from (j,k-1) ("left"),
        3 = from (j-1,k-1) (diagonal); the start cell (0,0) is also 3.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of columns.
    """
    cdef int d = sig1.shape[1]
    cdef int len1 = sig1.shape[0]
    cdef int len2 = sig2.shape[0]
    cdef np.ndarray[np.float32_t, ndim=2] npL = np.empty((len1,len2), dtype=np.float32)
    cdef np.ndarray[np.float32_t, ndim=2] npP = np.empty((len1,len2), dtype=np.float32)

    cdef float[:,:] L = npL
    cdef float[:,:] P = npP

    cdef float cost,tmp
    cdef int j,k,i

    # Bounds checking is switched off for this function, so a column-count
    # mismatch would read outside a row of sig2 instead of raising.
    if sig2.shape[1] != d:
        raise ValueError('sig1 and sig2 must have the same number of columns '
                         '(got %d and %d)' % (d, sig2.shape[1]))

    for j in range(0,len1):
        for k in range(0,len2):
            # local cost: Euclidean distance between the two feature rows
            cost = 0
            for i in range(d):
                tmp = sig1[j,i] - sig2[k,i]
                cost += tmp * tmp
            cost = sqrt(cost)

            if j == 0 and k == 0:
                L[j,k] = cost
                P[j,k] = 3
            elif k == 0: # first column: the only predecessor is (j-1,k), i.e. up
                L[j,k] = cost + L[j-1,k]
                P[j,k] = 1
            elif j == 0: # first row: the only predecessor is (j,k-1), i.e. left
                L[j,k] = cost + L[j,k-1]
                P[j,k] = 2
            else: # j, k > 0
                # Strict comparisons: ties resolve to diagonal, then left, then up.
                if L[j-1,k] < L[j,k-1] and L[j-1,k] < L[j-1,k-1]: # insertion (up)
                    P[j,k] = 1
                    L[j,k] = cost + L[j-1,k]
                elif L[j,k-1] < L[j-1,k-1]: # deletion (left)
                    P[j,k] = 2
                    L[j,k] = cost + L[j,k-1]
                else: # match (up left)
                    P[j,k] = 3
                    L[j,k] = cost + L[j-1,k-1]

    return npL,npP

In [3]:
%%cython
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport sqrt

def traceback_loss(float[:,:] sig1,float[:,:] sig2, float[:,:] L):
    """Recover the warping path through an accumulated-cost table.

    Starting at the last cell ``(len(sig1)-1, len(sig2)-1)`` and walking back
    to ``(0,0)``, the local Euclidean cost between ``sig1[j]`` and ``sig2[k]``
    is recomputed and the predecessor whose accumulated cost plus that local
    cost equals ``L[j,k]`` is taken. Candidates are tried in the order
    diagonal ``(j-1,k-1)``, then ``(j,k-1)``, then ``(j-1,k)``.

    The match uses exact float equality, so ``L`` must have been filled with
    the same single-precision arithmetic (as ``align`` does).

    Parameters
    ----------
    sig1, sig2 : 2-D float32 buffers, one feature vector per row.
    L : 2-D float32 buffer of accumulated costs, indexed ``[row of sig1, row of sig2]``.

    Returns
    -------
    (path, costs) : two lists in start-to-end order. ``path`` holds ``(j, k)``
        index pairs and ``costs`` the value of ``L`` at each of them. Both are
        empty when either input has no rows.

    Raises
    ------
    AssertionError
        If no predecessor of some cell reproduces its accumulated cost.
    """
    cdef int d = sig1.shape[1]
    cdef int j = sig1.shape[0]-1
    cdef int k = sig2.shape[0]-1
    cdef int i
    cdef float cost,tmp
    A = []
    C = []

    # Nothing to align: without this guard the walk would start at index -1.
    if j < 0 or k < 0:
        return A,C

    while True:
        if j == 0 and k == 0:
            A.append((0,0))
            C.append(L[0,0])
            break # got back to the beginning

        # local cost of pairing row j of sig1 with row k of sig2
        cost = 0
        for i in range(d):
            tmp = sig1[j,i] - sig2[k,i]
            cost += tmp * tmp
        cost = sqrt(cost)

        if j>0 and k>0 and L[j,k] == L[j-1,k-1] + cost: # progress
            A.append((j,k))
            C.append(L[j,k])
            j -= 1
            k -= 1
        elif k>0 and L[j,k] == L[j,k-1] + cost: # stay sig2
            A.append((j,k))
            C.append(L[j,k])
            k -= 1
        elif j>0 and L[j,k] == L[j-1,k] + cost: # stay sig1
            A.append((j,k))
            C.append(L[j,k])
            j -= 1
        else:
            # Report the inconsistent cell in the exception itself; only
            # neighbours that actually exist are included.
            details = ['j=%d' % j, 'k=%d' % k, 'cost=%r' % cost,
                       'L[j,k]=%r' % L[j,k]]
            if k > 0:
                details.append('L[j,k-1]=%r' % L[j,k-1])
            if j > 0:
                details.append('L[j-1,k]=%r' % L[j-1,k])
            if j > 0 and k > 0:
                details.append('L[j-1,k-1]=%r' % L[j-1,k-1])
            raise AssertionError('traceback_loss: no predecessor matches: '
                                 + ', '.join(details))

    return list(reversed(A)),list(reversed(C))

In [5]:
record = 'data/cs4-1pre_real.wav'
synth = 'data/cs4-1pre.mid.wav'

# Analysis settings shared by both clips.
window_size=2048
stride=512
cutoff=int(50*(window_size/2048.))
print(cutoff)

fs, data1 = wavfile.read(record)
fs_synth, data2 = wavfile.read(synth)
# A single `fs` drives the padding below and the later onset conversion, so
# the two files must share a sample rate; refuse to continue otherwise.
if fs_synth != fs:
    raise ValueError('sample rates differ: %s is %d Hz but %s is %d Hz'
                     % (record, fs, synth, fs_synth))

left_pad = 1*fs
right_pad = 0*fs


def trim_and_pad(samples, n_keep, n_pad):
    """Keep the first `n_keep` samples and prepend `n_pad` samples of silence.

    The silence block takes its trailing dimensions from `samples`, so the
    channel count is preserved. The result is a new float array (the zeros
    are float64 and `np.concatenate` promotes to the common dtype).
    """
    clip = samples[0:n_keep]
    silence = np.zeros((n_pad,) + clip.shape[1:])
    return np.concatenate((silence, clip), axis=0)


data1 = trim_and_pad(data1, 30*fs, left_pad)
frep1 = database.featurize(data1,fs,musiclib.feature,window_size,stride=stride,normalize=False)

# bach cello cutoff corresponding to 30 seconds in real
data2 = trim_and_pad(data2, int(fs*26.2), left_pad)
frep2 = database.featurize(data2,fs,musiclib.feature,window_size,stride=stride,normalize=False)

50




In [6]:
# Shapes of the two padded clips, recording first.
for clip in (data1, data2):
    print(clip.shape)

(1367100, 2)
(1199520, 2)


In [7]:
# Play column 0 of the padded recording.
Audio(data1[:, 0], rate=fs)

In [None]:
# Play column 0 of the padded synthesized clip.
Audio(data2[:, 0], rate=fs)

In [14]:
# Time the cost-table computation. The float32 conversion of the first
# `cutoff` feature rows (transposed so each row is one frame's features)
# is deliberately inside the timed region.
start = time.time()
L, P = align(
    frep1[0:cutoff].T.astype(np.float32),
    frep2[0:cutoff].T.astype(np.float32)
)
end = time.time()
print('Elapsed time: ' + str(end - start))

Elapsed time: 0.854876041412


In [15]:
# Walk the cost table back to the origin to get the warping path and the
# accumulated cost at every step of it.
path, costs = traceback_loss(
    frep1[0:cutoff].T.astype(np.float32),
    frep2[0:cutoff].T.astype(np.float32),
    L
)

In [16]:
# Split the (row in sig1, row in sig2) pairs into two parallel index arrays.
path1 = np.asarray([pair[0] for pair in path])
path2 = np.asarray([pair[1] for pair in path])

In [17]:
# Onsets are scaled by the sample rate, shifted by the leading padding and
# divided by the stride to express them in feature-frame units.
# NOTE(review): presumably `onsets` is in seconds — confirm against musiclib.load_midi.
onsets, notes = musiclib.load_midi('data/cs4-1pre.mid')
onsets_sig2 = (left_pad + onsets*fs) / stride

In [18]:
# For each onset on the synthesized side, find the first path step whose
# sig2 index reaches it and take the sig1 index paired with that step.
onsets_sig1 = []
for onset in onsets_sig2:
    reached_end = onset > path2[-1] - right_pad/stride
    if reached_end: # if we reached the end of the clip
        break

    first_hit = np.argmax(path2 >= onset)
    onsets_sig1.append(path1[first_hit])
onsets_sig1 = np.array(onsets_sig1)

In [19]:
# Mark the notes at the aligned onsets (converted from frames back to
# samples), blend 30% markers with 70% original, then save and play the mix.
out2 = musiclib.mark_notes(data1[:, 0], onsets_sig1*stride, notes)
mix = .3*out2 + .7*data1[:, 0]
wavfile.write('test.wav', fs, mix.astype(np.int16))
Audio(mix, rate=fs)