## Libraries

In [2]:
import numpy as np
import scipy.io.wavfile
import matplotlib.pyplot as plt

## Part

In [3]:
def part(n,signal,sample_rate):
    fourier = np.fft.fft(signal[(n-1)*2048:n*2048])
    freq = np.fft.fftfreq(2048, 1/sample_rate)
    crop = []
    for i in range(2048):
        if (100 <= freq[i] <= 5000):
            crop.append((freq[i],fourier[i]))
    max_fts=[-10000000]*6
    max_freqs=[-10000000]*6
    gap = 816
    for i in range(len(crop)):
        freq_tmp = crop[i][0]
        fourier_tmp = crop[i][1]
        if  (100 <= freq_tmp <= 100+gap):
            if fourier_tmp>max_fts[0]:
                max_fts[0]=fourier_tmp
                max_freqs[0]=freq_tmp
        elif (100+gap < freq_tmp <= 100+2*gap) :
            if fourier_tmp>max_fts[1]:
                max_fts[1]=fourier_tmp
                max_freqs[1]=freq_tmp
        elif (100+2*gap < freq_tmp <= 100+3*gap) :
            if fourier_tmp>max_fts[2]:
                max_fts[2]=fourier_tmp
                max_freqs[2]=freq_tmp
        elif (100+3*gap < freq_tmp <= 100+4*gap) :
            if fourier_tmp>max_fts[3]:
                max_fts[3]=fourier_tmp
                max_freqs[3]=freq_tmp
        elif (100+4*gap < freq_tmp <= 100+5*gap):
            if fourier_tmp>max_fts[4]:
                max_fts[4]=fourier_tmp
                max_freqs[4]=freq_tmp
        elif (100+5*gap < freq_tmp <= 5000):
            if fourier_tmp>max_fts[5]:
                max_fts[5]=fourier_tmp
                max_freqs[5]=freq_tmp
    return max_freqs

## noiseprint

In [4]:
def noiseprint(filename):
    sample_rate,signal = scipy.io.wavfile.read(filename)
    tmp = part(1,signal,sample_rate)
    tmp.reverse()
    noise_print = np.array([[x] for x in  tmp])
    for i in range(2,len(signal)//2048):
        tmp = part(i,signal,sample_rate)
        tmp.reverse()
        noise_print=np.append(noise_print, [[x] for x in  tmp], axis = 1)
    return noise_print

## Load data

In [5]:
data = []
data.append(("1_prelude",noiseprint("data/1_prelude.wav")))
data.append(("2_love_is_blue",noiseprint("data/2_love_is_blue.wav")))
data.append(("3_chanson_du_toreador",noiseprint("data/3_chanson_du_toreador.wav")))
data.append(("4_el_bimbo",noiseprint("data/4_el_bimbo.wav")))

## similarity

In [6]:
def similarity(song_spec, clip_spec, points_per_slice=6):    
    song_flat = song_spec.flatten()
    clip_flat = clip_spec.flatten()
    
    sim_window_size = points_per_slice - 1
    score = 0
    for anchor in range(clip_flat.shape[0] - points_per_slice):
        anchor_y = anchor % points_per_slice
        sim_window = clip_flat[anchor: anchor+sim_window_size]
        for song_anchor in range(anchor_y, song_flat.shape[0] - points_per_slice - 1, points_per_slice):
            if clip_flat[anchor] == song_flat[song_anchor]:
                if np.count_nonzero((song_flat[song_anchor:song_anchor+sim_window_size] - sim_window) == 0) >= 4:
                    score += 1
    
    score /= song_flat.shape[0]
    return score


In [7]:
def most_similarity(path):
    clip = noiseprint(path)
    maximum = similarity(data[0][1],clip)
    index=0
    print(path[5:-4]+" similarity to "+str(data[0][0])+": "+str(maximum))
    for i in range(1,4):
        tmp = similarity(data[i][1],clip)
        print(path[5:-4]+" similarity to "+str(data[i][0])+": "+str(tmp))
        if tmp>maximum:
            maximum=tmp
            index=i
    print(path[5:-4]+" is most similar to "+str(data[index][0]))
    #################
    # for plot clip #
    #################
    """
    for i in range(clip.shape[1]):
        for j in range(clip.shape[0]):
            plt.scatter(i,clip[j][i])
    """
    return index

In [8]:
clip1 = most_similarity("clip/clip1.wav")

clip1 similarity to 1_prelude: 0.0012919896640826874
clip1 similarity to 2_love_is_blue: 0.0016795865633074936
clip1 similarity to 3_chanson_du_toreador: 0.0003875968992248062
clip1 similarity to 4_el_bimbo: 0.004392764857881137
clip1 is most similar to 4_el_bimbo


In [9]:
clip2 = most_similarity("clip/clip2.wav")

clip2 similarity to 1_prelude: 0.003229974160206718
clip2 similarity to 2_love_is_blue: 0.002325581395348837
clip2 similarity to 3_chanson_du_toreador: 0.002325581395348837
clip2 similarity to 4_el_bimbo: 0.0012919896640826874
clip2 is most similar to 1_prelude


In [10]:
clip3 = most_similarity("clip/clip3.wav")

clip3 similarity to 1_prelude: 0.009819121447028423
clip3 similarity to 2_love_is_blue: 0.002971576227390181
clip3 similarity to 3_chanson_du_toreador: 0.001421188630490956
clip3 similarity to 4_el_bimbo: 0.0007751937984496124
clip3 is most similar to 1_prelude
