In [1]:
import os
import librosa
import IPython.display as ipd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage  import generate_binary_structure , binary_erosion , iterate_structure
from scipy.ndimage import maximum_filter
from scipy.signal import find_peaks
from sklearn.manifold import LocallyLinearEmbedding
import librosa

In [139]:
pip install fastdtw

Collecting fastdtw
  Downloading fastdtw-0.3.4.tar.gz (133 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m757.5 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: fastdtw
  Building wheel for fastdtw (setup.py) ... [?25ldone
[?25h  Created wheel for fastdtw: filename=fastdtw-0.3.4-py3-none-any.whl size=3564 sha256=1c61eb7c8988779a425d6ba29b364cf449f9277e61ae3dc137171cd568e67660
  Stored in directory: /home/akshat/.cache/pip/wheels/b2/b2/20/c0960e8ee3ceaf158c43f28eea50357113dfe2f3106da9fdb1
Successfully built fastdtw
Installing collected packages: fastdtw
Successfully installed fastdtw-0.3.4
Note: you may need to restart the kernel to use updated packages.


In [2]:
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

In [3]:
def load_song(song, sr):
    """Load an audio file and return the magnitude of its STFT.

    Parameters
    ----------
    song : str or path-like
        Audio file handed to ``librosa.load``.
    sr : int or None
        Sampling rate requested from ``librosa.load``. The rate returned by
        ``librosa.load`` is the one used to size the analysis window and hop.

    Returns
    -------
    numpy.ndarray
        ``np.abs`` of the STFT of the pre-emphasised signal.
    """
    # Honour the caller's sampling rate. It used to be hard-coded to 8000 Hz,
    # which silently ignored the ``sr`` argument.
    signal, sr = librosa.load(song, sr=sr)
    emphasized = librosa.effects.preemphasis(signal, coef=0.97)
    hop_length = int(sr * 0.09)  # 90 ms hop
    win_length = int(sr * 0.1)   # 100 ms frame
    stft = librosa.stft(emphasized, n_fft=win_length, hop_length=hop_length)
    return np.abs(stft)

In [4]:
def plot_spectrogram(Y, sr, hop_length, y_axis="linear"):
    """Draw ``Y`` with ``librosa.display.specshow`` on a new 25x10 figure.

    ``sr`` and ``hop_length`` are forwarded to ``specshow``; the x axis is
    always "time" and the y axis type is chosen by ``y_axis``. A colorbar is
    attached to the figure afterwards.
    """
    specshow_options = dict(
        sr=sr,
        hop_length=hop_length,
        x_axis="time",
        y_axis=y_axis,
    )
    plt.figure(figsize=(25, 10))
    librosa.display.specshow(Y, **specshow_options)
    plt.colorbar(format="%+2.f")

In [5]:
def mel_spectrogram_func(spectrogram, sr=8000):
    """Project a spectrogram onto 18 mel bands.

    Parameters
    ----------
    spectrogram : numpy.ndarray
        Spectrogram passed to ``librosa.feature.melspectrogram`` as ``S``.
    sr : int, optional
        Sampling rate the spectrogram was computed at. Defaults to 8000, the
        value that used to be hard-coded here, so existing one-argument calls
        behave as before.

    Returns
    -------
    numpy.ndarray
        ``np.abs`` of the mel spectrogram returned by librosa.
    """
    hop_length = int(sr * 0.09)  # 90 ms hop
    win_length = int(sr * 0.1)   # 100 ms frame
    mel = librosa.feature.melspectrogram(
        S=spectrogram,
        sr=sr,
        n_mels=18,
        n_fft=win_length,
        hop_length=hop_length,
    )
    return np.abs(mel)

In [6]:
def get_peaks(mel_spectrogram):
    """Pick the strongest local maximum of every frame.

    For each column (frame) of ``mel_spectrogram`` the local maxima are found
    with ``scipy.signal.find_peaks``; only maxima whose band index lies in
    ``[2, 16)`` are considered and the largest one is kept. Frames without
    such a maximum contribute nothing.

    Returns
    -------
    list of (band_index, frame_index)
        Ordered from the largest spectrogram value to the smallest.
    """
    strongest = []
    for frame_idx, frame in enumerate(mel_spectrogram.T):
        local_maxima, _ = find_peaks(frame)
        # Keep only maxima inside the allowed band range.
        in_band = local_maxima[(local_maxima >= 2) & (local_maxima < 16)]
        if in_band.size == 0:
            continue
        # argmax returns the first of several equal maxima, i.e. the lowest band.
        band = in_band[np.argmax(frame[in_band])]
        strongest.append((band, frame_idx))

    return sorted(
        strongest,
        key=lambda peak: mel_spectrogram[peak[0]][peak[1]],
        reverse=True,
    )


In [7]:
def show_peaks(peaks):
    """Print ``peaks``, scatter-plot them, then print how many there are.

    Element 1 of each peak is used as the x coordinate and element 0 as the
    y coordinate of the scatter plot.
    """
    print(peaks)

    # Scatter plot of the peaks on a 25x10 figure.
    plt.figure(figsize=(25, 10))
    x_coords = [peak[1] for peak in peaks]
    y_coords = [peak[0] for peak in peaks]
    plt.scatter(x_coords, y_coords)
    plt.show()

    print(len(peaks))

In [8]:
def max_amplitude_each_frame(mel_spectrogram):
    """Plot the maximum value of every frame (column) of ``mel_spectrogram``."""
    plt.figure(figsize=(25, 10))
    # One maximum per column of the spectrogram.
    per_frame_max = list(map(np.max, mel_spectrogram.T))
    plt.plot(per_frame_max)
    plt.show()

In [9]:
def generate_eh(peaks, mel_spectrogram=None):
    """Sum energy in eight 3-frame windows along each peak's own band.

    For a peak ``(band, frame)`` the windows start at ``frame-9, frame-7,
    frame-5, frame-3`` and at ``frame+1, frame+3, frame+5, frame+7``; each
    covers three consecutive frames of row ``band``.

    Parameters
    ----------
    peaks : iterable of (band, frame)
        Peak coordinates, e.g. the output of ``get_peaks``.
    mel_spectrogram : array-like, optional
        2-D spectrogram to read from. When omitted, the notebook-level
        variable ``mel_spectrogram`` is used, as this function did before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(peaks), 8)``; ``(0, 8)`` for no peaks.

    Raises
    ------
    NameError
        If no spectrogram is passed and no global ``mel_spectrogram`` exists.
    """
    if mel_spectrogram is None:
        # Backward compatibility with call sites that rely on the global.
        try:
            mel_spectrogram = globals()["mel_spectrogram"]
        except KeyError:
            raise NameError("name 'mel_spectrogram' is not defined") from None
    mel_spectrogram = np.asarray(mel_spectrogram)

    def window_sum(band, start):
        # Clamp at 0: a negative slice start would otherwise wrap around and
        # read frames from the END of the track (or yield an empty slice).
        # Windows past the last frame are truncated by normal slicing.
        lo, hi = max(start, 0), max(start + 3, 0)
        return mel_spectrogram[band, lo:hi].sum()

    eh = []
    for band, frame in peaks:
        before = [window_sum(band, s) for s in range(frame - 9, frame - 2, 2)]
        after = [window_sum(band, s) for s in range(frame + 1, frame + 8, 2)]
        eh.append(before + after)
    # reshape keeps a 2-D result even when there are no peaks.
    return np.array(eh).reshape(-1, 8)

In [10]:
def generate_ev(peaks, mel_spectrogram=None):
    """Sum energy in 3-frame windows in the bands around each peak.

    For a peak ``(band, frame)`` the window covers frames
    ``frame-1 .. frame+1`` and is summed in rows ``band+2, band+1, band-1,
    band-2`` (in that order). Peaks with ``frame + 2 >= number of frames``
    are skipped, so the result can have fewer rows than ``peaks``.

    Parameters
    ----------
    peaks : iterable of (band, frame)
        Peak coordinates, e.g. the output of ``get_peaks``.
    mel_spectrogram : array-like, optional
        2-D spectrogram to read from. When omitted, the notebook-level
        variable ``mel_spectrogram`` is used, as this function did before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_kept_peaks, 4)``; ``(0, 4)`` when none are kept.

    Raises
    ------
    NameError
        If no spectrogram is passed and no global ``mel_spectrogram`` exists.
    """
    if mel_spectrogram is None:
        # Backward compatibility with call sites that rely on the global.
        try:
            mel_spectrogram = globals()["mel_spectrogram"]
        except KeyError:
            raise NameError("name 'mel_spectrogram' is not defined") from None
    mel_spectrogram = np.asarray(mel_spectrogram)
    n_frames = mel_spectrogram.shape[1]

    def window_sum(band, frame):
        # Clamp at 0: for frame 0 the slice [-1:2] is empty, which used to
        # turn the whole row into zeros.
        lo, hi = max(frame - 1, 0), max(frame + 2, 0)
        return mel_spectrogram[band, lo:hi].sum()

    ev = []
    for band, frame in peaks:
        # Existing right-edge policy, kept as is: drop peaks too close to the
        # last frame. NOTE(review): this also drops frame == n_frames - 2,
        # whose window still fits — confirm whether that is intended.
        if frame + 2 >= n_frames:
            continue
        ev.append([window_sum(band + offset, frame) for offset in (2, 1, -1, -2)])
    # reshape keeps a 2-D result even when every peak was skipped.
    return np.array(ev).reshape(-1, 4)

In [11]:
def generate_ec(peaks, mel_spectrogram=None):
    """Sum energy in the four 2-band x 3-frame corners around each peak.

    With ``left`` = frames ``frame-2 .. frame`` and ``right`` = frames
    ``frame .. frame+2``, the four values per peak ``(band, frame)`` are:

    0. rows ``band+1`` and ``band`` over ``left``
    1. rows ``band+1`` and ``band`` over ``right``
    2. rows ``band`` and ``band-1`` over ``right``
    3. rows ``band`` and ``band-1`` over ``left``

    Parameters
    ----------
    peaks : iterable of (band, frame)
        Peak coordinates, e.g. the output of ``get_peaks``.
    mel_spectrogram : array-like, optional
        2-D spectrogram to read from. When omitted, the notebook-level
        variable ``mel_spectrogram`` is used, as this function did before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(peaks), 4)``; ``(0, 4)`` for no peaks.

    Raises
    ------
    NameError
        If no spectrogram is passed and no global ``mel_spectrogram`` exists.
    """
    if mel_spectrogram is None:
        # Backward compatibility with call sites that rely on the global.
        try:
            mel_spectrogram = globals()["mel_spectrogram"]
        except KeyError:
            raise NameError("name 'mel_spectrogram' is not defined") from None
    mel_spectrogram = np.asarray(mel_spectrogram)

    def window_sum(band, start):
        # Clamp at 0: a negative slice start does not mean "before the first
        # frame" to Python, it wraps around or yields an empty slice.
        lo, hi = max(start, 0), max(start + 3, 0)
        return mel_spectrogram[band, lo:hi].sum()

    ec = []
    for band, frame in peaks:
        left, right = frame - 2, frame
        ec.append([
            window_sum(band + 1, left) + window_sum(band, left),
            window_sum(band + 1, right) + window_sum(band, right),
            window_sum(band, right) + window_sum(band - 1, right),
            window_sum(band, left) + window_sum(band - 1, left),
        ])
    # reshape keeps a 2-D result even when there are no peaks.
    return np.array(ec).reshape(-1, 4)

In [12]:
def generate_em(peaks, mel_spectrogram=None):
    """Sum energy in eight 2-band x 4-frame blocks away from each peak.

    ``upper`` is rows ``band+2`` and ``band+1``; ``lower`` is rows ``band-1``
    and ``band-2``. Each block spans four frames starting at the given
    offset from the peak's frame. Per peak the values are, in order:

    upper@-9, upper@-5, lower@-9, lower@-5, upper@+2, upper@+6, lower@+2,
    lower@+6.

    Parameters
    ----------
    peaks : iterable of (band, frame)
        Peak coordinates, e.g. the output of ``get_peaks``.
    mel_spectrogram : array-like, optional
        2-D spectrogram to read from. When omitted, the notebook-level
        variable ``mel_spectrogram`` is used, as this function did before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(peaks), 8)``; ``(0, 8)`` for no peaks.

    Raises
    ------
    NameError
        If no spectrogram is passed and no global ``mel_spectrogram`` exists.
    """
    if mel_spectrogram is None:
        # Backward compatibility with call sites that rely on the global.
        try:
            mel_spectrogram = globals()["mel_spectrogram"]
        except KeyError:
            raise NameError("name 'mel_spectrogram' is not defined") from None
    mel_spectrogram = np.asarray(mel_spectrogram)

    def block_sum(bands, start):
        # Clamp at 0: for peaks in the first nine frames a negative slice
        # start would wrap around and read frames from the END of the track.
        lo, hi = max(start, 0), max(start + 4, 0)
        return sum(mel_spectrogram[band, lo:hi].sum() for band in bands)

    em = []
    for band, frame in peaks:
        upper = (band + 2, band + 1)
        lower = (band - 1, band - 2)
        em.append([
            block_sum(upper, frame - 9),
            block_sum(upper, frame - 5),
            block_sum(lower, frame - 9),
            block_sum(lower, frame - 5),
            block_sum(upper, frame + 2),
            block_sum(upper, frame + 6),
            block_sum(lower, frame + 2),
            block_sum(lower, frame + 6),
        ])
    # reshape keeps a 2-D result even when there are no peaks.
    return np.array(em).reshape(-1, 8)


In [13]:
def lle(e, n_components, n_neighbors):
    """Fit a ``LocallyLinearEmbedding`` on ``e`` and return the embedding.

    ``n_components`` and ``n_neighbors`` are forwarded to the estimator's
    constructor; the return value is whatever ``fit_transform(e)`` yields.
    """
    return LocallyLinearEmbedding(
        n_neighbors=n_neighbors,
        n_components=n_components,
    ).fit_transform(e)

In [14]:
def generate_fingerprint(eh, ev, ec, em):
    """Turn the four energy matrices into one flat binary fingerprint.

    Within every row of every matrix, each element is compared with its left
    neighbour: the bit is 1 when the value increased and 0 otherwise. Bits are
    emitted row by row, for ``eh``, ``ev``, ``ec`` and ``em`` in that order.

    Every row is used. Earlier the row loop started at index 1, which
    silently discarded the first row of each matrix even though only the
    column loop needs to skip index 0.

    Parameters
    ----------
    eh, ev, ec, em : array-like
        2-D energy matrices (one row per peak). Inputs that are not 2-D or
        have fewer than two columns contribute no bits.

    Returns
    -------
    numpy.ndarray
        1-D integer array of 0/1 values (empty when there is nothing to
        compare).
    """
    pieces = []
    for energies in (eh, ev, ec, em):
        energies = np.asarray(energies)
        if energies.ndim != 2 or energies.shape[1] < 2:
            continue  # no adjacent columns to compare
        # diff along axis 1 is energies[:, j] - energies[:, j-1] for j >= 1.
        rising = np.diff(energies, axis=1) > 0
        pieces.append(rising.astype(int).ravel())
    if not pieces:
        return np.array([], dtype=int)
    return np.concatenate(pieces)

In [15]:
def dynamic_time_warping(s, t):
    """Return the dynamic-time-warping cost between sequences ``s`` and ``t``.

    The local cost is ``abs(s[i] - t[j])`` and each step may come from the
    cell above, the cell to the left, or the diagonal. Two empty sequences
    cost 0; one empty sequence against a non-empty one costs ``inf``.
    """
    rows, cols = len(s), len(t)
    # Everything starts unreachable except the origin.
    acc = np.full((rows + 1, cols + 1), np.inf)
    acc[0, 0] = 0
    for r in range(1, rows + 1):
        for c in range(1, cols + 1):
            step = abs(s[r - 1] - t[c - 1])
            cheapest_prev = min(acc[r - 1, c], acc[r, c - 1], acc[r - 1, c - 1])
            acc[r, c] = step + cheapest_prev
    return acc[rows, cols]

In [18]:
# Move into the dataset root; later cells list os.getcwd() and open
# './rock/...' relative to it.
# NOTE(review): the path is relative, so running this cell a second time
# without restarting the kernel resolves it against the already-changed
# working directory.
os.chdir('signals/dataset/genres')
# os.chdir('..')

In [19]:
# Display the current working directory as a sanity check.
os.getcwd()

'/home/akshat/signals/dataset/genres'

In [20]:
# Fingerprint database: file name -> fingerprint array, filled by the loop below.
fingerprints = {}

In [21]:
# Build the fingerprint database: one entry per file found exactly one level
# below the current working directory (layout: <sub-directory>/<file>).
# Paths are joined explicitly instead of os.chdir()-ing in and out of each
# sub-directory, so an exception part-way through cannot leave the kernel in
# the wrong directory. Listings are sorted so the processing order is stable.
dataset_root = os.getcwd()
for filename in sorted(os.listdir(dataset_root)):
    f = os.path.join(dataset_root, filename)
    if not os.path.isdir(f):
        continue
    for file in sorted(os.listdir(f)):
        file1 = os.path.join(f, file)
        if not os.path.isfile(file1):
            continue
        print(file1)
        spectrogram = load_song(file1, 8000)
        # The generate_e* helpers are called with peaks only and read this
        # notebook-level name, so it has to be rebound for every file.
        mel_spectrogram = mel_spectrogram_func(spectrogram)
        peaks = get_peaks(mel_spectrogram)
        eh = generate_eh(peaks)
        ev = generate_ev(peaks)
        ec = generate_ec(peaks)
        em = generate_em(peaks)
        fingerprint = generate_fingerprint(eh, ev, ec, em)
        # Keyed by bare file name (not the full path).
        fingerprints[file] = fingerprint

/home/akshat/signals/dataset/genres/jazz/jazz.00056.au
/home/akshat/signals/dataset/genres/jazz/jazz.00024.au
/home/akshat/signals/dataset/genres/jazz/jazz.00046.au
/home/akshat/signals/dataset/genres/jazz/jazz.00048.au
/home/akshat/signals/dataset/genres/jazz/jazz.00043.au
/home/akshat/signals/dataset/genres/jazz/jazz.00028.au
/home/akshat/signals/dataset/genres/jazz/jazz.00073.au
/home/akshat/signals/dataset/genres/jazz/jazz.00055.au
/home/akshat/signals/dataset/genres/jazz/jazz.00054.au
/home/akshat/signals/dataset/genres/jazz/jazz.00064.au
/home/akshat/signals/dataset/genres/jazz/jazz.00036.au
/home/akshat/signals/dataset/genres/jazz/jazz.00021.au
/home/akshat/signals/dataset/genres/jazz/jazz.00001.au
/home/akshat/signals/dataset/genres/jazz/jazz.00006.au
/home/akshat/signals/dataset/genres/jazz/jazz.00057.au
/home/akshat/signals/dataset/genres/jazz/jazz.00084.au
/home/akshat/signals/dataset/genres/jazz/jazz.00069.au
/home/akshat/signals/dataset/genres/jazz/jazz.00016.au
/home/aksh

/home/akshat/signals/dataset/genres/hiphop/hiphop.00079.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00023.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00007.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00035.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00068.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00081.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00098.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00009.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00006.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00042.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00002.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00087.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00053.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00055.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00019.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00080.au
/home/akshat/signals/dataset/genres/hiphop/hiphop.00040.

/home/akshat/signals/dataset/genres/pop/pop.00080.au
/home/akshat/signals/dataset/genres/pop/pop.00071.au
/home/akshat/signals/dataset/genres/pop/pop.00054.au
/home/akshat/signals/dataset/genres/pop/pop.00060.au
/home/akshat/signals/dataset/genres/metal/metal.00039.au
/home/akshat/signals/dataset/genres/metal/metal.00001.au
/home/akshat/signals/dataset/genres/metal/metal.00040.au
/home/akshat/signals/dataset/genres/metal/metal.00006.au
/home/akshat/signals/dataset/genres/metal/metal.00061.au
/home/akshat/signals/dataset/genres/metal/metal.00090.au
/home/akshat/signals/dataset/genres/metal/metal.00019.au
/home/akshat/signals/dataset/genres/metal/metal.00012.au
/home/akshat/signals/dataset/genres/metal/metal.00020.au
/home/akshat/signals/dataset/genres/metal/metal.00028.au
/home/akshat/signals/dataset/genres/metal/metal.00011.au
/home/akshat/signals/dataset/genres/metal/metal.00078.au
/home/akshat/signals/dataset/genres/metal/metal.00077.au
/home/akshat/signals/dataset/genres/metal/metal

/home/akshat/signals/dataset/genres/blues/blues.00058.au
/home/akshat/signals/dataset/genres/blues/blues.00003.au
/home/akshat/signals/dataset/genres/blues/blues.00009.au
/home/akshat/signals/dataset/genres/blues/blues.00022.au
/home/akshat/signals/dataset/genres/blues/blues.00089.au
/home/akshat/signals/dataset/genres/blues/blues.00099.au
/home/akshat/signals/dataset/genres/blues/blues.00047.au
/home/akshat/signals/dataset/genres/blues/blues.00030.au
/home/akshat/signals/dataset/genres/blues/blues.00041.au
/home/akshat/signals/dataset/genres/blues/blues.00092.au
/home/akshat/signals/dataset/genres/blues/blues.00056.au
/home/akshat/signals/dataset/genres/blues/blues.00071.au
/home/akshat/signals/dataset/genres/blues/blues.00062.au
/home/akshat/signals/dataset/genres/blues/blues.00063.au
/home/akshat/signals/dataset/genres/blues/blues.00012.au
/home/akshat/signals/dataset/genres/blues/blues.00094.au
/home/akshat/signals/dataset/genres/blues/blues.00015.au
/home/akshat/signals/dataset/ge

/home/akshat/signals/dataset/genres/classical/classical.00042.au
/home/akshat/signals/dataset/genres/classical/classical.00041.au
/home/akshat/signals/dataset/genres/classical/classical.00075.au
/home/akshat/signals/dataset/genres/classical/classical.00035.au
/home/akshat/signals/dataset/genres/classical/classical.00003.au
/home/akshat/signals/dataset/genres/classical/classical.00095.au
/home/akshat/signals/dataset/genres/classical/classical.00006.au
/home/akshat/signals/dataset/genres/classical/classical.00088.au
/home/akshat/signals/dataset/genres/classical/classical.00097.au
/home/akshat/signals/dataset/genres/classical/classical.00079.au
/home/akshat/signals/dataset/genres/classical/classical.00016.au
/home/akshat/signals/dataset/genres/classical/classical.00043.au
/home/akshat/signals/dataset/genres/classical/classical.00026.au
/home/akshat/signals/dataset/genres/classical/classical.00018.au
/home/akshat/signals/dataset/genres/classical/classical.00077.au
/home/akshat/signals/data

/home/akshat/signals/dataset/genres/rock/rock.00046.au
/home/akshat/signals/dataset/genres/rock/rock.00003.au
/home/akshat/signals/dataset/genres/rock/rock.00070.au
/home/akshat/signals/dataset/genres/rock/rock.00067.au
/home/akshat/signals/dataset/genres/rock/rock.00051.au
/home/akshat/signals/dataset/genres/rock/rock.00060.au
/home/akshat/signals/dataset/genres/rock/rock.00044.au
/home/akshat/signals/dataset/genres/rock/rock.00066.au
/home/akshat/signals/dataset/genres/rock/rock.00029.au
/home/akshat/signals/dataset/genres/rock/rock.00053.au
/home/akshat/signals/dataset/genres/rock/rock.00095.au
/home/akshat/signals/dataset/genres/rock/rock.00009.au
/home/akshat/signals/dataset/genres/rock/rock.00039.au
/home/akshat/signals/dataset/genres/rock/rock.00083.au
/home/akshat/signals/dataset/genres/rock/rock.00042.au
/home/akshat/signals/dataset/genres/rock/rock.00091.au
/home/akshat/signals/dataset/genres/rock/rock.00028.au
/home/akshat/signals/dataset/genres/rock/rock.00032.au
/home/aksh

/home/akshat/signals/dataset/genres/disco/disco.00012.au
/home/akshat/signals/dataset/genres/disco/disco.00015.au
/home/akshat/signals/dataset/genres/disco/disco.00067.au
/home/akshat/signals/dataset/genres/disco/disco.00074.au
/home/akshat/signals/dataset/genres/disco/disco.00043.au
/home/akshat/signals/dataset/genres/disco/disco.00061.au
/home/akshat/signals/dataset/genres/disco/disco.00048.au
/home/akshat/signals/dataset/genres/disco/disco.00049.au
/home/akshat/signals/dataset/genres/disco/disco.00069.au
/home/akshat/signals/dataset/genres/disco/disco.00081.au
/home/akshat/signals/dataset/genres/disco/disco.00087.au
/home/akshat/signals/dataset/genres/disco/disco.00028.au
/home/akshat/signals/dataset/genres/disco/disco.00064.au
/home/akshat/signals/dataset/genres/disco/disco.00078.au
/home/akshat/signals/dataset/genres/disco/disco.00057.au
/home/akshat/signals/dataset/genres/disco/disco.00093.au
/home/akshat/signals/dataset/genres/disco/disco.00044.au
/home/akshat/signals/dataset/ge

/home/akshat/signals/dataset/genres/reggae/reggae.00037.au


In [None]:
# Fingerprint the query clip with the same steps used for the database.
spectrogram = load_song('./rock/rock.00000.au', 8000)
mel_spectrogram = mel_spectrogram_func(spectrogram)
peaks = get_peaks(mel_spectrogram)
eh = generate_eh(peaks)
ev = generate_ev(peaks)
ec = generate_ec(peaks)
em = generate_em(peaks)
fingerprint = generate_fingerprint(eh, ev, ec, em)

# Linear scan of the database: keep the first entry with the strictly
# smallest fastdtw distance to the query fingerprint.
match, min_dist = "", float('inf')
for candidate, stored in fingerprints.items():
    distance, _ = fastdtw(stored, fingerprint)
    if distance < min_dist:
        min_dist, match = distance, candidate
print(match)