In [100]:
import librosa
import numpy as np
import pandas as pd
from numpy.random import randn
import os, os.path
import IPython.display as ipd

# Music Fingerprinting using Locality Sensitive Hashing

In [25]:
# Directory holding the audio clips, relative to the notebook's working directory.
DATA_DIR = '../data_FMP/'

# Collect the path of every .wav file in DATA_DIR.
# os.listdir() returns entries in arbitrary order, so the names are sorted:
# later cells select files by position (wav_files[0]) and that selection must
# be the same on every run. The directory is deliberately not stored in a
# variable called `dir`, which would shadow the builtin for the whole notebook.
wav_files = [
    os.path.join(DATA_DIR, name)
    for name in sorted(os.listdir(DATA_DIR))
    if name.endswith(".wav")
]

In [26]:
wav_files

['../data_FMP/FMP_C3S3_Beethoven_Fifth-MM1-21_Bernstein.wav',
 '../data_FMP/FMP_C3S3_Beethoven_Fifth-MM1-21_Karajan1946.wav',
 '../data_FMP/FMP_C3S3_Beethoven_Fifth-MM1-21_Midi-Piano.wav',
 '../data_FMP/FMP_C3S3_Beethoven_Fifth-MM1-21_Scherbakov.wav',
 '../data_FMP/FMP_C3_F03.wav',
 '../data_FMP/FMP_C3_F05.wav',
 '../data_FMP/FMP_C3_F05_BurgmuellerFirstPart.wav',
 '../data_FMP/FMP_C3_F08_C-major-scale.wav',
 '../data_FMP/FMP_C3_F08_C-major-scale_40-cents-up.wav',
 '../data_FMP/FMP_C3_F08_C-major-scale_400-cents-up.wav',
 '../data_FMP/FMP_C3_F08_C-major-scale_pause.wav',
 '../data_FMP/FMP_C3_NoteC4_Violin.wav',
 '../data_FMP/FMP_C4_Audio_Brahms_HungarianDances-05_Ormandy.wav',
 '../data_FMP/FMP_C4_Audio_Chopin_Op028-11_003_20100611-SMD.wav',
 '../data_FMP/FMP_C4_F13_ZagerEvans_InTheYear2525.wav',
 '../data_FMP/FMP_C5_F01_Beatles_LetItBe-mm1-4_Original.wav',
 '../data_FMP/FMP_C5_F20_Bach_BWV846-mm1-4_Fischer.wav',
 '../data_FMP/FMP_C6_Audio_Borodin-sec39_RWC.wav',
 '../data_FMP/FMP_C6_Au

In [27]:
def hash_func(vecs, projections):
    """Hash each vector by the signs of its projections.

    Every vector is projected onto each row of ``projections``; a projection
    strictly greater than zero yields a set bit. The bits of one vector are
    packed into a single integer by ``bool2int``.

    Args:
        vecs: Array-like of shape (n_vectors, dim). A single 1-D vector of
            length ``dim`` is also accepted and treated as one row.
        projections: Array of shape (hash_size, dim), one projection
            direction per row.

    Returns:
        list: One integer hash per input vector, in input order.
    """
    # For a 1-D input np.dot returns a 1-D result, so iterating it would hand
    # scalars (not bit rows) to bool2int and fail. Promote to a one-row matrix.
    signs = np.dot(np.atleast_2d(vecs), projections.T) > 0
    return [bool2int(row) for row in signs]

def bool2int(x):
    """Pack a sequence of truthy/falsy flags into one integer.

    The flag at position ``i`` contributes ``2**i`` when it is truthy, so the
    first element is the least-significant bit. An empty sequence gives 0.
    """
    return sum(1 << position for position, flag in enumerate(x) if flag)

bool2int([False, True, False, True])

10

In [35]:
# Smoke test: hash 10 random 100-dimensional vectors with 3 random projections.
# With 3 sign bits every hash falls in the range 0..7.
# No random seed is set in the visible cells, so the values change from run to run.
X = randn(10,100)
P = randn(3,100)
hash_func(X, P)

[0, 1, 3, 4, 7, 4, 6, 2, 7, 6]

In [45]:
class Table:
    """A single hash table keyed by random-projection hashes.

    ``table`` maps a hash value to the list of entries filed under it,
    ``hash_size`` is the number of projections (bits per hash) and
    ``projections`` is a random ``(hash_size, dim)`` matrix drawn with ``randn``.
    """

    def __init__(self, hash_size, dim):
        """Create an empty table with ``hash_size`` random projections of length ``dim``."""
        self.table = {}
        self.hash_size = hash_size
        self.projections = randn(hash_size, dim)

    def add(self, vecs, label):
        """File one ``{'label': label}`` entry under the hash of every vector in ``vecs``."""
        record = {'label': label}
        for key in hash_func(vecs, self.projections):
            # The same record object is appended once per vector, so a bucket
            # hit by several vectors holds it several times.
            self.table.setdefault(key, []).append(record)

    def query(self, vecs):
        """Return the stored entries whose hash matches a vector in ``vecs``.

        Buckets are concatenated in the order of the query vectors; hashes
        with no bucket contribute nothing.
        """
        matches = []
        for key in hash_func(vecs, self.projections):
            matches.extend(self.table.get(key, ()))
        return matches

In [46]:
class LSH:
    """Locality-sensitive hash index built from several independent ``Table`` objects.

    Each table is constructed separately, so one vector is hashed once per
    table. Adding and querying fan out to every table.
    """

    def __init__(self, dim, num_tables=4, hash_size=8):
        """Create the index.

        Args:
            dim: Length of the vectors that will be added and queried.
            num_tables: Number of ``Table`` instances to create. Defaults to 4,
                the value that used to be hard-coded.
            hash_size: Number of projections (bits per hash) in each table.
                Defaults to 8, the value that used to be hard-coded.
        """
        self.num_tables = num_tables
        self.hash_size = hash_size
        self.tables = [Table(hash_size, dim) for _ in range(num_tables)]

    def add(self, vecs, label):
        """Insert every vector in ``vecs`` into every table under ``label``."""
        for table in self.tables:
            table.add(vecs, label)

    def query(self, vecs):
        """Return the concatenated matches of every table for ``vecs``.

        Matches are not de-duplicated: an entry found by several tables
        appears once per table that returned it.
        """
        results = []
        for table in self.tables:
            results.extend(table.query(vecs))
        return results

    def describe(self):
        """Print the raw bucket dictionary of each table (debugging aid)."""
        for table in self.tables:
            print(table.table)

In [86]:
class MusicSearch:
    """Audio lookup that indexes chroma features of training files in an ``LSH``.

    ``train()`` stores the chroma frames of every training file under the
    file's path; ``query()`` hashes the frames of another file and scores each
    training file by how often its stored frames were hit.
    """

    def __init__(self, training_files, frame_size=4096, hop_size=4000):
        """Set up an empty index.

        Args:
            training_files: Iterable of audio file paths to index in ``train()``.
            frame_size: FFT window length passed to ``chroma_stft`` as ``n_fft``.
            hop_size: Hop length passed to ``chroma_stft`` as ``hop_length``.
        """
        self.frame_size = frame_size
        self.hop_size = hop_size
        # Length of one feature vector; must equal the number of chroma bins
        # returned by chroma_stft (12 with librosa's default n_chroma).
        self.fv_size = 12
        self.lsh = LSH(self.fv_size)
        # Copy into a list so a one-shot iterable is not exhausted here,
        # which would leave train() with nothing to iterate over.
        self.training_files = list(training_files)
        # Number of feature frames indexed per file; used to normalise scores.
        self.num_features_in_file = {f: 0 for f in self.training_files}

    def _extract_features(self, filepath):
        """Load ``filepath`` and return its chroma features, one row per frame."""
        x, fs = librosa.load(filepath)
        chroma = librosa.feature.chroma_stft(
            y=x, sr=fs, n_fft=self.frame_size, hop_length=self.hop_size
        )
        # chroma_stft returns (bins, frames); transpose to one row per frame.
        return chroma.T

    def train(self):
        """Index every training file.

        Calling this more than once adds each file's frames again and
        increases its frame count accordingly.
        """
        for filepath in self.training_files:
            features = self._extract_features(filepath)
            self.lsh.add(features, filepath)
            self.num_features_in_file[filepath] += len(features)

    def query(self, filepath):
        """Score every training file against the audio in ``filepath``.

        Prints the total number of raw matches, then returns a dict mapping
        each matched training file path to its hit count divided by the number
        of frames indexed for that file. The score is not bounded by 1: one
        query frame can match many stored frames, in every table.
        """
        features = self._extract_features(filepath)
        results = self.lsh.query(features)
        print('num results', len(results))

        counts = {}
        for r in results:
            label = r['label']
            counts[label] = counts.get(label, 0) + 1
        return {
            label: hits / self.num_features_in_file[label]
            for label, hits in counts.items()
        }

## Train: index every WAV file

In [87]:
# Build the index over every path in wav_files: train() loads each file with
# librosa and stores its chroma frames in the LSH tables under the file's path.
ms = MusicSearch(wav_files)
ms.train()

## Test: query with a file from the training set

In [97]:
wav_files[0]

'../data_FMP/FMP_C3S3_Beethoven_Fifth-MM1-21_Bernstein.wav'

In [101]:
ipd.Audio(wav_files[0])

In [98]:
# Query with a clip that was also indexed during training, so the file itself
# is expected to appear among the matches.
# `results` maps each matched training file path to its normalised hit count.
test_file = wav_files[0]
results = ms.query(test_file)

num results 571182


In [99]:
# One row per matched training file, labelled by its base file name, with the
# score in the `result` column. os.path.basename replaces a manual split('/')
# so paths built with the platform's own separator (as os.path.join produces)
# are shortened correctly too.
result_df = (
    pd.Series(results, name='result')
    .rename(index=os.path.basename)
    .to_frame()
)
# Display only: best-scoring files first (result_df itself stays unsorted).
result_df.sort_values('result', ascending=False)

Unnamed: 0,result
FMP_C3_NoteC4_Violin.wav,57.666667
FMP_C6_F04_NoteC4_PTVF.wav,49.817204
FMP_C3S3_Beethoven_Fifth-MM1-21_Bernstein.wav,45.557377
FMP_C7_Audio_Beethoven_Op067-01-001-021_Bernstein.wav,45.557377
FMP_C6_F04_NoteC4_Piano.wav,45.000000
...,...
FMP_C8_F02_Long_Castanets.wav,12.594595
FMP_C4_Audio_Chopin_Op028-11_003_20100611-SMD.wav,10.538095
FMP_C8_F27_Chopin_Op028-04_major.wav,10.222222
FMP_C6_Audio_Borodin-sec39_RWC.wav,10.218605
