# QBE 
### George Tzanetakis, University of Victoria 

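In this notebook we explore query-by-example (QBE) using content-based audio features. The basic idea is that each track is represented as a feature vector (here, the mean and standard deviation of its MFCCs), and the tracks most similar to a query track are retrieved by finding its nearest neighbors in that feature space.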


In [8]:
import glob
import os

import librosa
import numpy as np

fnames = glob.glob("/Users/georgetzanetakis/data/sound/genres/*/*.wav")

genres = ['classical', 'country', 'disco', 'hiphop', 'jazz', 'rock', 'blues', 'reggae', 'pop', 'metal']

# Number of MFCC coefficients computed per frame (20 is also librosa's default).
# Each track is summarized by the mean and the standard deviation of every
# coefficient over time, giving 2 * N_MFCC features per track.
N_MFCC = 20


def genre_label(fname):
    """Return the integer genre label for the audio file `fname`.

    The genre is the name of the directory that directly contains the file
    (the <genre>/<file>.wav layout matched by the glob pattern above) and the
    label is the position of that genre in `genres`.

    Only the parent directory is inspected, instead of searching the whole
    path for a genre substring, so that unrelated path components (for
    example a user or data directory that happens to be called "pop" or
    "rock") cannot change the label.

    Raises:
        ValueError: if the parent directory is not one of `genres`. Without
            this check the track would silently keep an all-zero feature row
            and label 0.
    """
    genre = os.path.basename(os.path.dirname(fname))
    if genre not in genres:
        raise ValueError("Cannot determine the genre of %s: directory %r is not one of %s"
                         % (fname, genre, genres))
    return genres.index(genre)


# allocate matrix for audio features and target
audio_features = np.zeros((len(fnames), 2 * N_MFCC))
target = np.zeros(len(fnames))
# compute the features
for (i, fname) in enumerate(fnames):
    print("Processing %d %s" % (i, fname))
    # resolve the label first so an unexpected file fails before the slow audio decoding
    label = genre_label(fname)

    # each file is decoded and analyzed exactly once
    audio, srate = librosa.load(fname)
    mfcc_matrix = librosa.feature.mfcc(y=audio, sr=srate, n_mfcc=N_MFCC)

    # summarize every coefficient over time (axis=1): N_MFCC means followed by N_MFCC stds
    mean_mfcc = np.mean(mfcc_matrix, axis=1)
    std_mfcc = np.std(mfcc_matrix, axis=1)
    audio_fvec = np.hstack([mean_mfcc, std_mfcc])
    audio_features[i] = audio_fvec
    target[i] = label
print(audio_features.shape)


from sklearn.preprocessing import MinMaxScaler

# Rescale every feature column to the [0, 1] range: first learn the
# per-column minimum and maximum, then apply the rescaling. The raw
# feature matrix is replaced by its scaled version.
scaler = MinMaxScaler()
scaler.fit(audio_features)
audio_features = scaler.transform(audio_features)

Processing 0 /Users/georgetzanetakis/data/sound/genres/pop/pop.00027.wav
Processing 1 /Users/georgetzanetakis/data/sound/genres/pop/pop.00033.wav
Processing 2 /Users/georgetzanetakis/data/sound/genres/pop/pop.00032.wav
Processing 3 /Users/georgetzanetakis/data/sound/genres/pop/pop.00026.wav
Processing 4 /Users/georgetzanetakis/data/sound/genres/pop/pop.00030.wav
Processing 5 /Users/georgetzanetakis/data/sound/genres/pop/pop.00024.wav
Processing 6 /Users/georgetzanetakis/data/sound/genres/pop/pop.00018.wav
Processing 7 /Users/georgetzanetakis/data/sound/genres/pop/pop.00019.wav
Processing 8 /Users/georgetzanetakis/data/sound/genres/pop/pop.00025.wav
Processing 9 /Users/georgetzanetakis/data/sound/genres/pop/pop.00031.wav
Processing 10 /Users/georgetzanetakis/data/sound/genres/pop/pop.00009.wav
Processing 11 /Users/georgetzanetakis/data/sound/genres/pop/pop.00035.wav
Processing 12 /Users/georgetzanetakis/data/sound/genres/pop/pop.00021.wav
Processing 13 /Users/georgetzanetakis/data/sound

Processing 114 /Users/georgetzanetakis/data/sound/genres/metal/metal.00025.wav
Processing 115 /Users/georgetzanetakis/data/sound/genres/metal/metal.00031.wav
Processing 116 /Users/georgetzanetakis/data/sound/genres/metal/metal.00027.wav
Processing 117 /Users/georgetzanetakis/data/sound/genres/metal/metal.00033.wav
Processing 118 /Users/georgetzanetakis/data/sound/genres/metal/metal.00032.wav
Processing 119 /Users/georgetzanetakis/data/sound/genres/metal/metal.00026.wav
Processing 120 /Users/georgetzanetakis/data/sound/genres/metal/metal.00082.wav
Processing 121 /Users/georgetzanetakis/data/sound/genres/metal/metal.00096.wav
Processing 122 /Users/georgetzanetakis/data/sound/genres/metal/metal.00069.wav
Processing 123 /Users/georgetzanetakis/data/sound/genres/metal/metal.00041.wav
Processing 124 /Users/georgetzanetakis/data/sound/genres/metal/metal.00055.wav
Processing 125 /Users/georgetzanetakis/data/sound/genres/metal/metal.00054.wav
Processing 126 /Users/georgetzanetakis/data/sound/ge

Processing 218 /Users/georgetzanetakis/data/sound/genres/disco/disco.00068.wav
Processing 219 /Users/georgetzanetakis/data/sound/genres/disco/disco.00040.wav
Processing 220 /Users/georgetzanetakis/data/sound/genres/disco/disco.00054.wav
Processing 221 /Users/georgetzanetakis/data/sound/genres/disco/disco.00083.wav
Processing 222 /Users/georgetzanetakis/data/sound/genres/disco/disco.00097.wav
Processing 223 /Users/georgetzanetakis/data/sound/genres/disco/disco.00096.wav
Processing 224 /Users/georgetzanetakis/data/sound/genres/disco/disco.00082.wav
Processing 225 /Users/georgetzanetakis/data/sound/genres/disco/disco.00055.wav
Processing 226 /Users/georgetzanetakis/data/sound/genres/disco/disco.00041.wav
Processing 227 /Users/georgetzanetakis/data/sound/genres/disco/disco.00069.wav
Processing 228 /Users/georgetzanetakis/data/sound/genres/disco/disco.00057.wav
Processing 229 /Users/georgetzanetakis/data/sound/genres/disco/disco.00043.wav
Processing 230 /Users/georgetzanetakis/data/sound/ge

Processing 322 /Users/georgetzanetakis/data/sound/genres/blues/blues.00057.wav
Processing 323 /Users/georgetzanetakis/data/sound/genres/blues/blues.00043.wav
Processing 324 /Users/georgetzanetakis/data/sound/genres/blues/blues.00094.wav
Processing 325 /Users/georgetzanetakis/data/sound/genres/blues/blues.00080.wav
Processing 326 /Users/georgetzanetakis/data/sound/genres/blues/blues.00096.wav
Processing 327 /Users/georgetzanetakis/data/sound/genres/blues/blues.00082.wav
Processing 328 /Users/georgetzanetakis/data/sound/genres/blues/blues.00069.wav
Processing 329 /Users/georgetzanetakis/data/sound/genres/blues/blues.00055.wav
Processing 330 /Users/georgetzanetakis/data/sound/genres/blues/blues.00041.wav
Processing 331 /Users/georgetzanetakis/data/sound/genres/blues/blues.00040.wav
Processing 332 /Users/georgetzanetakis/data/sound/genres/blues/blues.00054.wav
Processing 333 /Users/georgetzanetakis/data/sound/genres/blues/blues.00068.wav
Processing 334 /Users/georgetzanetakis/data/sound/ge

Processing 427 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00093.wav
Processing 428 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00085.wav
Processing 429 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00091.wav
Processing 430 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00046.wav
Processing 431 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00052.wav
Processing 432 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00053.wav
Processing 433 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00047.wav
Processing 434 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00090.wav
Processing 435 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00084.wav
Processing 436 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00020.wav
Processing 437 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00034.wav
Processing 438 /Users/georgetzanetakis/data/sound/genres/reggae/reggae.00008.wav
Processing 439 /Users/george

Processing 527 /Users/georgetzanetakis/data/sound/genres/classical/classical.00054.wav
Processing 528 /Users/georgetzanetakis/data/sound/genres/classical/classical.00068.wav
Processing 529 /Users/georgetzanetakis/data/sound/genres/classical/classical.00083.wav
Processing 530 /Users/georgetzanetakis/data/sound/genres/classical/classical.00097.wav
Processing 531 /Users/georgetzanetakis/data/sound/genres/classical/classical.00096.wav
Processing 532 /Users/georgetzanetakis/data/sound/genres/classical/classical.00082.wav
Processing 533 /Users/georgetzanetakis/data/sound/genres/classical/classical.00069.wav
Processing 534 /Users/georgetzanetakis/data/sound/genres/classical/classical.00055.wav
Processing 535 /Users/georgetzanetakis/data/sound/genres/classical/classical.00041.wav
Processing 536 /Users/georgetzanetakis/data/sound/genres/classical/classical.00026.wav
Processing 537 /Users/georgetzanetakis/data/sound/genres/classical/classical.00032.wav
Processing 538 /Users/georgetzanetakis/data

Processing 625 /Users/georgetzanetakis/data/sound/genres/rock/rock.00098.wav
Processing 626 /Users/georgetzanetakis/data/sound/genres/rock/rock.00065.wav
Processing 627 /Users/georgetzanetakis/data/sound/genres/rock/rock.00071.wav
Processing 628 /Users/georgetzanetakis/data/sound/genres/rock/rock.00059.wav
Processing 629 /Users/georgetzanetakis/data/sound/genres/rock/rock.00058.wav
Processing 630 /Users/georgetzanetakis/data/sound/genres/rock/rock.00070.wav
Processing 631 /Users/georgetzanetakis/data/sound/genres/rock/rock.00064.wav
Processing 632 /Users/georgetzanetakis/data/sound/genres/rock/rock.00048.wav
Processing 633 /Users/georgetzanetakis/data/sound/genres/rock/rock.00060.wav
Processing 634 /Users/georgetzanetakis/data/sound/genres/rock/rock.00074.wav
Processing 635 /Users/georgetzanetakis/data/sound/genres/rock/rock.00075.wav
Processing 636 /Users/georgetzanetakis/data/sound/genres/rock/rock.00061.wav
Processing 637 /Users/georgetzanetakis/data/sound/genres/rock/rock.00049.wav

Processing 731 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00042.wav
Processing 732 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00095.wav
Processing 733 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00081.wav
Processing 734 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00080.wav
Processing 735 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00094.wav
Processing 736 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00043.wav
Processing 737 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00057.wav
Processing 738 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00053.wav
Processing 739 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00047.wav
Processing 740 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00090.wav
Processing 741 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00084.wav
Processing 742 /Users/georgetzanetakis/data/sound/genres/hiphop/hiphop.00085.wav
Processing 743 /Users/george

Processing 832 /Users/georgetzanetakis/data/sound/genres/country/country.00002.wav
Processing 833 /Users/georgetzanetakis/data/sound/genres/country/country.00016.wav
Processing 834 /Users/georgetzanetakis/data/sound/genres/country/country.00012.wav
Processing 835 /Users/georgetzanetakis/data/sound/genres/country/country.00006.wav
Processing 836 /Users/georgetzanetakis/data/sound/genres/country/country.00007.wav
Processing 837 /Users/georgetzanetakis/data/sound/genres/country/country.00013.wav
Processing 838 /Users/georgetzanetakis/data/sound/genres/country/country.00005.wav
Processing 839 /Users/georgetzanetakis/data/sound/genres/country/country.00011.wav
Processing 840 /Users/georgetzanetakis/data/sound/genres/country/country.00039.wav
Processing 841 /Users/georgetzanetakis/data/sound/genres/country/country.00038.wav
Processing 842 /Users/georgetzanetakis/data/sound/genres/country/country.00010.wav
Processing 843 /Users/georgetzanetakis/data/sound/genres/country/country.00004.wav
Proc

Processing 935 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00099.wav
Processing 936 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00072.wav
Processing 937 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00066.wav
Processing 938 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00058.wav
Processing 939 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00070.wav
Processing 940 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00064.wav
Processing 941 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00065.wav
Processing 942 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00071.wav
Processing 943 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00059.wav
Processing 944 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00054.wav
Processing 945 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00040.wav
Processing 946 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00068.wav
Processing 947 /Users/georgetzanetakis/data/sound/genres/jazz/jazz.00097.wav

In [9]:
# scaled feature vector of the first track: 20 MFCC means followed by 20 MFCC standard deviations
audio_features[0, :]

array([0.80249486, 0.43592896, 0.68618477, 0.33382174, 0.75549289,
       0.48660099, 0.69772287, 0.33169516, 0.73601816, 0.39269457,
       0.56143273, 0.29821771, 0.63300612, 0.21752766, 0.54423082,
       0.40902828, 0.5005822 , 0.40324   , 0.52373698, 0.4879249 ,
       0.35506141, 0.35337662, 0.26493617, 0.36816589, 0.23934185,
       0.38283945, 0.34962167, 0.34802596, 0.38621303, 0.27794883,
       0.16499908, 0.29228105, 0.27889795, 0.19760867, 0.23831083,
       0.34901699, 0.32478905, 0.3236465 , 0.31043858, 0.35620636])

In [10]:
import pandas as pd 

# Column names follow the layout of the feature vector:
# mean_mfcc_0 .. mean_mfcc_19 followed by std_mfcc_0 .. std_mfcc_19
mean_columns = ['mean_mfcc_' + str(k) for k in range(0, 20)]
std_columns = ['std_mfcc_' + str(k) for k in range(0, 20)]
columns = mean_columns + std_columns

# one row per track: the 40 features plus the numeric label, the genre name and the file name
genres_pdf = pd.DataFrame(data=audio_features, index=None, columns=columns)
genres_pdf['genre_id'] = target
genres_pdf['genre'] = [genres[int(genre_id)] for genre_id in genres_pdf['genre_id']]
genres_pdf['fname'] = fnames

# write the summary to disk and read it straight back, so the cells below
# work from the CSV file rather than from the in-memory arrays
csv_path = 'GTZAN_summary_mfcc_genres.csv'
genres_pdf.to_csv(csv_path, index=False)
genres_pdf = pd.read_csv(csv_path)
genres_pdf

Unnamed: 0,mean_mfcc_0,mean_mfcc_1,mean_mfcc_2,mean_mfcc_3,mean_mfcc_4,mean_mfcc_5,mean_mfcc_6,mean_mfcc_7,mean_mfcc_8,mean_mfcc_9,...,std_mfcc_13,std_mfcc_14,std_mfcc_15,std_mfcc_16,std_mfcc_17,std_mfcc_18,std_mfcc_19,genre_id,genre,fname
0,0.802495,0.435929,0.686185,0.333822,0.755493,0.486601,0.697723,0.331695,0.736018,0.392695,...,0.197609,0.238311,0.349017,0.324789,0.323646,0.310439,0.356206,8.0,pop,/Users/georgetzanetakis/data/sound/genres/pop/...
1,0.813236,0.386626,0.528124,0.284390,0.552425,0.559080,0.659130,0.336053,0.673094,0.380338,...,0.150320,0.133356,0.174285,0.127812,0.126702,0.147307,0.219845,8.0,pop,/Users/georgetzanetakis/data/sound/genres/pop/...
2,0.738872,0.221309,0.760054,0.299989,0.800903,0.394445,0.656695,0.338196,0.691192,0.314293,...,0.172788,0.252009,0.234410,0.230990,0.328407,0.330090,0.310915,8.0,pop,/Users/georgetzanetakis/data/sound/genres/pop/...
3,0.805841,0.508588,0.728259,0.182446,0.655172,0.452378,0.615258,0.261999,0.642897,0.432627,...,0.262193,0.354340,0.377464,0.318033,0.297733,0.362813,0.309610,8.0,pop,/Users/georgetzanetakis/data/sound/genres/pop/...
4,0.898756,0.379030,0.579857,0.184541,0.576933,0.373573,0.577089,0.332381,0.636011,0.302167,...,0.130000,0.185805,0.233564,0.187841,0.158808,0.143416,0.188740,8.0,pop,/Users/georgetzanetakis/data/sound/genres/pop/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.605977,0.550125,0.435676,0.592317,0.458244,0.746300,0.411844,0.586920,0.347379,0.687228,...,0.152146,0.214442,0.270147,0.194997,0.253174,0.234790,0.263641,4.0,jazz,/Users/georgetzanetakis/data/sound/genres/jazz...
996,0.415167,0.634229,0.631011,0.559890,0.431268,0.629662,0.432104,0.480035,0.427603,0.366135,...,0.250899,0.439356,0.415049,0.397297,0.508789,0.497969,0.352891,4.0,jazz,/Users/georgetzanetakis/data/sound/genres/jazz...
997,0.232404,0.546499,0.559731,0.569044,0.555465,0.704250,0.468020,0.414920,0.602355,0.523296,...,0.344717,0.717369,0.978748,0.803974,0.757105,0.845053,1.000000,4.0,jazz,/Users/georgetzanetakis/data/sound/genres/jazz...
998,0.488184,0.567701,0.569980,0.743177,0.401405,0.637799,0.523867,0.587885,0.235117,0.754499,...,0.136649,0.180711,0.187878,0.270168,0.454491,0.408154,0.312983,4.0,jazz,/Users/georgetzanetakis/data/sound/genres/jazz...


In [11]:

# The last three columns (genre_id, genre, fname) are metadata;
# everything before them is the numeric feature matrix.
feature_columns = genres_pdf.columns[:-3]
audio_features = genres_pdf[feature_columns].values
target = genres_pdf['genre_id']
for loaded in (audio_features, target):
    print(loaded.shape)

(1000, 40)
(1000,)


In [12]:
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_predict

# Baseline genre classification with a linear SVM, evaluated with
# 10-fold cross-validated predictions.
# NOTE(review): the features were min-max scaled on the whole dataset in the
# feature-extraction cell, so the scaling has already seen the samples of
# every test fold -- consider scaling inside the cross-validation instead.
clf = svm.SVC(kernel='linear', gamma='scale')
clf.fit(audio_features, target)
predicted = cross_val_predict(clf, audio_features, target, cv=10)

confusion = metrics.confusion_matrix(target, predicted)
# despite the "Classification report" label, the value printed below is the
# overall accuracy (metrics.accuracy_score)
accuracy = metrics.accuracy_score(target, predicted)

print("Confusion matrix:\n%s" % confusion)
print("Classification report for classifier %s:\n%s\n" % (clf, accuracy))

Confusion matrix:
[[91  2  0  0  5  2  0  0  0  0]
 [ 1 49 12  0  2 14 12  5  4  1]
 [ 1  5 45 11  0 16  3  3  9  7]
 [ 0  0 10 51  0  3  1 26  4  5]
 [ 3  4  2  0 85  1  4  1  0  0]
 [ 0  5 14  2  8 36 15  6  2 12]
 [ 0  9  2  1  7 11 50  6  0 14]
 [ 0  4  4 18  1  5  5 58  4  1]
 [ 0  3  2  5  0  3  1  4 82  0]
 [ 0  0  4  2  0  5  3  0  0 86]]
Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
0.633



In [15]:
from sklearn.neighbors import NearestNeighbors

# Build a brute-force nearest-neighbor index over all feature vectors and
# query it with those same vectors: for every row of audio_features we get
# the distances to its k (5) nearest neighbors and the indices of the
# corresponding rows.
nbrs = NearestNeighbors(n_neighbors=5, algorithm='brute')
nbrs.fit(audio_features)
distances, indices = nbrs.kneighbors(audio_features)


def nn_names(query_index, fname_column=None, neighbor_indices=None):
    """Return the file names of the nearest neighbors of one track.

    Args:
        query_index: row position of the query track in the feature matrix.
        fname_column: pandas Series holding one file name per track, in the
            same row order as the feature matrix. Defaults to
            genres_pdf['fname'].
        neighbor_indices: 2-D array whose row q lists the row positions of
            the neighbors of track q (as returned by kneighbors). Defaults
            to the global `indices`.

    Returns:
        A list with the file names of the neighbors of `query_index`, in the
        order given by `neighbor_indices`.
    """
    if fname_column is None:
        fname_column = genres_pdf['fname']
    if neighbor_indices is None:
        neighbor_indices = indices
    # The neighbor indices are row *positions*, so select by position (.iloc).
    # Label-based lookup (fname_column[i]) only gives the same answer while
    # the frame still has its default 0..n-1 index.
    return [fname_column.iloc[i] for i in neighbor_indices[query_index]]

# Retrieve the neighbors of one example track. The query itself comes back
# as the first result because its distance to itself is zero.
query_index = 100
results = nn_names(query_index)
for neighbor_fname in results:
    print(neighbor_fname)

/Users/georgetzanetakis/data/sound/genres/metal/metal.00022.wav
/Users/georgetzanetakis/data/sound/genres/metal/metal.00060.wav
/Users/georgetzanetakis/data/sound/genres/rock/rock.00017.wav
/Users/georgetzanetakis/data/sound/genres/metal/metal.00091.wav
/Users/georgetzanetakis/data/sound/genres/rock/rock.00083.wav


In [16]:
import IPython.display as ipd

# Listen to the query track (the first entry of the result list)
query_fname = results[0]
print('QUERY: ', query_fname)
audio, srate = librosa.load(query_fname)
ipd.Audio(audio, rate=srate)

QUERY:  /Users/georgetzanetakis/data/sound/genres/metal/metal.00022.wav


In [17]:
# Listen to the closest retrieved track (second entry of the result list)
retrieved_fname = results[1]
print('RETRIEVED 1: ', retrieved_fname)
audio, srate = librosa.load(retrieved_fname)
ipd.Audio(audio, rate=srate)

RETRIEVED 1:  /Users/georgetzanetakis/data/sound/genres/metal/metal.00060.wav


In [18]:
# Listen to the second-closest retrieved track (third entry of the result list)
retrieved_fname = results[2]
print('RETRIEVED 2: ', retrieved_fname)
audio, srate = librosa.load(retrieved_fname)
ipd.Audio(audio, rate=srate)

RETRIEVED 2:  /Users/georgetzanetakis/data/sound/genres/rock/rock.00017.wav
