# Imports

In [1]:
%matplotlib inline

import matplotlib.pylab as plt

import numpy as np
np.set_printoptions(suppress=True)

import pandas as pd

from IPython.display import display, HTML

from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

## Distance Measures

In [2]:
def expand_query_space(p,q):
    
    query_space = np.zeros(tuple(p.shape))
    query_space[:,:] = q
    
    return query_space

In [3]:
distance_measures= {}

distance_measures["l1"] = lambda data, i : \
    np.sum(abs(data - data[i,:]), axis=1)

distance_measures["canberra"] = lambda data, i : \
    np.nansum(abs(data - data[i,:]) / (abs(data) + abs(data[i,:])), axis=1)

distance_measures["wave_hedges"] = lambda data, i : \
    np.sum( np.abs(data - data[i,:]) / np.max([data, expand_query_space(data,data[1,:])], axis=0), axis=1)
    
distance_measures["dice"] = lambda data, i : \
    np.sum( (data - data[i,:])**2, axis=1) / \
    (np.sum( data**2, axis=1) + np.sum( expand_query_space(data,data[1,:])**2, axis=1))
    
distance_measures["braycurtis"] = lambda data, i : \
    np.abs(data - data[i,:]).sum(axis=1) / np.abs(data + data[i,:]).sum(axis=1)
    
distance_measures["cosine"] = lambda dat, i : \
    np.abs(1 - np.sum( dat * dat[i,:], axis=1) / \
    (np.sqrt(np.sum( dat**2, axis=1)) * np.sqrt(np.sum(expand_query_space(dat,dat[i,:])**2, axis=1))))

## Load Metadata

#### This file contains links to remote audio files

In [4]:
metadata  = pd.read_csv("//s3store1/fssim1208/Projects/EU EU Sounds/MIR/europeana_allsound.csv", engine="python", sep="\\t")

#### join metadata with rp_features

In [5]:
npz1 = np.load("E:/Data/MIR/EU_SOUNDS_FEATURES/combined_features.npz")

In [6]:
filenames = [f.replace("\\","/").replace("E:/Data/MIR/EU_SOUNDS_FEATURES","").replace(".mp3.npz","") for f in npz1["filenames"]]
filenames = pd.DataFrame({"index":range(len(filenames))}, index=filenames)
metadata  = metadata.join(filenames, how="inner")

#### join with aggregated Europeana Metadata

In [7]:
metadata = metadata.join(pd.read_pickle("E:/Data/MIR/EU_SOUNDS_FEATURES/aggregated_metadata.p"), how="inner")

#### Merge with librosa features

In [8]:
npz3 = np.load("E:/Data/MIR/EU_SOUNDS_FEATURES/combined_librosa_features.npz")

In [9]:
filenames = [f.replace("\\","/").replace("E:/Data/MIR/EU_SOUNDS_FEATURES_LIBROSA","").replace(".mp3.npz","") for f in npz3["filenames"]]
filenames = pd.DataFrame({"index_librosa":range(len(filenames))}, index=filenames)
metadata  = metadata.join(filenames, how="inner")

Export Metadata for MIR-Prototype

In [10]:
metadata.to_pickle("E:/Data/MIR/EU_SOUNDS_FEATURES/combined_metadata.p")

## Load Music Features

### Load features

### Normalize Features

In [11]:
# unit-norm
ssd_my_norm = npz1["tssd"][metadata["index"], :]
#ssd_my_norm /= ssd_my_norm.sum(axis=1, keepdims=True)

# standardization
#ssd_my_norm -= ssd_my_norm.mean(axis=0)
#ssd_my_norm /= ssd_my_norm.std(axis=0)

In [12]:
np.savez("E:/Data/MIR/EU_SOUNDS_FEATURES/my_tssd_pca", data=ssd_my_norm)

In [13]:
# unit-norm
my_rp = npz1["rp"][metadata["index"], :]
#my_rp -= my_rp.mean(axis=0)
#my_rp /= my_rp.std(axis=0)

In [14]:
np.savez("E:/Data/MIR/EU_SOUNDS_FEATURES/my_rp_pca", data=my_rp)

In [15]:
my_mfcc = npz3["mfcc"][metadata["index_librosa"].values,:]
#my_mfcc -= my_mfcc.min(axis=0)
#my_mfcc /= my_mfcc.max(axis=0)

In [16]:
my_chroma = npz3["chroma"][metadata["index_librosa"].values,:]
#my_chroma -= my_chroma.min(axis=0)
#my_chroma /= my_chroma.max(axis=0)

In [17]:
my_rmse = npz3["rmse"][metadata["index_librosa"].values,:]
#my_rmse -= my_rmse.min(axis=0)
#my_rmse /= my_rmse.max(axis=0)

In [18]:
my_spectral_centroid = npz3["spectral_centroid"][metadata["index_librosa"].values,:]
#my_spectral_centroid -= my_spectral_centroid.min(axis=0)
#my_spectral_centroid /= my_spectral_centroid.max(axis=0)

In [41]:
d = pd.DataFrame(my_spectral_centroid)

In [50]:
((d.iloc[0] - d.iloc[5] )**2).sum()

1004852.6046840245

In [40]:
t = ((d.iloc[:50] - d.iloc[5])**2).sum(axis=1)
t.sort()
t

5           0.000000
16       7353.065141
25      13864.261277
46      25482.121530
33      27138.279350
20      39504.535502
23      43977.845394
45      48116.510252
43     374524.949799
49     376364.190917
19     631698.054684
24     686362.785408
12     923773.051848
42     956839.224231
4      967263.773172
0     1004852.604684
2     1007520.507159
34    1094158.553280
44    1104958.109417
10    1250345.675741
40    1285173.656582
3     1301916.117726
1     1319014.464662
30    1413988.320615
38    1441803.997377
35    1510693.244534
47    1511037.128472
37    1589313.378939
13    1703319.994621
27    1709692.350932
15    1750874.198959
8     1841081.968976
18    1918975.022092
36    1981507.769767
26    2327832.938224
11    2343611.016276
39    2421179.476186
17    2523986.834707
21    2833360.599342
9     2842836.560970
6     3047959.100797
32    3309054.073444
7     3326786.125444
22    3395507.141569
41    3763471.974011
31    3811844.685647
28    3833169.119528
48    3982369

In [39]:
t.to_clipboard()

In [19]:
my_tonnetz = npz3["tonnetz"][metadata["index_librosa"].values,:]
#my_tonnetz -= my_tonnetz.min(axis=0)
#my_tonnetz /= my_tonnetz.max(axis=0)

In [20]:
my_zero_crossing_rate = npz3["zero_crossing_rate"][metadata["index_librosa"].values,:]
#my_zero_crossing_rate -= my_zero_crossing_rate.min(axis=0)
#my_zero_crossing_rate /= my_zero_crossing_rate.max(axis=0)

In [21]:
my_bpm = npz3["bpm"][metadata["index_librosa"].values]
my_bpm = my_bpm.reshape((my_bpm.shape[0],1))

In [22]:
def calc_similar_items(idx):

    dists_ssd = distance_measures["canberra"](ssd_my_norm, idx)
    dists_ssd = 1 - (dists_ssd / dists_ssd.max())

    #dists_rh = distance_measures["dice"](my_rh, idx)
    #dists_rh = 1 - (dists_rh / dists_rh.max())
    
    dists_rp = distance_measures["canberra"](my_rp, idx)
    dists_rp = 1 - (dists_rp / dists_rp.max())

    dists_mfcc = distance_measures["canberra"](my_mfcc, idx)
    dists_mfcc = 1 - (dists_mfcc / dists_mfcc.max())

    dists_chroma = distance_measures["canberra"](my_chroma, idx)
    dists_chroma = 1 - (dists_chroma / dists_chroma.max())

    dists_bpm = distance_measures["canberra"](my_bpm, idx)
    dists_bpm = 1 - (dists_bpm / dists_bpm.max())
    
    dists_rmse = distance_measures["canberra"](my_rmse, idx)
    dists_rmse = 1 - (dists_rmse / dists_rmse.max())
    
    dists_spectral_centroid = distance_measures["canberra"](my_spectral_centroid, idx)
    dists_spectral_centroid = 1 - (dists_spectral_centroid / dists_spectral_centroid.max())
    
    dists_tonnetz = distance_measures["canberra"](my_tonnetz, idx)
    dists_tonnetz = 1 - (dists_tonnetz / dists_tonnetz.max())
    
    dists_zero_crossing_rate = distance_measures["canberra"](my_zero_crossing_rate, idx)
    dists_zero_crossing_rate = 1 - (dists_zero_crossing_rate / dists_zero_crossing_rate.max())
    
    dists = (dists_ssd               * 0.9 + \
             dists_rp                * 1.99 + \
             dists_mfcc              * 2.5 + \
             dists_chroma            * 1.3 + \
             dists_bpm               * 0.8 + \
             dists_rmse              * 1.0 + \
             dists_spectral_centroid * 0.9 + \
             dists_tonnetz           * 1.3 + \
             dists_zero_crossing_rate * 0.2)
    
    dists /= dists.max()
    
    #dists = dists_rh
    
    #dists = distance_measures["l1"](my_rp, idx)

    nn = np.argsort(dists)[::-1]

    return dists, nn

Vögel 14000
Latin 24000
Jeweish 2000

146000 - extremes gegenbeispiel

In [34]:
query_idx = 1073

audio_template = "<audio controls><source controls src=\"{0}\" type=\"audio/mp3\"></audio>"

dists, nn = calc_similar_items(query_idx)

html = "<table>"

i = 0
for val in metadata.iloc[nn[:1000],:].values:

    for i in range(len( val )):
        print i, val[i]
    break
    style = ""
    
    if i == 0:
        style = "style=\"background: yellow\""
    
    html = """{0}<tr {7}><td>{1}</td><td>{2}</td><td>{3}</td><td>{8}</td><td>{4}</td><td>{5}</td><td>{6}</td>
                 <td><a href="{9}">Link</a></td></tr>""".format(html, # 0
                                                                     audio_template.format(val[0]),                # 1
                                                                     val[1], # nn idx                               # 2
                                                                     dists[nn[i]], # dist                          # 3
                                                                     val[2].encode("utf-8"), # title               # 4
                                                                     val[4].encode("utf-8"), # lib                 # 5
                                                                     val[9].encode("utf-8"), # col                 # 6
                                                                     style,                                        # 7
                                                                     float(my_bpm[nn[i]]),                         # 8
                                                                     val[8].encode("utf-8"))
                                                                     
    i += 1

    if i > 10:
        break
    
html = "%s</table>" % (html)

#print html
HTML(html)

0 http://www.rachelnet.net/media/medem/musique/libre/extraits/33T00008005ex.mp3
1 1073
2 Paris New-York
3 fr
4 Judaica Europeana
5 09303_Ag_EU_Judaica_mcy33
6 http://www.europeana.eu/api/v2/record/09303/2AE936ED3857BBB95E3D176FF4DCE30C18DA5BFB.json?wskey=HqHRSeD3j
7 None
8 http://www.europeana.eu/portal/record/09303/2AE936ED3857BBB95E3D176FF4DCE30C18DA5BFB.html?utm_source=api&utm_medium=api&utm_campaign=HqHRSeD3j
9 Bibliothèque Medem - Maison de la Culture Yiddish
10 1073


In [171]:
audio_template = "<audio controls><source controls src=\"{0}\" type=\"audio/mp3\"></audio>"

html = "<table>"

i = 0
for val in metadata[metadata["title"].str.contains("irplane")].values[:10]:
#for val in metadata[metadata["organization"].str.contains("OpenUp")].values[1100:1110]:
    
    html = "%s<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" % (html, 
                                                                     audio_template.format(val[0]),
                                                                     val[1], # idx
                                                                     val[2].encode("utf-8"), # title
                                                                     val[4].encode("utf-8"), # lib
                                                                     val[5].encode("utf-8"))                                                           
    
html = "%s</table>" % (html)

#print html
HTML(html)

0,1,2,3,4
,181308,Airplane Blues,DISMARC,2023601_Ag_DE_DISMARC
,140259,Airplane Blues,DISMARC,2023601_Ag_DE_DISMARC
,116338,Fairplanes,DISMARC,2023601_Ag_DE_DISMARC
,96270,Avionul (The Airplane),DISMARC,2023601_Ag_DE_DISMARC
,73925,Airplane Blues,DISMARC,2023601_Ag_DE_DISMARC
,96224,Avionul (The Airplane),DISMARC,2023601_Ag_DE_DISMARC


# Classification

In [13]:
classes = {}

In [122]:
classes["sound"]       = np.array(list(set([63787, 315972, 12030, 12031, 8080, 8079, 8077, 8073, 8068, 8063, 8061, 8163, 8165, 
                                       9065, 9070, 17487, 17494, 17495, 17494, 17493, 17492, 12389])))

In [95]:
classes["mixed_radio"] = np.array(list(set([21765, 22259, 138882, 40181, 29655, 101741, 89576, 21274, 21267, 21241, 21075, 21057,
                                       190967, 21038, 20022])))

In [123]:
classes["spoken"]      = np.array(list(set([20961, 20962, 89576, 21059, 193553, 139738, 126580, 127196, 127196, 2984, 5793, 5821,
                                       19630, 19620, 19621, 19622, 19623, 19624, 19625, 19626, 19627, 19628, 19629, 188527, 
                                       190957, 191762, 191779, 191780, 191785, 191787, 191787, 191788, 191789, 5603, 5604, 
                                       5605, 5606, 5607, 5608, 5609, 5610, 5611, 5612, 8165, 8165, 21032, 21036, 21871, 
                                       21039, 265803, 5706])))

In [124]:
classes["music"]       = np.array(list(set([1000, 287835, 243808, 203125, 1753, 242318, 1275, 246985, 41140, 208598, 205119, 
                                       88919, 62029, 150000, 147844, 148687, 150097, 148542, 250000, 248934, 290361, 32658, 
                                       269533, 302946, 56953, 157645, 157094, 153331, 152033, 191789, 190120, 310918, 
                                       256545, 179073, 150870, 19609, 201280, 4520, 204767, 206179, 234400, 150863, 205444,
                                       206329])))

In [14]:
classes["music"] = []
classes["music"].extend(metadata[metadata["title"].str.contains("Song")].values[:,1].tolist())
classes["music"].extend(metadata[metadata["title"].str.contains("Dance")].values[:,1].tolist())
classes["music"] = np.asarray(classes["music"])

In [15]:
classes["spoken"] = []
classes["spoken"].extend(metadata[metadata["title"].str.contains("nterview")].values[:,1].tolist())
classes["spoken"].extend(metadata[metadata["title"].str.contains("Speech")].values[:,1].tolist())
classes["spoken"] = np.asarray(classes["spoken"])

In [16]:
classes["sound"] = []
classes["sound"].extend(metadata[metadata["organization"].str.contains("OpenUp")].values[:,1].tolist())
classes["sound"] = np.asarray(classes["sound"])

In [17]:
train_idx    = []
train_labels = []

for c in classes.keys():
    print c
    
    train_idx.extend(classes[c].astype(np.int).tolist())
    train_labels.extend([c for i in range(classes[c].shape[0])])
    
train_idx = np.asarray(train_idx)

spoken
sound
music


In [283]:
len(train_idx)

16387

In [284]:
len(train_labels)

16387

In [18]:
from sklearn.preprocessing import LabelEncoder

In [19]:
le = LabelEncoder()
le.fit(train_labels)

LabelEncoder()

In [23]:
feature_space = np.concatenate([ssd_my_norm, my_rp], axis=1)
feature_space -= feature_space.min(axis=0)
feature_space /= feature_space.max(axis=0)

In [21]:
from sklearn.svm import SVC

In [24]:
svm = SVC(kernel="linear", probability=True)

In [25]:
svm.fit(feature_space[train_idx,:], le.transform(train_labels))

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='linear', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [26]:
proba = svm.predict_proba(feature_space)

In [244]:
audio_template = "<audio controls><source controls src=\"{0}\" type=\"audio/mp3\"></audio>"

html = "<table>"

res = metadata[metadata["title"].str.contains("Dance")].values
res = metadata[metadata["organization"].str.contains("OpenUp")].values
print res.shape[0]

i = 0
for val in res[:10]:
#for val in metadata[metadata["organization"].str.contains("OpenUp")].values[1100:1110]:
    
    label = le.inverse_transform(svm.predict(feature_space[val[1],:]))
    
    html = "%s<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" % (html, 
                                                                     audio_template.format(val[0]),
                                                                     val[1], # idx
                                                                     label,
                                                                     val[2].encode("utf-8"), # title
                                                                     val[4].encode("utf-8"), # lib
                                                                     val[5].encode("utf-8"))                                                           
    
html = "%s</table>" % (html)

#print html
HTML(html)

11538


0,1,2,3,4,5
,8061,['sound'],Chorthippus montanus,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8062,['sound'],Chorthippus parallelus parallelus,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8063,['sound'],Chorthippus pullus,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8064,['music'],Chorthippus pullus,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8065,['sound'],Chorthippus rubratibialis,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8066,['sound'],Chorthippus trinacriae,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8067,['sound'],Chorthippus vagans vagans,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8068,['sound'],Chrysochraon beybienkoi,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8069,['sound'],Chrysochraon dispar dispar,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL
,8070,['sound'],Chrysochraon dispar dispar,OpenUp!,11619_Ag_EU_OpenUp!_NBCNL


In [27]:
def calc_similar_items_subsample(subsample, idx):

    dists_ssd = distance_measures["canberra"](ssd_my_norm[subsample,:], idx)
    dists_ssd = 1 - (dists_ssd / dists_ssd.max())
    
    dists_rp = distance_measures["canberra"](my_rp[subsample,:], idx)
    dists_rp = 1 - (dists_rp / dists_rp.max())

    dists = (dists_ssd * 1.0 + dists_rp * 0.51)
    dists /= dists.max()
    
    nn = np.argsort(dists)[::-1]

    return dists, nn

In [31]:
query_idx = 12100

# calculation
nn         = np.argsort(np.abs(proba - proba[query_idx]).sum(axis=1))
sub_sample = nn[:1000]
#h_nn       = np.argsort(np.abs(feature_space[sub_sample, :] - feature_space[query_idx]).sum(axis=1))


dists, h_nn = calc_similar_items_subsample(nn[:1000],0)
nn = sub_sample[h_nn[:10]]

# ---
audio_template = "<audio controls><source controls src=\"{0}\" type=\"audio/mp3\"></audio>"

html = "<table>"

i = 0
for val in metadata.iloc[nn,:].values:
    
    
    label = le.inverse_transform(np.argmax(proba[nn[i],:]))
    
    style = ""
    
    if i == 0:
        style = "style=\"background: yellow\""
    
    html = "{0}<tr {7}><td>{1}</td><td>{2}</td><td>{3}</td><td>{8}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>".format(html, 
                                                                     audio_template.format(val[0]),
                                                                     nn[i], # nn idx
                                                                     dists[h_nn[i]], # dist
                                                                     val[2].encode("utf-8"), # title
                                                                     val[4].encode("utf-8"), # lib
                                                                     val[5].encode("utf-8"), # col
                                                                     style,
                                                                     label)
                                                                     
    i += 1

    if i > 10:
        break
    
html = "%s</table>" % (html)

#print html
HTML(html)

0,1,2,3,4,5,6
,12100,1.0,sound,"Certhia brachydactyla C. L. Brehm, 1820",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,12086,0.404165502753,sound,"Certhia brachydactyla C. L. Brehm, 1820",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,12102,0.401500202289,sound,"Certhia brachydactyla C. L. Brehm, 1820",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,12909,0.400101705885,sound,"Emberiza calandra Linnaeus, 1758",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,14625,0.400078108971,sound,"Gallinula chloropus (Linnaeus, 1758)",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,12101,0.372986410788,sound,"Certhia brachydactyla C. L. Brehm, 1820",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,12071,0.369148531816,sound,"Certhia brachydactyla C. L. Brehm, 1820",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,18067,0.368636386724,sound,"Remiz pendulinus (Linnaeus, 1758)",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,16767,0.364415545911,sound,"Periparus ater (Linnaeus, 1758)",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
,12997,0.363485238131,sound,"Emberiza citrinella Linnaeus, 1758",OpenUp!,11622_Ag_EU_OpenUp!_MFN_MINERALOGY
