# Evaluation of the MIR Pilot

In [1]:
%matplotlib inline

import matplotlib.pylab as plt

import numpy as np
np.set_printoptions(suppress=True)

import pandas as pd

from IPython.display import display, HTML

from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

**Data Paths**

In [22]:
# Both input files live in the same folder; only the file name differs.
DATA_ROOT = "E:/Data/MIR/EU_SOUNDS_FEATURES"

# .npz archive with the pre-computed audio features
FEATURE_FILE_PATH = DATA_ROOT + "/combined_features.npz"
# pickled metadata table
METADATA_PATH = DATA_ROOT + "/combined_metadata.p"

**Utility Functions**

In [3]:
def expand_query_space(p,q):
    """Return a float array shaped like ``p`` whose every row is ``q``.

    p : array whose ``shape`` determines the shape of the result
        (must have at least two dimensions).
    q : value(s) broadcast into every row of the result.
    """
    # Every cell is overwritten by the broadcast assignment below, so the
    # buffer does not need to be zero-initialised first.
    tiled = np.empty(tuple(p.shape))
    tiled[:, :] = q
    return tiled

**Distance Measures**

In [4]:
# Registry of distance functions, keyed by name.
#
# Every function returns one distance per row of the 2-D `data` matrix.
# NOTE: "canberra" takes the query *vector* as second argument, all other
# measures take the *row index* of the query inside `data`.


def _l1_distance(data, i):
    """Sum of absolute differences between row `i` and every row of `data`."""
    return np.sum(np.abs(data - data[i, :]), axis=1)


def _canberra_distance(data, query):
    """Canberra distance between the vector `query` and every row of `data`."""
    # A component that is zero in both vectors produces 0/0 = NaN; nansum
    # counts such components as 0, so the warning is only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratios = np.abs(data - query) / (np.abs(data) + np.abs(query))
    return np.nansum(ratios, axis=1)


def _wave_hedges_distance(data, i):
    """Sum of |x - q| / max(x, q) between row `i` (q) and every row x."""
    # The query is row `i`; the previous version always used row 1 for the
    # denominator regardless of `i`.
    query = data[i, :]
    return np.sum(np.abs(data - query) / np.maximum(data, query), axis=1)


def _dice_distance(data, i):
    """Squared difference divided by the sum of both squared norms."""
    # The query is row `i`; the previous version always used row 1 for the
    # query norm regardless of `i`.
    query = data[i, :]
    return np.sum((data - query) ** 2, axis=1) / \
        (np.sum(data ** 2, axis=1) + np.sum(query ** 2))


def _braycurtis_distance(data, i):
    """Sum of |x - q| divided by sum of |x + q| for row `i` (q) and every row x."""
    query = data[i, :]
    return np.abs(data - query).sum(axis=1) / np.abs(data + query).sum(axis=1)


def _cosine_distance(dat, i):
    """Absolute value of (1 - cosine similarity) between row `i` and every row."""
    query = dat[i, :]
    norms = np.sqrt(np.sum(dat ** 2, axis=1)) * np.sqrt(np.sum(query ** 2))
    return np.abs(1 - np.sum(dat * query, axis=1) / norms)


distance_measures = {
    "l1":          _l1_distance,
    "canberra":    _canberra_distance,
    "wave_hedges": _wave_hedges_distance,
    "dice":        _dice_distance,
    "braycurtis":  _braycurtis_distance,
    "cosine":      _cosine_distance,
}

## Load Data

### Load Metadata

In [34]:
# Load the two pickled metadata tables.
# NOTE(review): unpickling can execute arbitrary code - only load files from a
# trusted source.
metadata  = pd.read_pickle(METADATA_PATH)
metadata2 = pd.read_pickle("E:/Data/MIR/EU_SOUNDS_FEATURES/aggregated_metadata.p")
# The join of both tables is disabled. With it enabled, overlapping column names
# would receive the suffixes 'b' (from metadata) and 'a' (from metadata2).
#m         = metadata.join(metadata2, how='right', rsuffix='a', lsuffix='b')
# `m` is therefore just another name for `metadata` (same object, no copy).
m = metadata

### Load Feature Data

In [24]:
# Feature matrices keyed by feature name; rows are aligned with the rows of
# `metadata` via the "index" / "index_librosa" columns.
features = {}

# --- SSD / RP features --------------------------------------------------
# The `with` block closes the archive even if one of the lookups fails.
with np.load(FEATURE_FILE_PATH) as npz:

    rows = metadata["index"].values

    for feature_name in ("ssd", "rp"):
        features[feature_name] = npz[feature_name][rows, :]

# --- librosa features ---------------------------------------------------
# The per-feature min-max scaling that was present here as commented-out code
# is not applied: features are used exactly as stored.
with np.load("E:/Data/MIR/EU_SOUNDS_FEATURES/combined_librosa_features.npz") as npz:

    rows = metadata["index_librosa"].values

    for feature_name in ("mfcc",
                         "chroma",
                         "rmse",
                         "spectral_centroid",
                         "spectral_bandwidth",
                         "spectral_contrast",
                         "spectral_rolloff",
                         "tonnetz",
                         "zero_crossing_rate"):
        features[feature_name] = npz[feature_name][rows, :]

    # bpm is stored as a 1-D vector; turn it into a single-column matrix so
    # it can be handled like every other feature.
    features["bpm"] = npz["bpm"][rows].reshape(-1, 1)

In [8]:
# (feature name, weight) pairs used for the combined similarity.
# Kept as an ordered sequence so the summation order is fixed.
DEFAULT_FEATURE_WEIGHTS = (
    ("ssd",                0.9),
    ("rp",                 1.99),
    ("mfcc",               2.5),
    ("chroma",             1.3),
    ("bpm",                0.8),
    ("rmse",               1.0),
    ("spectral_centroid",  0.9),
    ("tonnetz",            1.3),
    ("zero_crossing_rate", 0.2),
)


def _feature_similarity(data, query_idx):
    """Canberra distance of every row to row `query_idx`, mapped to a similarity.

    Distances are divided by their maximum and subtracted from 1, so the
    most distant row gets 0 and rows identical to the query get 1.
    """
    dists = distance_measures["canberra"](data, data[query_idx, :])

    # If every row equals the query the maximum is 0; dividing by it would
    # turn the whole vector into NaN, so leave the (all-zero) distances as is.
    peak = dists.max()
    if peak > 0:
        dists = dists / peak

    return 1 - dists


def calc_similar_items(query_idx, weights=None):
    """Rank all items by their weighted similarity to the item at `query_idx`.

    Parameters
    ----------
    query_idx : int
        Row index of the query item in the matrices stored in `features`.
    weights : sequence of (feature_name, weight) pairs, optional
        Features to combine and their weights. Defaults to
        DEFAULT_FEATURE_WEIGHTS.

    Returns
    -------
    sims : ndarray
        Combined similarity per item, scaled so that the maximum is 1.
    nn : ndarray
        Item indices ordered from most to least similar.
    """
    if weights is None:
        weights = DEFAULT_FEATURE_WEIGHTS

    sims = None

    for feature_name, weight in weights:
        weighted = _feature_similarity(features[feature_name], query_idx) * weight
        sims = weighted if sims is None else sims + weighted

    # scale to a maximum of 1 (skipped when there is nothing to scale)
    top = sims.max()
    if top > 0:
        sims = sims / top

    nn = np.argsort(sims)[::-1]

    return sims, nn

## Perform evaluations

In [9]:
def perform_evaluation(eval_class_ids, cutoff, sub_sample_size=None, random_state=None):
    """Compute precision at `cutoff` for queries taken from one class.

    Each evaluated item is used as a query for `calc_similar_items`; the
    precision is the share of its `cutoff` most similar items (the query
    itself excluded) that also belong to `eval_class_ids`.

    Parameters
    ----------
    eval_class_ids : array-like
        Feature-row indices of all items of the class. Values are cast to
        unsigned integers, so they must be non-negative and not NaN.
    cutoff : int
        Length of the result list that is evaluated.
    sub_sample_size : int, optional
        If given, this many queries are drawn at random (with replacement)
        from `eval_class_ids`; otherwise every item is used as a query.
    random_state : int, optional
        Seed for the sub-sampling. When omitted the global numpy random
        state is used.

    Returns
    -------
    list of float
        One precision value per evaluated query.
    """
    eval_class_ids = np.asarray(eval_class_ids)

    # define size of eval-set
    if sub_sample_size:
        if random_state is None:
            sampler = np.random
        else:
            sampler = np.random.RandomState(random_state)
        eval_instances = sampler.choice(eval_class_ids, sub_sample_size)
    else:
        eval_instances = eval_class_ids

    eval_instances = eval_instances.astype(np.uint32)

    result_precision = []

    # iterate over all eval-instances
    for processed, eval_id in enumerate(eval_instances, 1):

        # calculate nearest neighbors
        _, nn = calc_similar_items(eval_id)

        # Drop the query explicitly instead of assuming it is ranked first:
        # with tied similarities it can appear anywhere among the top
        # entries and would otherwise be counted as its own true positive.
        eval_result_list_cutoff = nn[nn != eval_id][:cutoff]

        # calculate true positives
        true_positives = np.intersect1d(eval_result_list_cutoff, eval_class_ids)

        # calculate precision
        result_precision.append(float(true_positives.shape[0]) / float(cutoff))

        # progress indicator
        if processed % 10 == 0:
            print(processed)

    return result_precision

#perform_evaluation(eval_class_ids, 10, 10)

## Build Ground-Truth Queries

In [36]:
# Metadata columns that are scanned for the ground-truth keywords.
# Most of them follow the pattern "proxies.0.<field>.def", so only the
# field names are spelled out and the column names are assembled below.
_PROXY_FIELDS = ("dctermsCreated",
                 "dctermsExtent",
                 "dctermsMedium",
                 "dctermsSpatial",
                 "dctermsTOC",
                 "dctermsIsPartOf",
                 "dcFormat",
                 "dcCoverage",
                 "dcContributor",
                 "dcCreator",
                 "dcSubject",
                 "dcLanguage",
                 "dcDescription",
                 "dcType")

elements_to_search = ["title",
                      "aggregations.0.edmDataProvider.def",
                      "aggregations.0.edmProvider.def"]
elements_to_search += list(map("proxies.0.{0}.def".format, _PROXY_FIELDS))
elements_to_search += ["europeanaAggregation.edmCountry.def",
                       "europeanaAggregation.edmLanguage.def"]

In [11]:
# Keyword sets that were tried (enable exactly one):
#keywords = ["string", "quartet", "a minor", "andante"]
#keywords = ["piano", "Eb", "allegro", "sonata"]
#keywords = ["string", "quartet", "allegro", "C Minor"]
#keywords = ["bebop", "jazz"]
#keywords = ["cassette", "fiddle"]
keywords = ["interview"]

# One boolean mask per metadata column: a row matches a column when that
# column contains ALL keywords (case-insensitive; missing values never match).
# The masks are computed on `m`, the same frame they are applied to below, so
# mask and frame always have matching rows.
logic_result = [
    np.logical_and.reduce([m[elem].str.contains(keyword, case=False, na=False)
                           for keyword in keywords])
    for elem in elements_to_search
]

# a row is selected when at least one column matched
res = m[np.logical_or.reduce(logic_result)]

# only select instances where audio features are available
res = res[res["index"].notnull() & res["index_librosa"].notnull()]

res

KeyError: 'titlea'

In [12]:
def get_logical_mask(data, terms):
    """Return one boolean mask per column listed in `elements_to_search`.

    A row is True in a column's mask when that column contains at least
    one of `terms` (case-insensitive); missing values count as no match.
    """
    masks = []
    for column in elements_to_search:
        hits_per_term = [data[column].str.contains(term, case=False).fillna(False)
                         for term in terms]
        masks.append(np.logical_or.reduce(hits_per_term))
    return masks


def join_logical_mask(logical_mask):
    """Combine a list of boolean masks with a logical OR."""
    return np.logical_or.reduce(logical_mask)


def filter_metadata(data, terms):
    """Keep the rows of `data` in which any searched column contains any term."""
    per_column = get_logical_mask(data, terms)
    return data[join_logical_mask(per_column)]


def filter_available_audio_features(data):
    """Keep the rows of `data` that point to existing audio features.

    A row is kept when neither "index" nor "index_librosa" is NaN and
    "index" is smaller than the number of rows of features["rp"].
    """
    has_index   = data["index"].apply(np.isnan) == False
    has_librosa = data["index_librosa"].apply(np.isnan) == False
    in_range    = data["index"] < features["rp"].shape[0]
    return data[has_index & has_librosa & in_range]

In [13]:
import nltk
from nltk.corpus import stopwords

In [14]:
# Free-text metadata columns whose words are counted.
# Each column is listed once; listing a column twice would count its words
# twice.
# NOTE(review): "titlea" / "titleb" only exist when the metadata join (with
# suffixes 'a' / 'b') is enabled - columns missing from `res` are skipped.
test_columns = ["titlea",
                "titleb",
                "proxies.0.dctermsSpatial.def",
                "proxies.0.dctermsTOC.def",
                "proxies.0.dcContributor.def",
                "proxies.0.dcSubject.def",
                "proxies.0.dcDescription.def",
                "proxies.0.dctermsMedium.def"]

words = []

for tcol in test_columns:

    if tcol not in res.columns:
        print("skipping missing column: " + tcol)
        continue

    print(tcol)

    for t in res[tcol]:

        # empty cells (None / NaN) contain nothing to tokenize
        if t is None or (isinstance(t, float) and np.isnan(t)):
            continue

        try:
            words.extend(w.lower() for w in nltk.word_tokenize(t))
        except TypeError:
            # value is not text - show it and carry on; any other error
            # (e.g. missing nltk resources) is deliberately not swallowed
            print(t)

# load every stop word list once instead of once per word
english_stopwords = set(stopwords.words('english'))
all_stopwords = english_stopwords.union(stopwords.words('german'),
                                        stopwords.words('italian'))

filtered_words = [word for word in words if word not in english_stopwords]

fd = nltk.FreqDist(words)

fd_pd = pd.DataFrame({"words": list(fd.keys()), "counts": list(fd.values())})

# drop stop words and very short tokens, then show the 50 most frequent words
fin = fd_pd[fd_pd["words"].isin(all_stopwords) == False]
fin = fin[fin["words"].str.len() > 2]
fin.sort_values("counts", ascending=False)[["words", "counts"]][:50]

titlea


NameError: name 'res' is not defined

### Classical music

In [51]:
# all metadata rows mentioning "classical" in one of the searched columns
res = filter_metadata(data=metadata, terms=["classical"])
res.shape

(28569, 29)

In [292]:
# Evaluation class: rows mentioning "jazz" in one of the searched columns,
# restricted to those for which audio features are available.
res = filter_available_audio_features(filter_metadata(m, ["jazz"]))

res.shape

(1094, 37)

top 5

In [293]:
# mean precision of the top 5 results, 10 randomly drawn queries
eval_precision = perform_evaluation(res["index"].values, cutoff=5, sub_sample_size=10)
np.mean(eval_precision)

10


0.26000000000000001

In [52]:
# mean precision of the single top result, 10 randomly drawn queries
eval_precision = perform_evaluation(res["index"].values, cutoff=1, sub_sample_size=10)
np.mean(eval_precision)

10


0.29999999999999999

In [49]:
# mean precision of the top 24 results, 10 randomly drawn queries
eval_precision = perform_evaluation(res["index"].values, cutoff=24, sub_sample_size=10)
np.mean(eval_precision)

10


0.079166666666666663

In [287]:
# mean precision of the top 5 results, every item of `res` used as a query
eval_precision = perform_evaluation(res["index"].values, cutoff=5)
np.mean(eval_precision)

10
20
30


0.11282051282051284

top 10

In [288]:
# mean precision of the top 10 results, every item of `res` used as a query
eval_precision = perform_evaluation(res["index"].values, cutoff=10)
np.mean(eval_precision)

10
20
30


0.089743589743589758

top 15

In [289]:
# mean precision of the top 15 results, every item of `res` used as a query
eval_precision = perform_evaluation(res["index"].values, cutoff=15)
np.mean(eval_precision)

10
20
30


0.092307692307692313

top 20

In [290]:
# mean precision of the top 20 results, every item of `res` used as a query
eval_precision = perform_evaluation(res["index"].values, cutoff=20)
np.mean(eval_precision)

10
20
30


0.09358974358974359

full page

In [291]:
# mean precision of the top 24 results, every item of `res` used as a query
eval_precision = perform_evaluation(res["index"].values, cutoff=24)
np.mean(eval_precision)

10
20
30


0.090811965811965809