---

# Evaluation metrics computation

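> This notebook computes retrieval evaluation metrics — mean reciprocal rank (MRR) and precision at k (p@1, p@3, p@10) — from the per-query result files of each feature and collection.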

---

In [66]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [117]:
queries = pd.read_csv('metadata/Queries.csv')

# Song IDs available in the collection: each entry name in chroma/raw, minus its
# extension, is parsed as an integer ID. Entries that do not parse (for example
# ".ipynb_checkpoints" or ".DS_Store") are skipped rather than aborting the cell.
collection_ids = []
for entry in os.listdir('chroma/raw'):
    stem, _ext = os.path.splitext(entry)
    try:
        collection_ids.append(int(stem))
    except ValueError:
        continue

# Keep only the queries whose target song exists in the collection; the original
# row index is preserved so rows can be traced back to Queries.csv.
queries = queries.loc[queries["Song ID"].isin(collection_ids), ["Query ID", "Song ID"]]
queries.head()

Unnamed: 0,Query ID,Song ID
0,q1,1118
1,q2,1438
4,q5,1808
7,q8,593
8,q9,1224


## Compute mean reciprocal rank and precision at k

In [118]:
def compute_mrr(feature, collection, query_table=None):
    """Compute the mean reciprocal rank (MRR) of the target song over all queries.

    For every query, the ranked result list is read from
    ``results/{feature}/{collection}/{Query ID}.csv`` and the 1-based row
    position of the first row whose ``Song ID`` equals the query's target song
    is taken as its rank. A target that does not appear in the result list
    contributes a reciprocal rank of 0.

    Parameters
    ----------
    feature : str
        Name of the feature sub-directory under ``results/``.
    collection : str
        Name of the collection sub-directory under ``results/{feature}/``.
    query_table : pandas.DataFrame, optional
        Table with ``Query ID`` and ``Song ID`` columns. Defaults to the
        notebook-level ``queries`` frame.

    Returns
    -------
    float
        MRR expressed as a percentage, rounded to two decimals.

    Raises
    ------
    ValueError
        If the query table has no rows (the mean would be undefined).
    """
    if query_table is None:
        query_table = queries

    if len(query_table) == 0:
        raise ValueError("cannot compute MRR: the query table is empty")

    reciprocal_ranks = []

    for query_id, song_id in zip(query_table["Query ID"], query_table["Song ID"]):
        # Load results for the query
        results = pd.read_csv(f"results/{feature}/{collection}/{query_id}.csv")

        # Row positions (0-based) at which the target song occurs; using
        # positions rather than index labels keeps the rank correct even if
        # the frame's index is not 0..n-1.
        positions = np.flatnonzero((results["Song ID"] == song_id).to_numpy())

        if positions.size:
            reciprocal_ranks.append(1 / (int(positions[0]) + 1))
        else:
            # Target not retrieved at all: reciprocal rank is 0 by convention
            reciprocal_ranks.append(0.0)

    # Compute mean reciprocal rank
    mrr = sum(reciprocal_ranks) / len(reciprocal_ranks)

    return round(mrr * 100.0, 2)

In [119]:
def compute_precision(feature, collection, k, query_table=None):
    """Compute the share of queries whose target song is ranked within the top ``k``.

    For every query, the ranked result list is read from
    ``results/{feature}/{collection}/{Query ID}.csv``. A query scores 1 when the
    1-based row position of its target ``Song ID`` is at most ``k`` and 0
    otherwise (including when the target is absent from the list). Note that
    each query therefore contributes 0 or 1 — a top-k hit rate — rather than
    the number of hits divided by ``k``.

    Parameters
    ----------
    feature : str
        Name of the feature sub-directory under ``results/``.
    collection : str
        Name of the collection sub-directory under ``results/{feature}/``.
    k : int
        Rank cutoff.
    query_table : pandas.DataFrame, optional
        Table with ``Query ID`` and ``Song ID`` columns. Defaults to the
        notebook-level ``queries`` frame.

    Returns
    -------
    float
        Mean score over all queries as a percentage, rounded to two decimals.

    Raises
    ------
    ValueError
        If the query table has no rows (the mean would be undefined).
    """
    if query_table is None:
        query_table = queries

    if len(query_table) == 0:
        raise ValueError("cannot compute precision: the query table is empty")

    precisions = []

    for query_id, song_id in zip(query_table["Query ID"], query_table["Song ID"]):
        # Load results for the query
        results = pd.read_csv(f"results/{feature}/{collection}/{query_id}.csv")

        # Row positions (0-based) at which the target song occurs
        positions = np.flatnonzero((results["Song ID"] == song_id).to_numpy())

        # A missing target can never be inside the top k, so it scores 0
        hit = bool(positions.size) and int(positions[0]) + 1 <= k
        precisions.append(1 if hit else 0)

    # Compute mean precision over all queries
    p = sum(precisions) / len(precisions)

    return round(p * 100.0, 2)

In [120]:
def compute_scores(feature, collections=('raw', 'spleeter'), ks=(1, 3, 10)):
    """Build a table of MRR and precision@k scores for one feature.

    Parameters
    ----------
    feature : str
        Feature name forwarded to ``compute_mrr`` and ``compute_precision``.
    collections : iterable of str, optional
        Collections to evaluate; one output row per collection. Defaults to
        ``('raw', 'spleeter')``. Further collections can be passed here,
        provided result files exist for them.
    ks : iterable of int, optional
        Rank cutoffs; one ``p@{k}`` column per cutoff. Defaults to ``(1, 3, 10)``.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by collection name, with an ``MRR`` column followed by
        one ``p@{k}`` column for each cutoff in ``ks``.
    """
    scores = {}

    for collection in collections:
        # Compute metrics: MRR first, then one precision value per cutoff
        metrics = {'MRR': compute_mrr(feature, collection)}
        for k in ks:
            metrics[f'p@{k}'] = compute_precision(feature, collection, k)

        scores[collection] = metrics

    # The outer dict is keyed by collection, so transpose to get one row each
    return pd.DataFrame(scores).transpose()

## Scores for chroma features

In [121]:
# Score the 'chroma' results with compute_scores; the bare last expression
# renders the resulting table as the cell output.
scores_chroma = compute_scores('chroma')
scores_chroma

Unnamed: 0,MRR,p@1,p@3,p@10
raw,11.9,7.94,11.11,17.46
spleeter,12.58,9.52,12.7,17.46


## Scores for pitch tracks

In [122]:
# Score the 'pitch' results with compute_scores; the bare last expression
# renders the resulting table as the cell output.
scores_pitch = compute_scores('pitch')
scores_pitch

Unnamed: 0,MRR,p@1,p@3,p@10
raw,1.16,0.0,0.0,0.0
spleeter,26.41,20.63,26.98,34.92
