# NDCG & MAP walkthrough
This notebook evaluates synthetic Swag Labs search results with NDCG@k and MAP@k.

In [None]:
import pandas as pd
from pathlib import Path

# Load the relevance judgments and the ranked runs from CSV files under data/.
# The metric functions further down read the query_id, doc_id and grade columns
# of the judgments, and the query_id, doc_id and rank columns of the runs.
judgments = pd.read_csv(Path('data/judgments.csv'))
runs = pd.read_csv(Path('data/runs.csv'))
# Last expression of the cell: previews the first rows of the judgments table.
judgments.head()

In [None]:
import math

def dcg(relevances):
    """Return the discounted cumulative gain of a ranked list of grades.

    The grade at 1-based position ``p`` contributes ``grade / log2(p + 1)``,
    so the first result is undiscounted and later results count for less.
    An empty list yields ``0``.
    """
    discounted_gains = (
        gain / math.log2(position + 1)
        for position, gain in enumerate(relevances, start=1)
    )
    return sum(discounted_gains)

def ndcg_at_k(jdf, rdf, k):
    """Return the mean NDCG@k over every judged query.

    Args:
        jdf: Judgments frame with ``query_id``, ``doc_id`` and ``grade``
            columns.
        rdf: Run frame with ``query_id``, ``doc_id`` and ``rank`` columns.
        k: Rank cutoff applied to both the run and the ideal ranking.

    Returns:
        The average of the per-query NDCG@k values as a float. A query
        scores 0.0 when it has no rows in ``rdf`` or when its ideal DCG is
        zero (no positive grades). Returns 0.0 when ``jdf`` has no queries.
    """
    # Materialise the run groups once so that a judged query that is absent
    # from the run can be detected with a plain lookup instead of the
    # KeyError raised by ``GroupBy.get_group``.
    runs_by_query = {qid: group for qid, group in rdf.groupby('query_id')}

    scores = []
    for qid, rel_group in jdf.groupby('query_id'):
        # A plain dict keeps lookups scalar even if a doc_id is judged twice
        # (the last grade wins). Grades are kept as floats on both the ideal
        # and the retrieved side so fractional grades are not truncated on
        # one side only.
        gains = {
            doc_id: float(grade)
            for doc_id, grade in zip(rel_group['doc_id'], rel_group['grade'])
        }
        ideal_dcg = dcg(sorted(gains.values(), reverse=True)[:k])

        run = runs_by_query.get(qid)
        if run is None or not ideal_dcg:
            scores.append(0.0)
            continue

        top_docs = run.sort_values('rank').head(k)['doc_id']
        # Documents without a judgment are treated as non-relevant.
        retrieved = [gains.get(doc_id, 0.0) for doc_id in top_docs]
        scores.append(dcg(retrieved) / ideal_dcg)

    return sum(scores) / len(scores) if scores else 0.0

def average_precision(relevances, total_relevant=None):
    """Return the average precision of one ranked list.

    Args:
        relevances: Relevance values in rank order; a value greater than
            zero marks a relevant result.
        total_relevant: Number of relevant documents the ranking could have
            returned. When omitted, the number of relevant results found in
            ``relevances`` is used, which ignores relevant documents that
            were never retrieved.

    Returns:
        The sum of precision values at each relevant rank divided by the
        normaliser, or 0.0 when the normaliser is zero.
    """
    hits = 0
    precision_sum = 0.0
    for rank, rel in enumerate(relevances, start=1):
        if rel > 0:
            hits += 1
            precision_sum += hits / rank
    normaliser = hits if total_relevant is None else total_relevant
    return precision_sum / normaliser if normaliser else 0.0


def mean_average_precision(jdf, rdf, k=None):
    """Return the mean average precision over every judged query.

    Args:
        jdf: Judgments frame with ``query_id``, ``doc_id`` and ``grade``
            columns. A document is relevant when ``int(grade) > 0``.
        rdf: Run frame with ``query_id``, ``doc_id`` and ``rank`` columns.
        k: Optional rank cutoff. ``None`` scores the full ranking.

    Returns:
        The average of the per-query AP values as a float. Each AP is
        normalised by the number of judged-relevant documents for the query
        (capped at ``k`` when a cutoff is given), so relevant documents that
        were not retrieved lower the score. A query with no rows in ``rdf``
        scores 0.0. Returns 0.0 when ``jdf`` has no queries.
    """
    # Materialise the run groups once so that a judged query that is absent
    # from the run can be detected with a plain lookup instead of the
    # KeyError raised by ``GroupBy.get_group``.
    runs_by_query = {qid: group for qid, group in rdf.groupby('query_id')}
    has_cutoff = k is not None  # ``if k:`` would treat k=0 as "no cutoff"

    scores = []
    for qid, rel_group in jdf.groupby('query_id'):
        gains = {
            doc_id: int(grade)
            for doc_id, grade in zip(rel_group['doc_id'], rel_group['grade'])
        }
        total_relevant = sum(1 for grade in gains.values() if grade > 0)
        if has_cutoff:
            # At most k relevant documents fit in the top k, so cap the
            # normaliser to keep a perfect AP@k equal to 1.0.
            total_relevant = min(total_relevant, k)

        run = runs_by_query.get(qid)
        if run is None:
            scores.append(0.0)
            continue

        ranked = run.sort_values('rank')
        if has_cutoff:
            ranked = ranked.head(k)
        # Documents without a judgment are treated as non-relevant.
        relevances = [gains.get(doc_id, 0) for doc_id in ranked['doc_id']]
        scores.append(average_precision(relevances, total_relevant))

    return sum(scores) / len(scores) if scores else 0.0


In [None]:
# Score the loaded runs at cutoffs 3 and 5 with both metrics, then show the
# four values as a single-row table.
summary = {}
summary['ndcg@3'] = ndcg_at_k(judgments, runs, k=3)
summary['ndcg@5'] = ndcg_at_k(judgments, runs, k=5)
summary['map@3'] = mean_average_precision(judgments, runs, k=3)
summary['map@5'] = mean_average_precision(judgments, runs, k=5)
pd.DataFrame([summary])