In [1]:
import pandas as pd
import json
import os
import math

In [2]:
# Search queries that were evaluated, in evaluation order (the position in
# this list is what later cells use as the 1-based query number).
queries = [
    'avocado',
    'stroopwafels',
    'citroen',
    'biologische eieren',
    'tandpasta',
    'wasmiddel',
    'durex condooms',
    'spa bruisend',
    'kaas belegen',
    'baby shampoo',
    'hak appelmoes',
    'aviko oven friet',
    'groene olijven',
    'cashewnoten',
    'amstel bier krat',
    'paprika chips',
    'vuilniszak',
    'schnitzel',
    'slagroom',
    'shoarma',
]

In [3]:
def load_evals(dir):
    """Load every ``*.json`` file in a directory.

    Args:
        dir: Path of the directory to scan (not recursive).

    Returns:
        A list with the parsed content of each JSON file, ordered by file
        name so that repeated runs yield the same order on every platform.

    Raises:
        OSError: If the directory or one of the files cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    evals = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for file in sorted(os.listdir(dir)):
        if file.endswith('.json'):
            # The context manager closes the handle even if parsing fails;
            # JSON is UTF-8, so do not rely on the platform default encoding.
            with open(os.path.join(dir, file), encoding='utf-8') as input_file:
                evals.append(json.load(input_file))
    return evals

In [4]:
def mean_score(evals, score_index):
    """Average one metric over several evaluations, rounded to 3 decimals.

    Args:
        evals: Evaluations, each with a ``'metrics'`` list whose entries
            carry a ``'value'``.
        score_index: Position of the wanted metric in each ``'metrics'`` list.

    Returns:
        The arithmetic mean of the selected metric values, rounded to three
        decimal places.
    """
    values = (evaluation['metrics'][score_index]['value'] for evaluation in evals)
    total = sum(values)
    return round(total / len(evals), 3)

In [5]:
def dcg(evals):
    """Compute the discounted cumulative gain of a ranked result list.

    Each result at 1-based rank ``k`` with relevance ``rel`` contributes
    ``(2**rel - 1) / log2(k + 1)``.

    Args:
        evals: Ranked evaluations (best rank first), each with a
            ``'relevant'`` entry that ``int()`` can convert.

    Returns:
        The summed gain; ``0`` for an empty list.
    """
    # The rank comes from the position in the list. Looking it up with
    # evals.index(e) would return the FIRST equal element, so identical
    # evaluations would all be scored at the same (too favourable) rank.
    return sum(
        (math.pow(2, int(e['relevant'])) - 1) / math.log2(k + 1)
        for k, e in enumerate(evals, start=1)
    )

def dcg_query(query):
    """Return the DCG of the ranked results stored under ``query['evaluations']``."""
    return dcg(query['evaluations'])

def ndcg_query(query):
    """Compute the normalised DCG for one evaluated query.

    The DCG of the ranking as stored is divided by the DCG of the same
    results re-ordered by descending relevance (the ideal ranking).

    Args:
        query: Mapping with an ``'evaluations'`` list in rank order; each
            entry has a ``'relevant'`` value.

    Returns:
        The nDCG, or ``0.0`` when the ideal DCG is zero (no relevant result,
        or no results at all), where the ratio would otherwise be undefined.
    """
    evals = query['evaluations']
    _dcg = dcg(evals)
    # Sort on the same int() conversion that dcg() applies to the relevance,
    # so both agree on what "more relevant" means.
    evals_ideal = sorted(evals, key=lambda e: int(e['relevant']), reverse=True)
    dcg_ideal = dcg(evals_ideal)

    if dcg_ideal == 0:
        # Nothing relevant was retrieved: avoid a ZeroDivisionError.
        return 0.0
    return _dcg / dcg_ideal

In [9]:
def mean_ndcg(evals):
    """Average the nDCG of several evaluated queries, rounded to 3 decimals.

    Args:
        evals: Query evaluations accepted by ``ndcg_query``.

    Returns:
        The arithmetic mean of the individual nDCG values, rounded to three
        decimal places.
    """
    total = sum(ndcg_query(evaluation) for evaluation in evals)
    return round(total / len(evals), 3)

In [10]:
# Read each assessor's evaluation files: one list of parsed JSON documents
# per assessor directory.
stijn, don, wessel = [
    load_evals(assessor_dir)
    for assessor_dir in (
        '../data/evaluations/grp4_stijn',
        '../data/evaluations/grp6_don',
        '../data/evaluations/grp6_wessel',
    )
]

In [11]:
# Output column -> position of that metric in an evaluation's 'metrics' list.
# NOTE(review): the positions are carried over unchanged from the original
# lookups; confirm them against the evaluation file format.
METRIC_COLUMNS = (('mp@5', 0), ('mp@10', 1), ('mrr', 3), ('mAP', 4))


def _find_eval(assessor_evals, query, assessor):
    """Return the assessor's evaluation whose stripped query equals `query`.

    Raises:
        ValueError: If the assessor has no evaluation for the query, so the
            problem is reported by name rather than as an opaque
            "'NoneType' object is not subscriptable" further down.
    """
    match = next((e for e in assessor_evals if e['query'].strip() == query), None)
    if match is None:
        raise ValueError('no evaluation for query %r by assessor %r' % (query, assessor))
    return match


# Build one row per query with every metric averaged over the three assessors.
averages = []
# enumerate() provides the 1-based query number directly; queries.index(q)
# rescans the list each time and returns the first match for a repeated query.
for query_nr, q in enumerate(queries, start=1):
    st = _find_eval(stijn, q, 'stijn')
    dn = _find_eval(don, q, 'don')
    ws = _find_eval(wessel, q, 'wessel')
    per_assessor = [st, dn, ws]

    avg = {}
    avg['query_nr'] = query_nr
    avg['query'] = st['query']
    for column, metric_index in METRIC_COLUMNS:
        avg[column] = mean_score(per_assessor, metric_index)
    avg['mndcg'] = mean_ndcg(per_assessor)
    averages.append(avg)

df_averages = pd.DataFrame(averages)

In [12]:
# Display the per-query table of metrics averaged over the three assessors.
df_averages

Unnamed: 0,mAP,mndcg,mp@10,mp@5,mrr,query,query_nr
0,0.917,0.971,0.4,0.6,1.0,avocado,1
1,0.881,0.947,0.9,0.8,1.0,stroopwafels,2
2,0.75,0.877,0.2,0.4,1.0,citroen,3
3,1.0,1.0,0.4,0.8,1.0,biologische eieren,4
4,1.0,1.0,1.0,1.0,1.0,tandpasta,5
5,1.0,1.0,1.0,1.0,1.0,wasmiddel,6
6,0.87,0.956,0.8,0.8,1.0,durex condooms,7
7,0.96,0.987,0.933,0.933,1.0,spa bruisend,8
8,0.986,1.0,0.967,1.0,1.0,kaas belegen,9
9,1.0,1.0,0.4,0.8,1.0,baby shampoo,10


In [13]:
# Overall mean of every numeric column. The frame also holds the string
# column 'query'; numeric_only=True skips it explicitly, because recent pandas
# versions raise a TypeError instead of silently dropping non-numeric columns.
df_averages.mean(numeric_only=True)

mAP          0.91555
mndcg        0.95730
mp@10        0.73665
mp@5         0.82000
mrr          0.97220
query_nr    10.50000
dtype: float64

## Inter-assessor Agreement

In [32]:
def extract_evaluations(sorted_list):
    """Reduce query evaluations to plain integer relevance labels.

    Args:
        sorted_list: Query evaluations, each with an ``'evaluations'`` list
            whose entries carry a ``'relevant'`` value.

    Returns:
        One list per query, holding ``int(relevant)`` for each of its
        results, in the same order as the input.
    """
    return [
        [int(result['relevant']) for result in query['evaluations']]
        for query in sorted_list
    ]

In [33]:
# Order every assessor's evaluations by whitespace-trimmed query text, giving
# all three lists the same query order for the position-by-position
# comparison done in coef().
# NOTE(review): this only lines up if all assessors judged the same queries.
st_sorted, dn_sorted, ws_sorted = (
    sorted(assessor_evals, key=lambda evaluation: evaluation['query'].strip())
    for assessor_evals in (stijn, don, wessel)
)

In [40]:
# Per assessor: one list of integer relevance labels per query.
st_evals, dn_evals, ws_evals = map(
    extract_evaluations, (st_sorted, dn_sorted, ws_sorted)
)

In [88]:
def nr_agreed(evals1, evals2):
    """Print the result of ``evals1 + evals2``.

    NOTE(review): despite its name this helper does not count agreements; it
    only prints the combined value and returns ``None``. It looks like a
    debugging leftover. Confirm whether it is still needed.
    """
    combined = evals1 + evals2
    print(combined)
    
def coef(ev1, ev2):
    """Compute Cohen's kappa between two assessors' relevance judgements.

    Both inputs are flattened and compared position by position. The chance
    agreement is derived from the sums of the labels, so the labels are
    expected to be binary (1 = relevant, 0 = not relevant).

    Args:
        ev1: First assessor's labels, as a list of per-query label lists.
        ev2: Second assessor's labels, in the same order as ``ev1``.

    Returns:
        ``(pA - pE) / (1 - pE)``, where ``pA`` is the observed and ``pE`` the
        chance agreement. When ``pE`` is 1 (both assessors gave every result
        the same single label) the ratio is undefined and ``1.0`` is
        returned, since the assessors agree completely.

    Raises:
        ValueError: If the inputs hold a different number of judgements, or
            none at all.
    """
    labels_1 = [val for sublist in ev1 for val in sublist]
    labels_2 = [val for sublist in ev2 for val in sublist]

    # Comparing by index would silently ignore surplus labels in the second
    # list (or fail with an IndexError if it is shorter); reject both cases.
    if len(labels_1) != len(labels_2):
        raise ValueError(
            'assessors judged a different number of results: %d vs %d'
            % (len(labels_1), len(labels_2))
        )
    total_evals = len(labels_1)
    if total_evals == 0:
        raise ValueError('cannot compute agreement without any judgements')

    rel1 = sum(labels_1)
    rel2 = sum(labels_2)
    agreed_nr = sum(int(a == b) for a, b in zip(labels_1, labels_2))

    pA = agreed_nr / total_evals
    # Chance agreement: both say "relevant" plus both say "not relevant".
    pRel = rel1 / total_evals * rel2 / total_evals
    pIrrel = (total_evals - rel1) / total_evals * (total_evals - rel2) / total_evals
    pE = pRel + pIrrel

    if pE == 1:
        # Degenerate case: the denominator would be zero.
        return 1.0
    return (pA - pE) / (1 - pE)

In [89]:
a = coef(st_evals, dn_evals)
b = coef(st_evals, ws_evals)
c = coef(dn_evals, ws_evals)

print((a + b + c)/3)

0.7674039746281536
