# IR Metrics



* Precision

In [1]:
import numpy as np

In [3]:

def precision(relevance: list):
    """
    Computes the precision of a given array, i.e. the fraction of its
    entries that are marked as relevant.
    An empty list is given a precision of 0.0 instead of dividing by zero.
    :param relevance: a binary list
    :return: float value of the precision of the given list
    """
    # len() is used (rather than truthiness) so numpy arrays are accepted too.
    retrieved = len(relevance)
    if retrieved == 0:
        return 0.0
    return float(np.sum(relevance)) / retrieved

precision([0,1,0,1])

0.5

* Precision at K

In [4]:
# [1,0,1,0]
# l[:k].sum()/k
#
# k = 1 -> 100%
#
# k = 2 -> 50%
#
# k = 3 -> 66.6%
#
# k = 4 -> 50%

def precision_at_k(relevance: list, k: int):
    """
    Computes the precision at k of a given array: the sum of the first k
    entries divided by k.
    If the list holds fewer than k entries the divisor is still k, so the
    missing positions count as non-relevant.
    :param k: the value of k; values <= 0 yield 0.0
    :param relevance: a binary list
    :return: float value of the precision at k of the given list
    """
    # A negative k would slice from the end of the list and divide by a
    # negative number, so every non-positive cutoff is treated like k == 0.
    if k <= 0:
        return 0.0
    return float(np.sum(relevance[:k])) / k

precision_at_k([0, 0, 0, 1], 1)

0.0

* Recall at K

In [5]:
def recall_at_k(relevance: list, nr_relevant: int, k: int):
    """
    Computes the recall at k of a given array: the sum of the first k
    entries divided by the total number of relevant documents.
    :param relevance: a binary list
    :param nr_relevant: total number of relevant documents for the query;
        values <= 0 yield 0.0 instead of dividing by zero
    :param k: the value of k; values <= 0 yield 0.0
    :return: float value of the recall at k of the given list
    """
    # With nothing relevant to find, or nothing retrieved, recall is 0.0.
    # The k guard also keeps a negative k from slicing from the end.
    if nr_relevant <= 0 or k <= 0:
        return 0.0
    return float(np.sum(relevance[:k])) / nr_relevant

recall_at_k([0, 0, 0, 1], 4, 1)

0.0

* Average precision

In [18]:
def average_precision(relevance):
    """
    Computes the average precision of a given list: the mean of the
    precision-at-rank values taken at every position holding a relevant item.
    Supposes that the input binary vector contains all relevant documents.
    A list without any relevant item (including an empty list) yields 0.0
    instead of dividing by zero.
    :param relevance: a binary list
    :return: float value of the average precision of the given list
    """
    running_total = 0    # sum of relevance[:rank], kept up to date in one pass
    precision_sum = 0.0  # accumulated precision@rank over the relevant ranks
    for rank, rel in enumerate(relevance, start=1):
        running_total += rel
        if rel:
            # running_total / rank is exactly the precision at this rank.
            precision_sum += running_total / rank

    # After the loop running_total is the sum of the whole list.
    if not running_total:
        return 0.0
    return float(precision_sum / running_total)
average_precision([0, 1, 0, 1, 1, 1, 1])

0.5961904761904762

* Mean average precision

In [21]:
def mean_avg_precision(l):
    """
    Computes the MAP of a given list: the arithmetic mean of the average
    precision of every query.
    An empty collection of queries yields 0.0 instead of nan.
    :param l: an array of arrays, one for each of the queries
    :return: float value of the MAP of the given list of lists
    """
    per_query = [average_precision(query) for query in l]
    # np.mean of an empty sequence is nan (with a RuntimeWarning), so guard it.
    if not per_query:
        return 0.0
    return float(np.mean(per_query))

mean_avg_precision([[0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 1, 1], [0, 1, 0, 1, 1, 1, 1]])

0.35468253968253965

In [None]:
print(0.5961904761904762)

* DCG at K

In [29]:
def dcg_at_k(relevance, k: int):
    """
    Computes the discounted cumulative gain over the first k entries of a list.
    Ranks 1 and 2 are both divided by log2(2) = 1 (i.e. not discounted);
    every later rank i is divided by log2(i).
    :param k: the value of k
    :param relevance: a list of relevance grades, in ranked order
    :return: value of the dcg at k of the given list
    """
    total = 0
    for rank, gain in enumerate(relevance[:k], start=1):
        # max(rank, 2) keeps the first position from dividing by log2(1) == 0.
        discount = np.log2(max(rank, 2))
        total += gain / discount

    return total

dcg_at_k([4, 4, 3, 0, 0, 1, 3, 3, 3, 0], 6)

10.279642067948915

* NDCG at K


In [30]:
def ndcg_at_k(relevance, k):
    """
    Computes the normalized dcg at k of a given list: the dcg at k of the
    list divided by the dcg at k of the same grades sorted in descending
    order (the ideal ranking of this list).
    When the ideal dcg is zero (e.g. no relevant item, empty list or k == 0)
    the result is 0.0 instead of a division by zero.
    :param k: the value of k
    :param relevance: a list of relevance grades, in ranked order
    :return: float value of the ndcg at k of the given list
    """
    ideal_dcg = dcg_at_k(sorted(relevance, reverse=True), k)
    # Nothing to normalise against: avoid 0/0 (ZeroDivisionError or nan).
    if not ideal_dcg:
        return 0.0
    actual_dcg = dcg_at_k(relevance, k)

    return actual_dcg / ideal_dcg


ndcg_at_k([4, 4, 3, 0, 0, 1, 3, 3, 3, 0], 6)



0.7424602308163405