In [1]:
import numpy as np


def mean_reciprocal_rank(rs):
    """Mean reciprocal rank (MRR) over a collection of ranked result lists.

    For each ranking the score is 1 / (rank of the first relevant item),
    where the first element is rank 1 and any nonzero value counts as
    relevant.  A ranking with no relevant item contributes 0.

    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank

    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> round(float(mean_reciprocal_rank(rs)), 4)
    0.6111
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> float(mean_reciprocal_rank(rs))
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> float(mean_reciprocal_rank(rs))
    0.75

    Args:
        rs: Iterable of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean reciprocal rank
    """
    reciprocal_ranks = []
    for ranking in rs:
        # Indices (0-based) of every relevant entry in this ranking.
        hit_positions = np.nonzero(np.asarray(ranking))[0]
        if hit_positions.size:
            reciprocal_ranks.append(1. / (hit_positions[0] + 1))
        else:
            # No relevant item retrieved for this ranking.
            reciprocal_ranks.append(0.)
    return np.mean(reciprocal_ranks)



In [2]:
def r_precision(r):
    """Precision measured at the rank of the last relevant document.

    Relevance is binary (nonzero is relevant).  The ranking is cut off
    right after its last relevant entry and the fraction of relevant
    entries in that prefix is returned; 0 if nothing is relevant.

    >>> round(float(r_precision([0, 0, 1])), 4)
    0.3333
    >>> float(r_precision([0, 1, 0]))
    0.5
    >>> float(r_precision([1, 0, 0]))
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        R Precision
    """
    relevant = np.asarray(r) != 0
    hit_positions = np.nonzero(relevant)[0]
    if hit_positions.size == 0:
        return 0.
    # Keep everything up to and including the last relevant position.
    cutoff = hit_positions[-1] + 1
    return np.mean(relevant[:cutoff])


In [3]:
def precision_at_k(r, k):
    """Precision @ k: fraction of the first k results that are relevant.

    Relevance is binary (nonzero is relevant).

    >>> r = [0, 0, 1]
    >>> float(precision_at_k(r, 1))
    0.0
    >>> float(precision_at_k(r, 2))
    0.0
    >>> round(float(precision_at_k(r, 3)), 4)
    0.3333
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
        File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of top results to consider; must be >= 1

    Returns:
        Precision @ k

    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    top_k = np.asarray(r)[:k]
    is_relevant = top_k != 0
    # A short slice means the ranking has fewer than k entries.
    if is_relevant.size == k:
        return np.mean(is_relevant)
    raise ValueError('Relevance score length < k')

In [4]:
def average_precision(r):
    """Score is average precision (area under PR curve)

    Relevance is binary (nonzero is relevant).  The result is the mean of
    precision @ k taken at every rank k that holds a relevant item, or 0
    when no item is relevant.

    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> ref = sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    >>> round(ref, 4)
    0.7833
    >>> round(float(average_precision(r)), 4)
    0.7833

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Average precision
    """
    relevant = np.asarray(r) != 0
    # 0-based positions of the relevant items, in rank order.
    hit_positions = np.flatnonzero(relevant)
    if not hit_positions.size:
        return 0.
    # At the i-th relevant item (1-based) exactly i relevant items have been
    # seen, so precision there is i / rank.  Computing it this way is a single
    # O(n) pass instead of re-averaging a growing prefix for every hit.
    hits_so_far = np.arange(1, hit_positions.size + 1)
    return np.mean(hits_so_far / (hit_positions + 1))

In [5]:
def mean_average_precision(rs):
    """Mean average precision (MAP) over a collection of rankings.

    Relevance is binary (nonzero is relevant).  Each ranking is scored
    with ``average_precision`` and the scores are averaged.

    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> round(float(mean_average_precision(rs)), 4)
    0.7833
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> round(float(mean_average_precision(rs)), 4)
    0.3917

    Args:
        rs: Iterable of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean average precision
    """
    per_query_ap = list(map(average_precision, rs))
    return np.mean(per_query_ap)

In [6]:
def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)

    Relevance is positive real values.  Can use binary
    as the previous methods.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf

    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(dcg_at_k(r, 1))
    3.0
    >>> float(dcg_at_k(r, 1, method=1))
    3.0
    >>> float(dcg_at_k(r, 2))
    5.0
    >>> round(float(dcg_at_k(r, 2, method=1)), 4)
    4.2619
    >>> round(float(dcg_at_k(r, 10)), 4)
    9.6051
    >>> round(float(dcg_at_k(r, 11)), 4)
    9.6051

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Discounted cumulative gain (0. when there are no scores to consider)

    Raises:
        ValueError: method is neither 0 nor 1 (only checked when at least
            one score is considered)
    """
    # np.asfarray was removed in NumPy 2.0; an explicit float conversion
    # gives the same float64 array on every NumPy version.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        if method == 0:
            # First item is undiscounted; item at rank i >= 2 is divided by log2(i).
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            # Item at rank i is divided by log2(i + 1).
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.

In [7]:
def ndcg_at_k(r, k, method=0):
    """Normalized discounted cumulative gain (ndcg) at cutoff k.

    Relevance is positive real values.  Can use binary
    as the previous methods.  The DCG of ``r`` is divided by the DCG of
    the same scores sorted in descending order (the ideal ordering);
    the result is 0 when that ideal DCG is zero.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf

    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(ndcg_at_k(r, 1))
    1.0
    >>> r = [2, 1, 2, 0]
    >>> round(float(ndcg_at_k(r, 4)), 4)
    0.9203
    >>> round(float(ndcg_at_k(r, 4, method=1)), 4)
    0.9652
    >>> float(ndcg_at_k([0], 1))
    0.0
    >>> float(ndcg_at_k([1], 2))
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Normalized discounted cumulative gain
    """
    ideal_order = sorted(r, reverse=True)
    ideal_dcg = dcg_at_k(ideal_order, k, method)
    if ideal_dcg:
        return dcg_at_k(r, k, method) / ideal_dcg
    # Nothing relevant at all: avoid dividing by zero.
    return 0.


In [9]:
# if __name__ == "__main__":
#     import doctest
#     doctest.testmod()

In [10]:
import pandas as pd

# Sentinel rank given to ground-truth documents that were never retrieved;
# its reciprocal (1e-5) stands in for a reciprocal rank of ~0.
MAX_RANK = 100000

# Ground truth: one row per (query, relevant document) pair.
gts = pd.DataFrame([
    {'query': 'q1', 'document': 'doc2'},
    {'query': 'q1', 'document': 'doc3'},
    {'query': 'q2', 'document': 'doc7'},
])

# System output: one row per retrieved document with its 1-based rank.
results = pd.DataFrame([
    {'query': 'q1', 'document': 'doc1', 'rank': 1},
    {'query': 'q1', 'document': 'doc2', 'rank': 2},
    {'query': 'q1', 'document': 'doc3', 'rank': 3},
    {'query': 'q2', 'document': 'doc4', 'rank': 1},
    {'query': 'q2', 'document': 'doc5', 'rank': 2},
    {'query': 'q2', 'document': 'doc6', 'rank': 3},
])

# Left join keeps every ground-truth pair; pairs missing from the results
# come back with NaN rank and are replaced by the sentinel.
hits = gts.merge(results, how="left", on=["query", "document"])
hits = hits.fillna(MAX_RANK)

# Best (smallest) rank per query -> reciprocal -> mean over queries.
mrr = hits.groupby('query')['rank'].min().rdiv(1).mean()

print(mrr)

0.250005


In [20]:
import numpy as np 

In [21]:
def rr(ss):
    """Reciprocal rank of the first relevant item in one ranking.

    Args:
        ss: Iterable of binary relevance flags in rank order (first
            element is rank 1).  An item counts as relevant when it
            compares equal to 1 / True.

    Returns:
        1.0 / rank of the first relevant item, or 0.0 when the ranking
        contains no relevant item (including an empty ranking).
    """
    for rank, s in enumerate(ss, start=1):
        if s == 1:
            return 1.0 / rank
    # No relevant item: return an explicit 0.0 rather than falling through
    # to None, which callers cannot add to a running sum.
    return 0.0

In [22]:
def mrr(scores):
    """Mean reciprocal rank over a collection of rankings.

    Args:
        scores: Iterable of rankings; each ranking is passed to ``rr``.

    Returns:
        The sum of the per-ranking reciprocal ranks divided by the number
        of rankings, or 0.0 when ``scores`` is empty.
    """
    total = 0.0
    count = 0
    for score in scores:
        # Treat a missing reciprocal rank (None) as 0 so one ranking without
        # a relevant item does not abort the whole average.
        total += rr(score) or 0.0
        count += 1
    if count == 0:
        return 0.0
    # Divide by the number of rankings; the earlier counter started at 1 and
    # therefore divided by n + 1.
    return total / count

In [37]:
if __name__ == '__main__':

    # Single rankings: first relevant item at rank 1, 3 and 2 respectively.
    for s in (np.array([1, 0, 0]), np.array([0, 0, 1]), np.array([0, 1, 0])):
        print(rr(s))

    # Two rankings with the first relevant item at rank 1 and rank 3.
    m = np.array([
        [1, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
    ])
    print(mrr(m))

    # error input: three identical rows, each holding two 1s
    m = np.array([[1, 0, 0, 0, 0, 1]] * 3)
    print(mrr(m))

In [2]:
!pip install "tensorflow>=1.13.1,<2.0"
!pip install "tensorflow-gpu>=1.13.1,<2.0"
!pip install ampligraph

Collecting ampligraph
  Using cached ampligraph-1.3.2-py3-none-any.whl (152 kB)
Collecting rdflib>=4.2.2
  Using cached rdflib-5.0.0-py3-none-any.whl (231 kB)
Collecting sphinx-rtd-theme>=0.4.0
  Using cached sphinx_rtd_theme-0.5.1-py2.py3-none-any.whl (2.8 MB)
Collecting beautifultable>=0.7.0
  Using cached beautifultable-1.0.1-py2.py3-none-any.whl (27 kB)
Collecting pytest>=3.5.1
  Using cached pytest-6.2.2-py3-none-any.whl (280 kB)
Collecting sphinxcontrib-bibtex>=0.4.0
  Using cached sphinxcontrib_bibtex-2.2.0-py3-none-any.whl (31 kB)
Collecting recommonmark>=0.4.0
  Using cached recommonmark-0.7.1-py2.py3-none-any.whl (10 kB)
Collecting py>=1.8.2
  Using cached py-1.10.0-py2.py3-none-any.whl (97 kB)
Collecting iniconfig
  Using cached iniconfig-1.1.1-py2.py3-none-any.whl (5.0 kB)
Collecting isodate
  Using cached isodate-0.6.0-py2.py3-none-any.whl (45 kB)
Collecting commonmark>=0.8.1
  Using cached commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
Collecting pybtex-docutils>=1.0.0
  U

In [8]:
import numpy as np
from ampligraph.evaluation.metrics import mrr_score
# Rank positions (1-based) at which the correct answer was found, one per query.
rankings = np.array([1, 12, 1, 2])
# Compute the score once and print it; a bare call placed before the print
# had its result discarded and was never displayed.
print(mrr_score(rankings))

0.6458333333333333
