In [1]:
import gc
import numpy as np
import pandas as pd
import pandas.api.types
import sklearn.metrics

In [33]:
def apk(actual, predicted, k=20):
    """
    Compute the average precision at k.

    This function computes the average prescision at k between two lists of
    items.

    Parameters
    ----------
    actual : list
             A list of elements that are to be predicted (order doesn't matter)
    predicted : list
                A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The average precision at k over the input lists

    """
    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0
    
    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)
    if not actual:
        print("no actual!")
        return 0.0
    print(len(actual), score / min(len(actual), k))
    return score / min(len(actual), k)

In [3]:
def mapk(actual, predicted, k=20):
    """
    Compute the mean average precision at k.

    This function computes the mean average prescision at k between two lists
    of lists of items.

    Parameters
    ----------
    actual : list
             A list of lists of elements that are to be predicted 
             (order doesn't matter in the lists)
    predicted : list
                A list of lists of predicted elements
                (order matters in the lists)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The mean average precision at k over the input lists

    """
    return np.mean([apk(a, p, k) for a, p in zip(actual, predicted)])

In [4]:
def prepare(df):
    df['categories'] = df['categories'].str.split(' ')
    df['osd'] = df['osd'].astype(float)
    return df

In [40]:
predicted = pd.read_csv('test_predicted.csv')
actual = pd.read_csv('test_actual.csv')
prepare(predicted)
prepare(actual)
mapk(actual=actual.categories, predicted=predicted.categories)

3 0.6666666666666666
5 0.2
1 1.0
3 1.0
3 1.0


0.7733333333333333