# Basic System Evaluation

To fully understand the task and its difficulties, I'll run one complete evaluation pass in this notebook. Once this initial exploration is done, the code can be productionized.

In [56]:
# Imports
import io

import numpy as np
import sklearn
from sklearn.metrics import average_precision_score

def apk(actual, predicted, k=None):
    """
    Computes the average precision at k.

    This function computes the average precision at k between two lists of
    items.

    Parameters
    ----------
    actual : list
             A list of elements that are to be predicted (order doesn't matter)
    predicted : list
                A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements. Defaults to
        ``len(predicted)``, i.e. the whole prediction list is scored.

    Returns
    -------
    score : double
            The average precision at k over the input lists. 0.0 is returned
            when ``actual`` is empty or when no predictions are scored
            (``k <= 0``, which includes an empty ``predicted`` combined with
            the default ``k``).
    """
    if k is None:
        k = len(predicted)

    # Nothing to find, or nothing scored: the score is defined as 0.0.
    # Without this guard the final division would be by zero (or by a
    # negative number for a negative k).
    if not actual or k <= 0:
        return 0.0

    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        # A relevant item only counts the first time it is predicted.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    return score / min(len(actual), k)

# Sub-tasks of the evaluation; the three path lists below are index-aligned
# with this list.
sub_tasks = ['celebrity', 'movie', 'restaurant', 'tvShow']

trainset_locs, devset_locs, output_locs = [], [], []
for task in sub_tasks:
    trainset_locs.append('../data/TRAIN SET/%s.TRAINSET.txt' % task)
    devset_locs.append('../data/DEV SET/%s.DEVSET.txt' % task)
    output_locs.append('../output/%s.txt' % task)

In [70]:
# loading datasets
def LoadInData(data_loc, test_data=False):
    """Parse a GBK-encoded, tab-separated query/result file.

    Each line holds a query in its first field followed by one item per
    remaining field. Lines without any item (including blank lines) are
    skipped.

    Parameters
    ----------
    data_loc : str
        Path of the file to read.
    test_data : bool, optional
        When False (default) every item must look like ``entity:score``; the
        text after the LAST colon is parsed as an integer score, so entity
        names may themselves contain colons. When True the whole field is
        kept as the entity and its score is None.

    Returns
    -------
    list of (query, [(entity, score), ...]) tuples, in file order.

    Raises
    ------
    ValueError
        In training mode, when the text after the last colon of an item is
        not an integer.
    UnicodeDecodeError
        When the file content is not valid GBK.
    """
    # io.open decodes while reading and works the same on Python 2 and 3; the
    # with-block closes the handle instead of leaking it. Its default newline
    # handling also normalises CRLF, so no stray '\r' ends up glued to the
    # last field of a line.
    with io.open(data_loc, encoding='gbk') as data_file:
        lines = data_file.read().split('\n')

    parsing_result = []
    for line in lines:
        terms = line.split('\t')
        items = []
        for field in terms[1:]:
            if test_data:
                ent, score = field, None
            else:
                # rpartition splits on the last colon only; with no colon at
                # all the entity is '' and the whole field is parsed as the
                # score.
                ent, _sep, score_text = field.rpartition(':')
                score = int(score_text)
            items.append((ent, score))
        if items:
            parsing_result.append((terms[0], items))
    return parsing_result


# Take the celebrity sub-task (index 0 of trainset_locs) as the running example.
# NOTE: EvaluateByRank uses this list as the default value of its train_data
# parameter, so it must be defined before that function.
cel_train_data = LoadInData(trainset_locs[0])

# Basic strategies, and how to evaluate

def NoReorder(q, results):
    """Baseline strategy: hand back `results` untouched (the query is ignored)."""
    unchanged = results
    return unchanged

def Reversed(q, results):
    """Baseline strategy: return a new list with `results` in reverse order.

    The query is ignored and the input sequence is left unmodified.
    """
    flipped = list(results)
    flipped.reverse()
    return flipped

def EvaluateByRank(strategy, train_data=cel_train_data):
    """Mean average precision of a reordering strategy over labelled data.

    Parameters
    ----------
    strategy : callable
        Called as ``strategy(query, entities)`` with the list of candidate
        entities for one query; must return the entities in ranked order.
    train_data : iterable of (query, [(entity, score), ...])
        Labelled queries in the shape produced by LoadInData. Defaults to
        ``cel_train_data`` as it was when this function was defined.

    Returns
    -------
    float
        The average of ``apk`` over all queries, where only entities whose
        score equals 1 count as relevant. 0.0 when ``train_data`` is empty
        (instead of failing with a division by zero).
    """
    score_results = []
    for query, labeled_items in train_data:
        candidates = [ent for ent, _label in labeled_items]
        my_result = strategy(query, candidates)
        relevant = [ent for ent, label in labeled_items if label == 1]
        score_results.append(apk(relevant, my_result))

    # An empty data set has no queries to average over.
    if not score_results:
        return 0.0
    return sum(score_results) / len(score_results)

def EvaluateAllByRank(strategy):
    """Evaluate `strategy` on the training set of every sub-task.

    Loads each file in `trainset_locs` and returns a dict that maps the
    matching name in `sub_tasks` to its EvaluateByRank score.
    """
    return dict(
        (task_name, EvaluateByRank(strategy, LoadInData(data_path)))
        for task_name, data_path in zip(sub_tasks, trainset_locs)
    )

# Sanity-check the evaluator with the two trivial baselines.
for baseline in (NoReorder, Reversed):
    print(EvaluateAllByRank(baseline))

{'movie': 1.0, 'celebrity': 1.0, 'tvShow': 1.0, 'restaurant': 1.0}
{'movie': 0.13324746754540753, 'celebrity': 0.06995830022609754, 'tvShow': 0.07271910815350574, 'restaurant': 0.08773748221660245}


In [71]:
def OrderByScore(func):
    """Turn a pairwise scorer into a reordering strategy.

    `func(q, r)` scores a single result `r` for query `q`. The returned
    function takes `(q, results)` and returns the results sorted by
    descending score. Because (score, result) pairs are sorted, results with
    equal scores are ordered by the results themselves, also descending,
    and not by their position in the input.
    """
    def wrappee(q, results):
        scored = [(func(q, candidate), candidate) for candidate in results]
        scored.sort(reverse=True)
        return [candidate for _score, candidate in scored]
    return wrappee

# Try a real metric
@OrderByScore
def CharOverlap(q, r):
    """Score a result by the number of distinct characters it shares with the query."""
    shared_chars = set(q) & set(r)
    return len(shared_chars)

# Score the character-overlap ranking on every sub-task's training set.
print(EvaluateAllByRank(CharOverlap))

{'movie': 0.32330062464220133, 'celebrity': 0.32605351222452034, 'tvShow': 0.22679582987293437, 'restaurant': 0.2338259004008226}


In [73]:
# Export the CharOverlap ranking of every dev set: one output file per
# sub-task, one tab-separated line per query (query first, then the ranked
# entities), encoded as GBK.

for output_filename, testdata_loc in zip(output_locs, devset_locs):
    testdata = LoadInData(testdata_loc, test_data=True)
    with open(output_filename, 'w') as ofile:
        for query, entries in testdata:
            # Dev-set entries carry no score; only the entity is needed.
            candidates = [ent for ent, _score in entries]
            ranked = CharOverlap(query, candidates)
            row = '\t'.join([query] + ranked)
            ofile.write(row.encode('gbk') + '\n')