# 4.- Model evaluation

## Notebook setup

## Defining the evaluation metrics

In [None]:
def hits_at_n(ranks, n):
    """Return the fraction of ranks that are less than or equal to ``n``.

    Args:
        ranks: Sized collection of ranks (the callers in this file pass
            1-based positions).
        n: Cut-off; a rank counts as a hit when ``rank <= n``.

    Returns:
        The hit ratio as a float in ``[0, 1]``. An empty ``ranks`` yields
        ``0.0`` (nothing ranked means no hits) instead of raising
        ``ZeroDivisionError``.
    """
    total = len(ranks)
    if total == 0:
        return 0.0
    return sum(1 for r in ranks if r <= n) / total

def mr_score(ranks):
    """Return the mean rank (MR): the arithmetic mean of ``ranks``.

    Args:
        ranks: Sized collection of numeric ranks.

    Returns:
        The mean of ``ranks`` as a float. The mean of an empty collection is
        undefined, so ``float("nan")`` is returned in that case instead of
        raising ``ZeroDivisionError``.
    """
    total = len(ranks)
    if total == 0:
        return float("nan")
    return sum(ranks) / total

def mrr_score(ranks):
    """Return the mean reciprocal rank (MRR): the mean of ``1 / rank``.

    Args:
        ranks: Sized collection of non-zero numeric ranks (the callers in
            this file pass 1-based positions).

    Returns:
        The mean of the reciprocal ranks as a float. An empty ``ranks``
        yields ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    total = len(ranks)
    if total == 0:
        return 0.0
    return sum(1 / r for r in ranks) / total

## Testing functions

In [None]:
import pdb

def get_testing_entities(test_graph_static, prop=None):
    """Collect the distinct triples of the test graph that match ``prop``.

    Args:
        test_graph_static: Object exposing ``triples(pattern)``, queried with
            the pattern ``(None, prop, None)``.
        prop: Predicate to filter on; ``None`` is passed through unchanged
            as the predicate slot of the pattern.

    Returns:
        A set of ``(subject, predicate, object)`` tuples, every term
        converted to ``str``.
    """
    pattern = (None, prop, None)
    return {
        (str(triple[0]), str(triple[1]), str(triple[2]))
        for triple in test_graph_static.triples(pattern)
    }

In [None]:
def test_system_supervised(model, entities_test, entity_2_embeddings, all_classes, specific_prop=False):
    """Rank every candidate class for each test triple and report rank metrics.

    For each ``(entity, prop, true_class)`` triple, one feature row is built
    per candidate class, the rows are scored with ``model.predict_proba`` and
    the candidates are sorted by descending score. The 1-based position of
    ``true_class`` in that ordering is recorded as its rank.

    Args:
        model: Object exposing ``predict_proba(X)``; column 1 of its output is
            used as the score (presumably the positive-class probability of a
            scikit-learn style binary classifier -- confirm against caller).
        entities_test: Iterable of ``(entity, prop, true_class)`` triples.
        entity_2_embeddings: Mapping from entity / property / class to its
            embedding vector; all vectors are assumed to share the length of
            the first one.
        all_classes: Sequence of candidate classes to rank.
        specific_prop: When true, rows are ``[subject | object]``; otherwise
            rows are ``[subject | property | object]``.

    Returns:
        Tuple ``(ranks, misses, _all)``: the ranks of the triples whose true
        class is among ``all_classes``, the number of triples whose true class
        is not, and the total number of triples processed.

    Note:
        The printed metrics are computed over ``ranks`` only, i.e. misses are
        excluded from the averages.
    """
    ranks = []
    misses = 0
    _all = 0

    # Row layout: [subject | object] or [subject | property | object].
    embedding_size = len(list(entity_2_embeddings.values())[0])
    n_segments = 2 if specific_prop else 3
    object_start = (n_segments - 1) * embedding_size

    # The object (candidate class) segment never changes between test
    # triples, so it is filled once up front.
    X = np.zeros((len(all_classes), n_segments * embedding_size))
    for i, kg_class in enumerate(all_classes):
        X[i, object_start:] = entity_2_embeddings[kg_class]

    for entity, prop, true_class in tqdm(entities_test):
        _all += 1

        # Only the subject (and, for the generic model, the property)
        # segments are overwritten for each test triple.
        X[:, :embedding_size] = entity_2_embeddings[entity]
        if not specific_prop:
            X[:, embedding_size:object_start] = entity_2_embeddings[prop]

        pred = model.predict_proba(X)
        entity_results = [(kg_class, pred[idx][1]) for idx, kg_class in enumerate(all_classes)]
        # The sort is stable, so tied scores keep the order of all_classes.
        entity_results.sort(key=lambda item: item[1], reverse=True)
        sorted_predictions = [kg_class for kg_class, _ in entity_results]

        # A single scan: list.index raises ValueError when the true class is
        # not one of the candidates.
        try:
            rank = sorted_predictions.index(true_class) + 1
        except ValueError:
            misses += 1
            continue
        ranks.append(rank)

    if ranks:
        print(f"MR score: {mr_score(ranks)}")
        print(f"MRR score: {mrr_score(ranks)}")
        print(f"hits@1: {hits_at_n(ranks, 1)}")
        print(f"hits@5: {hits_at_n(ranks, 5)}")
        print(f"hits@10: {hits_at_n(ranks, 10)}")
    else:
        # Every triple was a miss (or there were none): the averages would
        # divide by zero, so skip them and still return the counters.
        print("No test triple could be ranked; rank metrics are undefined.")
    print(misses)
    print(_all)
    return ranks, misses, _all

In [None]:
def test_system_unsupervised(model, entities_test):
    """Evaluate a tail-prediction model on the test triples and report rank metrics.

    For each ``(entity, prop, true_class)`` triple, ``model.predict_tail`` is
    asked for its predictions and the 1-based position of ``true_class`` in
    the returned sequence is recorded as its rank.

    Args:
        model: Object exposing ``predict_tail(entity, prop)``. Its result must
            support ``in`` and ``.index`` (presumably a list ordered from best
            to worst candidate -- confirm against the model implementation).
        entities_test: Iterable of ``(entity, prop, true_class)`` triples.

    Returns:
        Tuple ``(ranks, misses, _all)``: the ranks of the triples whose true
        class was among the predictions, the number of triples for which it
        was not, and the total number of triples processed.

    Note:
        The printed metrics are computed over ``ranks`` only, i.e. misses are
        excluded from the averages.
    """
    ranks = []
    misses = 0
    _all = 0
    for entity, prop, true_class in tqdm(entities_test):
        _all += 1
        sorted_predictions = model.predict_tail(entity, prop)
        if true_class not in sorted_predictions:
            misses += 1
            continue
        ranks.append(sorted_predictions.index(true_class) + 1)

    if ranks:
        print(f"MR score: {mr_score(ranks)}")
        print(f"MRR score: {mrr_score(ranks)}")
        print(f"hits@1: {hits_at_n(ranks, 1)}")
        print(f"hits@5: {hits_at_n(ranks, 5)}")
        print(f"hits@10: {hits_at_n(ranks, 10)}")
    else:
        # Every triple was a miss (or there were none): the averages would
        # divide by zero, so skip them and still return the counters.
        print("No test triple could be ranked; rank metrics are undefined.")
    print(misses)
    print(_all)
    return ranks, misses, _all

## Evaluating the models