In [35]:
import torch
import numpy as np


In [50]:
def rearrange(energy_scores, candidate_position_idx, true_position_idx):
    """Move the scores of correct candidates to the front and build 1/0 labels.

    Args:
        energy_scores: tensor indexed along its first dimension, one entry per
            element of ``candidate_position_idx``.
        candidate_position_idx: sequence of candidate positions.
        true_position_idx: sequence of gold positions; may be empty.

    Returns:
        (energy_scores, labels): the scores reordered so that every candidate
        equal to some gold position comes first (relative order is kept inside
        both groups), and an int tensor holding one 1 per correct candidate
        followed by one 0 per incorrect candidate.
    """
    # Build the mask candidate by candidate so it is always 1-D. A nested-list
    # array reduced with .any(0) collapses to a 0-d scalar when there are no
    # gold positions, which breaks the np.where calls below.
    is_correct = np.array(
        [any(cand == gold for gold in true_position_idx) for cand in candidate_position_idx],
        dtype=bool,
    )
    correct = np.where(is_correct)[0]
    incorrect = np.where(~is_correct)[0]
    labels = torch.cat((torch.ones(len(correct)), torch.zeros(len(incorrect)))).int()
    energy_scores = torch.cat((energy_scores[correct], energy_scores[incorrect]))
    return energy_scores, labels

In [51]:
# Toy inputs for a quick sanity check of `rearrange` and `obtain_ranks`.
query = 'a'  # NOTE(review): not referenced by any of the visible cells

# One score per candidate position, aligned by index with candidate_position_idx.
energy_scores = torch.tensor([2, 1, 3])
candidate_position_idx = [('a', 'b'), ('b', 'c'), ('c', 'd')]
# (parent, child)
# Reference positions; only ('a', 'b') also occurs among the candidates above.
node2pos = [('g', 'e'), ('a', 'b')]




In [52]:
# Reorder the toy scores so candidates found in node2pos come first, with matching 1/0 labels.
batched_energy_scores, labels = rearrange(energy_scores, candidate_position_idx, node2pos)

In [53]:
import re 

def calculate_ranks_from_distance(all_distances, positive_relations):
    """
    Rank the selected entries among all distances (smallest distance -> rank 1).

    all_distances: a np array
    positive_relations: a list of array indices

    return a list holding the 1-based rank of every index in positive_relations
    """
    # argsort yields the sorted order; the argsort of that order yields, for
    # every entry, its 0-based position within the sorted sequence.
    sorted_order = np.argsort(all_distances)
    zero_based_rank = np.argsort(sorted_order)
    return [position + 1 for position in zero_based_rank[positive_relations]]

def obtain_ranks(outputs, targets):
    """Compute, for every group in a batch, the ranks of its positive entries.

    outputs : tensor with one score per entry, e.g. of size (batch_size, 1) or
        (batch_size, ), required_grad = False, model predictions. Scores are
        treated as distances: the smallest score receives rank 1.
    targets : tensor of size (batch_size, ), required_grad = False, 0/1 labels.
        Assumed to be of format [1, 0, ..., 0, 1, 0, ..., 0, ..., 0], i.e. a
        new group starts at every 1 that directly follows a 0.

    Returns a list with one entry per group; each entry is the list of 1-based
    ranks of that group's positive entries (empty if the group has none).
    """
    # atleast_1d keeps a single-element batch sliceable: squeeze() alone would
    # turn it into a 0-d array and the slicing below would raise.
    prediction = np.atleast_1d(outputs.cpu().numpy().squeeze())
    label = np.atleast_1d(targets.cpu().numpy())

    # Index of every 1 preceded by a 0 == first index of each group but the first.
    # Comparing values (instead of regex-matching raw bytes) works for any label
    # dtype and avoids the deprecated ndarray.tostring().
    boundaries = (np.flatnonzero((label[:-1] == 0) & (label[1:] == 1)) + 1).tolist()
    start_indices = [0] + boundaries
    end_indices = boundaries + [len(label)]

    all_ranks = []
    for start_idx, end_idx in zip(start_indices, end_indices):
        distances = prediction[start_idx:end_idx]
        labels = label[start_idx:end_idx]
        positive_relations = list(np.where(labels == 1)[0])
        all_ranks.append(calculate_ranks_from_distance(distances, positive_relations))
    return all_ranks

In [54]:
# Ranks of the positive entries, one list per group found in `labels`.
all_ranks = obtain_ranks(batched_energy_scores, labels)



In [55]:
# Show the ranks computed for the toy example.
all_ranks

[[2]]

In [56]:
import itertools

def macro_mr(all_ranks):
    """Macro mean rank: average the ranks inside each query, then average those means."""
    per_query_means = []
    for query_ranks in all_ranks:
        per_query_means.append(np.mean(np.array(query_ranks)))
    return np.mean(np.array(per_query_means))

def micro_mr(all_ranks):
    """Micro mean rank: average over all ranks pooled across queries."""
    pooled_ranks = [rank for query_ranks in all_ranks for rank in query_ranks]
    return np.array(pooled_ranks).mean()

def hit_at_1(all_ranks):
    """Fraction of all ranks (pooled across queries) that are <= 1."""
    pooled_ranks = np.array([rank for query_ranks in all_ranks for rank in query_ranks])
    num_hits = (pooled_ranks <= 1).sum()
    return 1.0 * num_hits / len(pooled_ranks)

def hit_at_3(all_ranks):
    """Fraction of all ranks (pooled across queries) that are <= 3."""
    pooled_ranks = np.array([rank for query_ranks in all_ranks for rank in query_ranks])
    num_hits = (pooled_ranks <= 3).sum()
    return 1.0 * num_hits / len(pooled_ranks)

def hit_at_5(all_ranks):
    """Fraction of all ranks (pooled across queries) that are <= 5."""
    pooled_ranks = np.array([rank for query_ranks in all_ranks for rank in query_ranks])
    num_hits = (pooled_ranks <= 5).sum()
    return 1.0 * num_hits / len(pooled_ranks)

def hit_at_10(all_ranks):
    """Fraction of all ranks (pooled across queries) that are <= 10."""
    pooled_ranks = np.array([rank for query_ranks in all_ranks for rank in query_ranks])
    num_hits = (pooled_ranks <= 10).sum()
    return 1.0 * num_hits / len(pooled_ranks)

def precision_at_1(all_ranks):
    """Number of ranks <= 1 (pooled across queries) divided by the number of queries."""
    pooled_ranks = np.array(list(itertools.chain.from_iterable(all_ranks)))
    num_hits = (pooled_ranks <= 1).sum()
    num_queries = len(all_ranks)
    return 1.0 * num_hits / num_queries

def precision_at_3(all_ranks):
    """Number of ranks <= 3 (pooled across queries) divided by 3 * number of queries."""
    pooled_ranks = np.array(list(itertools.chain.from_iterable(all_ranks)))
    num_hits = (pooled_ranks <= 3).sum()
    num_slots = len(all_ranks) * 3
    return 1.0 * num_hits / num_slots

def precision_at_5(all_ranks):
    """Number of ranks <= 5 (pooled across queries) divided by 5 * number of queries."""
    pooled_ranks = np.array(list(itertools.chain.from_iterable(all_ranks)))
    num_hits = (pooled_ranks <= 5).sum()
    num_slots = len(all_ranks) * 5
    return 1.0 * num_hits / num_slots

def precision_at_10(all_ranks):
    """Number of ranks <= 10 (pooled across queries) divided by 10 * number of queries."""
    pooled_ranks = np.array(list(itertools.chain.from_iterable(all_ranks)))
    num_hits = (pooled_ranks <= 10).sum()
    num_slots = len(all_ranks) * 10
    return 1.0 * num_hits / num_slots

def mrr_scaled_10(all_ranks):
    """ Scaled MRR score, check eq. (2) in the PinSAGE paper: https://arxiv.org/pdf/1806.01973.pdf

    Every rank is divided by 10 and rounded up before its reciprocal is taken;
    the result is the mean reciprocal over all ranks pooled across queries.
    """
    pooled_ranks = np.array(list(itertools.chain.from_iterable(all_ranks)))
    scaled_ranks = np.ceil(pooled_ranks / 10)
    reciprocal_ranks = 1.0 / scaled_ranks
    return reciprocal_ranks.mean()

In [57]:
# Scaled MRR of the ranks currently held in `all_ranks`.
mrr_scaled_10(all_ranks)

1.0

In [82]:
import pickle
# Path is relative to the notebook's working directory.
test_path = '../data/psychology/test_nodes.pickle'
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(test_path, 'rb') as f:
    # The following cell iterates `test` as (child, parents) pairs.
    test = pickle.load(f)

In [83]:
def clean(s):
    """Return the part of ``s`` that precedes its first '.' (all of ``s`` if there is none)."""
    head, _, _ = s.partition('.')
    return head

# Turn every (child, parents) test entry into a list of cleaned (parent, child) pairs.
new_test = []
for child, parents in test:
    new_test.append([(clean(parent), clean(child)) for parent in parents])

In [84]:
new_test

[[('psychiatry', 'social construction of schizophrenia')],
 [('methadone maintenance', 'use heroin')],
 [('social psychology', 'coordinated management of meaning')],
 [('inclusive fitness', 'green beard effect'),
  ('competitive altruism', 'green beard effect')],
 [('psychosis', 'schizoid personality')],
 [('neuroscience', 'olfactory toxicity in fish')],
 [('air sacs', 'abdominal air sac')],
 [('sodium inosinate', 'sodium guanylate')],
 [('delusional parasitosis', 'psychogenic parasitosis')],
 [('psychiatry', 'pirisudanol')],
 [('psychiatry', 'anisochromasia')],
 [('orthopedic surgery', 'brace removal')],
 [('psychological intervention', 'motivational interviewing')],
 [('mathematics education', 'result category')],
 [('neuroscience', 'allocortices')],
 [('grief', 'loss grief')],
 [('clinical psychology', 'medication effects'),
  ('psychiatry', 'medication effects')],
 [('electromyography', 'left sternocleidomastoid')],
 [('ionomycin', 'calcium ionophores')],
 [('suicide prevention', '

In [88]:
# Pickled model outputs; `new_pred` further down keeps element 1 of every entry.
pred_path = '../../../data/taxonomy/model_outputs/_meta-llama-Llama-2-7b-hfTaxoEnrich_Psy_32bs_3e-4_pretrained_wn_0'
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(pred_path, 'rb') as f:
    pred = pickle.load(f)

def get_hypernyms(line):
    """Split a raw prediction string into a list of normalised hypernym names.

    Newlines are treated like commas and hyphens are replaced by spaces; each
    comma-separated piece is lower-cased and stripped of surrounding
    whitespace. Pieces that are empty after stripping are dropped. Order and
    duplicates are preserved.

    Args:
        line: the raw text containing the predicted hypernyms.

    Returns:
        A list of non-empty hypernym strings.
    """
    pieces = line.strip().replace("\n", ",").replace("-", " ").split(",")

    res = []
    for hyp in pieces:
        # Normalise first, then filter: checking the raw piece against a fixed
        # set of blanks lets longer whitespace runs ("  ", "\t") through, and
        # those would end up as empty hypernyms.
        cleaned = hyp.lower().strip()
        if cleaned:
            res.append(cleaned)

    return res
# Keep only element 1 of each prediction entry (parsed by get_hypernyms in the metric loop).
new_pred = [elem[1] for elem in pred]

In [89]:
# Metric label -> function applied to the ranks of a single query.
metric_names = {
    'mrr': mrr_scaled_10,
    'p1': precision_at_1,
    'p5': precision_at_5,
    'r1': hit_at_1,
    'r5': hit_at_5,
}

# One list of per-query values for every metric.
metrics = {name: [] for name in metric_names}

for idx, gold in enumerate(new_test):
    hyps = get_hypernyms(new_pred[idx])

    # All gold pairs of one query carry the same child; read it from the first pair.
    child = gold[0][1]
    new_hyps = [(hyp, child) for hyp in hyps]
    # The prediction order acts as the score: earlier prediction -> smaller score.
    scores = torch.arange(len(new_hyps))

    batched_energy_scores, labels = rearrange(scores, new_hyps, gold)
    all_ranks = obtain_ranks(batched_energy_scores, labels)

    for name, func in metric_names.items():
        # nan results (for instance a mean over zero ranks) are recorded as 0.
        metrics[name].append(np.nan_to_num(func(all_ranks)))


  


In [90]:
# Print every metric averaged over all evaluated queries.
for metric_label, per_query_values in metrics.items():
    print(metric_label, np.mean(per_query_values))

mrr 0.239
p1 0.178
p5 0.06760000000000001
r1 0.13894444444444443
r5 0.2321388888888889


  end_indices = [(m.start() // label.itemsize)+1 for m in re.finditer(sep.tostring(), label.tostring())]


1.0