In [12]:
import torch
import pykeen
from pykeen import predict
from pykeen.pipeline import pipeline
from pykeen.datasets import get_dataset

In [3]:
mps_device = torch.device("mps")

In [25]:
ctd_only_model = torch.load('results/baseline/transe_hetionet_only_ctd_relation/trained_model.pkl')

In [34]:
best_transe_model = torch.load('results/baseline/transe_hetionet_best_hpo2/trained_model.pkl')

In [13]:
dataset = get_dataset(dataset="hetionet")

In [14]:
triples_factory = dataset.training
test_triples = dataset.testing.mapped_triples
ctd_id = torch.as_tensor(triples_factory.relations_to_ids(["CtD"]))
ctd_triples = test_triples[test_triples[:, 1] == ctd_id]

In [None]:
ctd_triples

In [15]:
def evaluate_semantic_awareness(triples, model, triples_factory):
   final_semantic_awareness_score = 0
   for triple in triples:
        prediction_for_tail = predict.predict_target(model=model, relation="CtD", head=triple[0].item(), triples_factory=triples_factory)
        prediction_for_tail_top_10 = prediction_for_tail.df.nlargest(n=10, columns="score")
        prediction_for_head = predict.predict_target(model=model, relation="CtD", tail=triple[2].item(), triples_factory=triples_factory)
        prediction_for_head_top_10 = prediction_for_head.df.nlargest(n=10, columns="score")
        count_of_expected_head = len(prediction_for_head_top_10[prediction_for_head_top_10["head_label"].str.startswith("Compound")])
        count_of_expected_tail = len(prediction_for_tail_top_10[prediction_for_tail_top_10["tail_label"].str.startswith("Disease")])
        semantic_score = ((count_of_expected_head + count_of_expected_tail) / (len(prediction_for_tail_top_10) + len(prediction_for_head_top_10)))
        final_semantic_awareness_score += semantic_score
   final_semantic_awareness_score = final_semantic_awareness_score / len(triples)
   return final_semantic_awareness_score

In [92]:
sem_score_std_only = evaluate_semantic_awareness(ctd_triples, ctd_only_model, dataset.training)
sem_score_std_only

0.897887323943661

In [93]:
best_hpo2_model = torch.load('results/baseline/transe_hetionet_only_ctd_relation/trained_model.pkl', mps_device)

In [94]:
sem_score_best_hpo2 = evaluate_semantic_awareness(ctd_triples, best_hpo2_model, dataset.training)
sem_score_best_hpo2

0.897887323943661

In [16]:
rotate_model = torch.load('results/baseline/rotate_best_hpo/trained_model.pkl')

In [17]:
sem_score_ctd_rotate_test = evaluate_semantic_awareness(ctd_triples, rotate_model, dataset.training)

In [18]:
sem_score_ctd_rotate_test

1.0

In [22]:
validation_triples = dataset.validation.mapped_triples
ctd_triples_validation = validation_triples[validation_triples[:, 1] == ctd_id]

In [23]:
sem_score_ctd_rotate_valiadtion = evaluate_semantic_awareness(ctd_triples_validation, rotate_model, dataset.training)
sem_score_ctd_rotate_valiadtion

1.0

In [26]:
sem_score_ctd_trans_valiadtion = evaluate_semantic_awareness(ctd_triples_validation, ctd_only_model, dataset.training)
sem_score_ctd_trans_valiadtion

0.8970588235294115

In [40]:
drd_id = torch.as_tensor(triples_factory.relations_to_ids(["DrD"]))
drd_triples = test_triples[test_triples[:, 1] == drd_id]
drd_triples_validation = validation_triples[validation_triples[:, 1] == drd_id]

In [30]:
def evaluate_semantic_awareness_drd(triples, model, triples_factory):
   final_semantic_awareness_score = 0
   for triple in triples:
        prediction_for_tail = predict.predict_target(model=model, relation="DrD", head=triple[0].item(), triples_factory=triples_factory)
        prediction_for_tail_top_10 = prediction_for_tail.df.nlargest(n=10, columns="score")
        prediction_for_head = predict.predict_target(model=model, relation="DrD", tail=triple[2].item(), triples_factory=triples_factory)
        prediction_for_head_top_10 = prediction_for_head.df.nlargest(n=10, columns="score")
        count_of_expected_head = len(prediction_for_head_top_10[prediction_for_head_top_10["head_label"].str.startswith("Disease")])
        count_of_expected_tail = len(prediction_for_tail_top_10[prediction_for_tail_top_10["tail_label"].str.startswith("Disease")])
        semantic_score = ((count_of_expected_head + count_of_expected_tail) / (len(prediction_for_tail_top_10) + len(prediction_for_head_top_10)))
        final_semantic_awareness_score += semantic_score
   final_semantic_awareness_score = final_semantic_awareness_score / len(triples)
   return final_semantic_awareness_score

In [38]:
sem_score_drd_rotate_test = evaluate_semantic_awareness_drd(drd_triples, rotate_model, dataset.training)
sem_score_drd_rotate_test

1.0

In [39]:
sem_score_drd_transe_test = evaluate_semantic_awareness_drd(drd_triples,best_transe_model, dataset.training)
sem_score_drd_transe_test

0.9419354838709679

In [41]:
sem_score_drd_rotate_validation = evaluate_semantic_awareness_drd(drd_triples_validation, rotate_model, dataset.training)
sem_score_drd_rotate_validation 

1.0

In [42]:
sem_score_drd_transe_validation = evaluate_semantic_awareness_drd(drd_triples_validation,best_transe_model, dataset.training)
sem_score_drd_transe_validation

0.953448275862069