In [1]:
from collections import defaultdict
import torch

import mowl
mowl.init_jvm('10g')
from mowl.datasets import PathDataset
from mowl.projection import OWL2VecStarProjector
from mowl.walking import DeepWalk

from gensim.models.word2vec import LineSentence
from gensim.models import Word2Vec

from src.gnn import *
from src.utils import *

In [None]:
#dataset_name = 'family'
#dataset_name = 'pizza'
dataset_name = 'OWL2DL-1'

In [2]:
experiments = get_experimets(dataset_name)

**Fit**

In [3]:
def owl2vec_fit(file_name, embed_dim, run):
    dataset = PathDataset(ontology_path=f'datasets/bin/{file_name}_train.owl',
                          testing_path=f'datasets/bin/{file_name}_test.owl',
                          validation_path=f'datasets/bin/{file_name}_val.owl')
    if run:
        projector = OWL2VecStarProjector(bidirectional_taxonomy=True)
        edges = projector.project(dataset.ontology)
        
        walker = DeepWalk(num_walks=20,
                          walk_length=20,
                          alpha=0.1,
                          workers=4)         
        walks = walker.walk(edges)
        sentences = LineSentence(walker.outfile)
        model = Word2Vec(sentences, vector_size=embed_dim, epochs=500, window=5, min_count=1, workers=4)
        model.save(f'models/owl2vec_{file_name}.model')
        #triples_factory = Edge.as_pykeen(edges, create_inverse_triples=True)
        #trans_e = TransE(triples_factory=triples_factory, embedding_dim=embed_dim, random_seed=42)
        #model = KGEModel(triples_factory, trans_e, epochs=500, batch_size=32)
        #model.train()
    else:
        model = Word2Vec.load(f'models/owl2vec_{file_name}.model')
    return model

**Eval**

In [4]:
def owl2vec_eval(owl2vec_model, test_graph):
    vectors = owl2vec_model.wv
    words = list(owl2vec_model.wv.key_to_index)
    output_owl2vec = torch.tensor(vectors[words])
    
    nodes = list(set(words))
    nodes_dict = {node: i for i, node in enumerate(nodes)}
    
    i=0
    edge_data = defaultdict(list)
    for s, p, o in test_graph.triples((None, None, None)):
        s = s.n3()
        s = s.replace('<','')
        s = s.replace('>','')
        o = o.n3()
        o = o.replace('<','')
        o = o.replace('>','')
        try:
            src, dst = nodes_dict[s], nodes_dict[o]
            edge_data['edge_index'].append([src, dst])
        except:
            i+=1
    edge_index = torch.tensor(edge_data['edge_index']).reshape(2,-1)
    
    mrr, mean_rank, median_rank, hits_at_5, hits_at_10 = eval_hits(edge_index=edge_index,
                                                                   tail_pred=1,
                                                                   output=output_owl2vec,
                                                                   max_num=100)
    print(f'MRR: {mrr:.3f}, Mean Rank: {mean_rank:.3f}, Median Rank: {median_rank:.3f}, Hits@5: {hits_at_5:.3f}, Hits@10: {hits_at_10:.3f}')

**Experiments**

In [5]:
for experiment in experiments: 
    dataset_name = experiment['dataset_name']
    file_name = experiment['file_name']
    format_ = experiment['format_']
    add_noise = experiment['add_noise']

    train_graph, valid_graph, test_graph, test_membership_graph, test_subsumption_graph = split_ontology(dataset_name=dataset_name, 
                                                                                                         file_name=file_name, 
                                                                                                         format_=format_, 
                                                                                                         train_ratio=1, 
                                                                                                         add_noise=add_noise)
    owl2vec_model = owl2vec_fit(file_name=file_name, embed_dim=200, run=False)
    print('Membership:')
    owl2vec_eval(owl2vec_model, test_membership_graph)
    print('Subsumption:')
    owl2vec_eval(owl2vec_model, test_subsumption_graph)
    print()

Triplets found in OWL2DL-1.owl: 55215
Train Triplets found: 55215
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1.model', 'datetime': '2024-06-18T09:33:55.199778', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.039, Mean Rank: 56.802, Median Rank: 57.000, Hits@5: 0.022, Hits@10: 0.060
Subsumption:
MRR: 0.036, Mean Rank: 55.909, Median Rank: 65.000, Hits@5: 0.045, Hits@10: 0.091

Triplets found in OWL2DL-1.owl: 55215
Triplets found in OWL2DL-1_noisy_gnn_100.owl: 4802
Train Triplets found: 60017
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1_noisy_gnn_100.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1_noisy_gnn_100.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1_noisy_gnn_100.model', 'datetime': '2024-06-18T09:34:16.141017', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.027, Mean Rank: 59.315, Median Rank: 55.000, Hits@5: 0.017, Hits@10: 0.043
Subsumption:
MRR: 0.022, Mean Rank: 57.955, Median Rank: 61.000, Hits@5: 0.000, Hits@10: 0.000

Triplets found in OWL2DL-1.owl: 55215
Triplets found in OWL2DL-1_noisy_gnn_1000.owl: 48050
Train Triplets found: 103265
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1_noisy_gnn_1000.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1_noisy_gnn_1000.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1_noisy_gnn_1000.model', 'datetime': '2024-06-18T09:34:43.199486', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.030, Mean Rank: 59.974, Median Rank: 55.000, Hits@5: 0.013, Hits@10: 0.043
Subsumption:
MRR: 0.040, Mean Rank: 42.000, Median Rank: 35.500, Hits@5: 0.000, Hits@10: 0.182

Triplets found in OWL2DL-1.owl: 55215
Triplets found in OWL2DL-1_noisy_random_100.owl: 4761
Train Triplets found: 59976
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1_noisy_random_100.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1_noisy_random_100.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1_noisy_random_100.model', 'datetime': '2024-06-18T09:35:02.508443', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.034, Mean Rank: 57.586, Median Rank: 60.500, Hits@5: 0.017, Hits@10: 0.034
Subsumption:
MRR: 0.042, Mean Rank: 56.909, Median Rank: 47.500, Hits@5: 0.091, Hits@10: 0.091

Triplets found in OWL2DL-1.owl: 55215
Triplets found in OWL2DL-1_noisy_random_1000.owl: 47661
Train Triplets found: 102876
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1_noisy_random_1000.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1_noisy_random_1000.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1_noisy_random_1000.model', 'datetime': '2024-06-18T09:35:32.557617', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.035, Mean Rank: 63.711, Median Rank: 74.500, Hits@5: 0.026, Hits@10: 0.078
Subsumption:
MRR: 0.046, Mean Rank: 62.182, Median Rank: 79.000, Hits@5: 0.045, Hits@10: 0.091

Triplets found in OWL2DL-1.owl: 55215
Triplets found in OWL2DL-1_noisy_disjoint_100.owl: 6800
Train Triplets found: 62015
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1_noisy_disjoint_100.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1_noisy_disjoint_100.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1_noisy_disjoint_100.model', 'datetime': '2024-06-18T09:35:48.794843', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.073, Mean Rank: 38.271, Median Rank: 31.000, Hits@5: 0.126, Hits@10: 0.274
Subsumption:
MRR: 0.049, Mean Rank: 58.117, Median Rank: 69.000, Hits@5: 0.083, Hits@10: 0.183

Triplets found in OWL2DL-1.owl: 55215
Triplets found in OWL2DL-1_noisy_disjoint_1000.owl: 68000
Train Triplets found: 122434
Test Triplets (Membership) found: 23190
Test Triplets (Subsumption) found: 127


INFO:gensim.utils:loading Word2Vec object from models/owl2vec_OWL2DL-1_noisy_disjoint_1000.model
INFO:gensim.utils:loading wv recursively from models/owl2vec_OWL2DL-1_noisy_disjoint_1000.model.wv.* with mmap=None
INFO:gensim.utils:setting ignored attribute cum_table to None
INFO:gensim.utils:Word2Vec lifecycle event {'fname': 'models/owl2vec_OWL2DL-1_noisy_disjoint_1000.model', 'datetime': '2024-06-18T09:36:15.717862', 'gensim': '4.3.1', 'python': '3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22631-SP0', 'event': 'loaded'}


Membership:
MRR: 0.052, Mean Rank: 42.838, Median Rank: 38.000, Hits@5: 0.047, Hits@10: 0.083
Subsumption:
MRR: 0.058, Mean Rank: 48.533, Median Rank: 41.000, Hits@5: 0.083, Hits@10: 0.117

