# 2.6 Ontology Embeddings (Task Vector)

### Subtask Vector.1: Run OWL2Vec* with the created ontology and generated data. Test three different configurations. Save the generated vectors in both binary and textual format.

#### Config 1.

configuration settings: Default

In [14]:
# Using the lab 9 code; the code below is taken directly from OWL2Vec_Standalone.py
# running on base_ontology.owl
# embedding_dir = ./output_embedding/created_ontology.embeddings
# config 1 - default OWL2vec* setting

import os
import sys
import time
import argparse
import random
import multiprocessing
import gensim
import configparser

sys.path.append('./rdf2vec/')
sys.path.append('./lib/')
from RDF2Vec_Embed import get_rdf2vec_walks
from Label import pre_process_words, URI_parse
from Onto_Projection import Reasoner, OntologyProjection

# Command-line interface: three string options and three on/off document switches.
parser = argparse.ArgumentParser()
for option, fallback, description in (
        ("--ontology_file", None, "The input ontology for embedding"),
        ("--embedding_dir", None, "The output embedding directory"),
        ("--config_file", 'default.cfg', "Configuration file"),
):
    parser.add_argument(option, type=str, default=fallback, help=description)
for option, description in (
        ("--URI_Doc", "Using URI document"),
        ("--Lit_Doc", "Using literal document"),
        ("--Mix_Doc", "Using mixture document"),
):
    parser.add_argument(option, help=description, action="store_true")
# parse_known_args keeps unrecognised arguments in `unparsed` instead of failing.
FLAGS, unparsed = parser.parse_known_args()

# Load the configuration file, then let values given on the command line
# take precedence over the ones read from the file.
config = configparser.ConfigParser()
config.read(FLAGS.config_file)
for key in ('ontology_file', 'embedding_dir'):
    cli_value = getattr(FLAGS, key)
    if cli_value is not None:
        config['BASIC'][key] = cli_value
for key in ('URI_Doc', 'Lit_Doc', 'Mix_Doc'):
    if getattr(FLAGS, key):
        config['DOCUMENT'][key] = 'yes'

# Defaults for the cache directory and for the embedding output path.
if 'cache_dir' not in config['DOCUMENT']:
    config['DOCUMENT']['cache_dir'] = './cache'
if 'embedding_dir' not in config['BASIC']:
    config['BASIC']['embedding_dir'] = os.path.join(config['DOCUMENT']['cache_dir'], 'output')

start_time = time.time()

# The ontology has to be loaded when projection is requested, or when any of
# the entity / axiom / annotation inputs is not supplied as a pre-computed file.
use_projection = config['DOCUMENT'].get('ontology_projection') == 'yes'
missing_precomputed = any(
    key not in config['DOCUMENT']
    for key in ('pre_entity_file', 'pre_axiom_file', 'pre_annotation_file')
)
projection = None
if use_projection or missing_precomputed:
    print('\n Access the ontology ...')
    projection = OntologyProjection(
        config['BASIC']['ontology_file'],
        reasoner=Reasoner.STRUCTURAL,
        only_taxonomy=False,
        bidirectional_taxonomy=True,
        include_literals=True,
        avoid_properties=set(),
        additional_preferred_labels_annotations=set(),
        additional_synonyms_annotations=set(),
        memory_reasoner='13351',
    )

# Ontology projection: when enabled, the projected graph is saved into the
# cache directory and used as the ontology file from here on; otherwise the
# configured ontology file is used unchanged.
if not use_projection:
    ontology_file = config['BASIC']['ontology_file']
else:
    print('\nCalculate the ontology projection ...')
    projection.extractProjection()
    onto_projection_file = os.path.join(config['DOCUMENT']['cache_dir'], 'projection.ttl')
    projection.saveProjectionGraph(onto_projection_file)
    ontology_file = onto_projection_file

# Seed entities (classes and individuals): read them from the user-supplied
# file when pre_entity_file is configured; otherwise extract them from the
# ontology and store them in entities.txt inside the cache directory.
if 'pre_entity_file' in config['DOCUMENT']:
    # Use a context manager so the file handle is closed after reading.
    with open(config['DOCUMENT']['pre_entity_file']) as f:
        entities = [line.strip() for line in f]
else:
    print('\nExtract classes and individuals ...')
    projection.extractEntityURIs()
    classes = projection.getClassURIs()
    individuals = projection.getIndividualURIs()
    entities = classes.union(individuals)
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'entities.txt'), 'w') as f:
        for e in entities:
            f.write('%s\n' % e)

# Extract axioms in Manchester Syntax unless pre_axiom_file is set;
# one axiom per line is written to axioms.txt inside the cache directory.
if 'pre_axiom_file' not in config['DOCUMENT']:
    print('\nExtract axioms ...')
    projection.createManchesterSyntaxAxioms()
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt'), 'w') as f:
        for ax in projection.axioms_manchester:
            f.write('%s\n' % ax)

# Annotations come either from a user-supplied file (pre_annotation_file) or
# from the ontology itself. Two structures are filled:
#   1) uri_label:   entity -> pre-processed words of its label
#   2) annotations: [entity, word, ...] lists for the non-label literals,
#                   used later as sentences of the literal document
uri_label, annotations = {}, []

if 'pre_annotation_file' not in config['DOCUMENT']:
    print('\nExtract annotations ...')
    projection.indexAnnotations()
    preferred_labels = projection.entityToPreferredLabels
    lexical_labels = projection.entityToAllLexicalLabels

    # The first preferred label of an entity becomes its label words.
    for entity in entities:
        if entity in preferred_labels and len(preferred_labels[entity]) > 0:
            first_label = next(iter(preferred_labels[entity]))
            uri_label[entity] = pre_process_words(words=first_label.split())

    # Every other lexical label that is not a preferred label becomes an annotation.
    for entity in entities:
        if entity not in lexical_labels:
            continue
        for literal in lexical_labels[entity]:
            if literal is None:
                continue
            if entity in preferred_labels and literal in preferred_labels[entity]:
                continue
            annotations.append([entity] + literal.split())

    # Store preferred labels and annotations in annotations.txt in the cache directory.
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'annotations.txt'), 'w') as f:
        for entity in preferred_labels:
            for literal in preferred_labels[entity]:
                f.write('%s preferred_label %s\n' % (entity, literal))
        for annotation in annotations:
            f.write('%s\n' % ' '.join(annotation))

else:
    # Each line is split on whitespace: subject, predicate, then the literal words.
    with open(config['DOCUMENT']['pre_annotation_file']) as f:
        for line in f:
            tokens = line.strip().split()
            if tokens[1] != 'http://www.w3.org/2000/01/rdf-schema#label':
                annotations.append([tokens[0]] + tokens[2:])
            else:
                uri_label[tokens[0]] = pre_process_words(tokens[2:])


# Build the URI document.
# Two parts: walk sentences, plus axiom sentences when axioms.txt exists in
# the cache directory. Every sentence is a list of string tokens.
walk_sentences, axiom_sentences, URI_Doc = list(), list(), list()
if 'URI_Doc' in config['DOCUMENT'] and config['DOCUMENT']['URI_Doc'] == 'yes':
    print('\nGenerate URI document ...')
    walks_ = get_rdf2vec_walks(onto_file=ontology_file, walker_type=config['DOCUMENT']['walker'],
                               walk_depth=int(config['DOCUMENT']['walk_depth']), classes=entities)
    print('Extracted %d walks for %d seed entities' % (len(walks_), len(entities)))
    walk_sentences += [list(map(str, x)) for x in walks_]

    axiom_file = os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt')
    if os.path.exists(axiom_file):
        # Context manager so the axiom file handle is closed after reading.
        with open(axiom_file) as f:
            for line in f:
                # One axiom per line, tokenised on whitespace.
                axiom_sentences.append(line.strip().split())
    print('Extracted %d axiom sentences' % len(axiom_sentences))
    URI_Doc = walk_sentences + axiom_sentences


def label_item(item):
    """Return the list of words that stands in for *item* in a sentence.

    Resolution order:
      1. Items with an entry in ``uri_label`` use those label words.
      2. Built-in terms (those starting with ``http://www.w3.org``) keep their
         name: the lower-cased part after ``#``. When such a URI has no ``#``
         fragment, its last path segment is used instead of raising
         ``IndexError``.
      3. Other ``http://`` URIs are handed to ``URI_parse`` (per the original
         comment, this derives words from the URI name).
      4. Anything else is returned lower-cased as a single word.
    """
    if item in uri_label:
        return uri_label[item]
    if item.startswith('http://www.w3.org'):
        if '#' in item:
            return [item.split('#')[1].lower()]
        # No fragment separator: fall back to the final path segment.
        return [item.rstrip('/').rsplit('/', 1)[-1].lower()]
    if item.startswith('http://'):
        return URI_parse(uri=item)
    return [item.lower()]


# Build the literal document from two sources:
#   - annotation sentences: the subject's label words followed by the
#     pre-processed literal words
#   - every walk/axiom sentence with each item replaced by its label words
Lit_Doc = []
if config['DOCUMENT'].get('Lit_Doc') == 'yes':
    print('\nGenerate literal document ...')
    for annotation in annotations:
        literal_words = pre_process_words(annotation[1:])
        if len(literal_words) == 0:
            # Nothing left after pre-processing: no sentence for this annotation.
            continue
        Lit_Doc.append(label_item(item=annotation[0]) + literal_words)
    print('Extracted %d annotation sentences' % len(Lit_Doc))

    # Walk sentences first, then axiom sentences, each flattened into label words.
    for sentence in walk_sentences + axiom_sentences:
        Lit_Doc.append([word for item in sentence for word in label_item(item=item)])

# Build the mixture document from every walk/axiom sentence.
#   Mix_Type == 'all':    one mixed sentence per position; that position keeps
#                         its original item, all others become label words
#   Mix_Type == 'random': a single mixed sentence with one randomly chosen
#                         position keeping its original item
Mix_Doc = list()
if 'Mix_Doc' in config['DOCUMENT'] and config['DOCUMENT']['Mix_Doc'] == 'yes':
    print('\nGenerate mixture document ...')
    for sentence in walk_sentences + axiom_sentences:
        if not sentence:
            # An empty sentence (e.g. from a blank line in axioms.txt) has no
            # position to keep; random.randint(0, -1) would raise ValueError.
            # The 'all' mode already yields nothing for it, so skip it.
            continue
        if config['DOCUMENT']['Mix_Type'] == 'all':
            for index in range(len(sentence)):
                mix_sentence = list()
                for i, item in enumerate(sentence):
                    mix_sentence += [item] if i == index else label_item(item=item)
                Mix_Doc.append(mix_sentence)
        elif config['DOCUMENT']['Mix_Type'] == 'random':
            random_index = random.randint(0, len(sentence) - 1)
            mix_sentence = list()
            for i, item in enumerate(sentence):
                mix_sentence += [item] if i == random_index else label_item(item=item)
            Mix_Doc.append(mix_sentence)

# Combine the three documents into the training corpus.
print('URI_Doc: %d, Lit_Doc: %d, Mix_Doc: %d' % (len(URI_Doc), len(Lit_Doc), len(Mix_Doc)))
all_doc = URI_Doc + Lit_Doc + Mix_Doc


print('Time for document construction: %s seconds' % (time.time() - start_time))
# Shuffle in place so sentences from the three documents are interleaved.
random.shuffle(all_doc)


# Save all_doc: one sentence per line, every word followed by a single space.
# The with-statement closes the file, so no explicit close() is needed.
with open(os.path.join(config['DOCUMENT']['cache_dir'], 'document_sentences.txt'), 'w') as f:
    for sentence in all_doc:
        f.write(''.join('%s ' % w for w in sentence) + '\n')


# Learn the language model: fine-tune the pre-trained model when one is
# configured and its file exists, otherwise train a new model on all_doc.
start_time = time.time()
model_cfg = config['MODEL']
if 'pre_train_model' in model_cfg and os.path.exists(model_cfg['pre_train_model']):
    print('\nFine-tune the pre-trained language model ...')
    model_ = gensim.models.Word2Vec.load(model_cfg['pre_train_model'])
    if len(all_doc) > 0:
        # Extend the existing vocabulary with the new corpus, then continue training.
        model_.min_count = int(model_cfg['min_count'])
        model_.build_vocab(all_doc, update=True)
        model_.train(all_doc, total_examples=model_.corpus_count, epochs=int(model_cfg['epoch']))
else:
    print('\nTrain the language model ...')
    # NOTE(review): `size` / `iter` are the keyword names used by this code;
    # confirm they match the installed gensim version.
    word2vec_params = dict(
        size=int(model_cfg['embed_size']),
        window=int(model_cfg['window']),
        workers=multiprocessing.cpu_count(),
        sg=1,
        iter=int(model_cfg['iteration']),
        negative=int(model_cfg['negative']),
        min_count=int(model_cfg['min_count']),
        seed=int(model_cfg['seed']),
    )
    model_ = gensim.models.Word2Vec(all_doc, **word2vec_params)

# Save the full model, then the vectors in textual and in binary format.
embedding_path = config['BASIC']['embedding_dir']
model_.save(embedding_path)

model_.wv.save_word2vec_format(embedding_path + ".txt", binary=False)
model_.wv.save_word2vec_format(embedding_path + ".bin", binary=True)

print('Time for learning the language model: %s seconds' % (time.time() - start_time))
print('Model saved. Done!')



INFO: There are 302 triples in the ontology
INFO: Creating ontology graph projection...
INFO: 	Extracting subsumption triples
INFO: 		Time extracting subsumption: 0.05426287651062012 seconds 
INFO: 	Extracting equivalence triples
INFO: 		Time extracting equivalences: 0.01941680908203125 seconds 
INFO: 	Extracting class membership triples.



 Access the ontology ...

Calculate the ontology projection ...


INFO: 		Time extracting class membership: 0.13052010536193848 seconds 
INFO: 	Extracting sameAs triples
INFO: 		Time extracting sameAs: 0.008331060409545898 seconds 
INFO: 	Extracting triples associated to Has_base
INFO: 		Time extracting triples for property: 0.15897607803344727 seconds 
INFO: 	Extracting triples associated to Has_ingredient
INFO: 		Time extracting triples for property: 0.15742897987365723 seconds 
INFO: 	Extracting triples associated to Has_location
INFO: 		Time extracting triples for property: 0.17687296867370605 seconds 
INFO: 	Extracting triples associated to Has_topping
INFO: 		Time extracting triples for property: 0.17138409614562988 seconds 
INFO: 	Extracting triples associated to Is_base_of
INFO: 		Time extracting triples for property: 0.16196084022521973 seconds 
INFO: 	Extracting triples associated to Is_ingredient_of
INFO: 		Time extracting triples for property: 0.15981292724609375 seconds 
INFO: 	Extracting triples associated to Is_located_in
INFO: 		Time 


Extract classes and individuals ...

Extract axioms ...

Extract annotations ...


INFO: collecting all words and their counts
INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO: collected 210 word types from a corpus of 12418 raw words and 2356 sentences
INFO: Loading a fresh vocabulary
INFO: effective_min_count=1 retains 210 unique words (100% of original 210, drops 0)
INFO: effective_min_count=1 leaves 12418 word corpus (100% of original 12418, drops 0)
INFO: deleting the raw counts dictionary of 210 items
INFO: sample=0.001 downsamples 80 most-common words
INFO: downsampling leaves estimated 5154 word corpus (41.5% of prior 12418)
INFO: estimated required memory for 210 words and 100 dimensions: 273000 bytes
INFO: resetting layer weights
INFO: training model with 4 workers on 210 vocabulary and 100 features, using sg=1 hs=0 sample=0.001 negative=25 window=5
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
I


Generate URI document ...
Extracted 716 walks for 50 seed entities
Extracted 67 axiom sentences

Generate literal document ...
Extracted 7 annotation sentences

Generate mixture document ...
URI_Doc: 783, Lit_Doc: 790, Mix_Doc: 783
Time for document construction: 2.771167039871216 seconds

Train the language model ...


DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 1 : training on 12418 raw words (5143 effective words) took 0.0s, 130017 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
INFO: worker thread finished; awaiting finish of 2 more threads
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 2 : training on 12418 raw words (5188 effective words) took 0.1s, 88657 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
INFO: worker thread finished; awaiting finish of 2 

Time for learning the language model: 0.596937894821167 seconds
Model saved. Done!


#### Config 2.

OWL2Vec* configuration changes:

> walker = wl

> walk depth = 2

Embeddings saved with "2" at the end of the files.

In [15]:
# Using the lab 9 code; the code below is taken directly from OWL2Vec_Standalone.py
# running on ontology_with_data
# embedding_dir = ./output_embedding2/created_ontology.embeddings

import os
import sys
import time
import argparse
import random
import multiprocessing
import gensim
import configparser

sys.path.append('./rdf2vec/')
sys.path.append('./lib/')
from RDF2Vec_Embed import get_rdf2vec_walks
from Label import pre_process_words, URI_parse
from Onto_Projection import Reasoner, OntologyProjection

# Command-line interface: three string options and three on/off document switches.
parser = argparse.ArgumentParser()
for option, fallback, description in (
        ("--ontology_file", None, "The input ontology for embedding"),
        ("--embedding_dir", None, "The output embedding directory"),
        ("--config_file", 'default.cfg', "Configuration file"),
):
    parser.add_argument(option, type=str, default=fallback, help=description)
for option, description in (
        ("--URI_Doc", "Using URI document"),
        ("--Lit_Doc", "Using literal document"),
        ("--Mix_Doc", "Using mixture document"),
):
    parser.add_argument(option, help=description, action="store_true")
# parse_known_args keeps unrecognised arguments in `unparsed` instead of failing.
FLAGS, unparsed = parser.parse_known_args()

# Load the configuration file, then let values given on the command line
# take precedence over the ones read from the file.
config = configparser.ConfigParser()
config.read(FLAGS.config_file)
for key in ('ontology_file', 'embedding_dir'):
    cli_value = getattr(FLAGS, key)
    if cli_value is not None:
        config['BASIC'][key] = cli_value
for key in ('URI_Doc', 'Lit_Doc', 'Mix_Doc'):
    if getattr(FLAGS, key):
        config['DOCUMENT'][key] = 'yes'

# Defaults for the cache directory and for the embedding output path.
if 'cache_dir' not in config['DOCUMENT']:
    config['DOCUMENT']['cache_dir'] = './cache'
if 'embedding_dir' not in config['BASIC']:
    config['BASIC']['embedding_dir'] = os.path.join(config['DOCUMENT']['cache_dir'], 'output')

start_time = time.time()

# The ontology has to be loaded when projection is requested, or when any of
# the entity / axiom / annotation inputs is not supplied as a pre-computed file.
use_projection = config['DOCUMENT'].get('ontology_projection') == 'yes'
missing_precomputed = any(
    key not in config['DOCUMENT']
    for key in ('pre_entity_file', 'pre_axiom_file', 'pre_annotation_file')
)
projection = None
if use_projection or missing_precomputed:
    print('\n Access the ontology ...')
    projection = OntologyProjection(
        config['BASIC']['ontology_file'],
        reasoner=Reasoner.STRUCTURAL,
        only_taxonomy=False,
        bidirectional_taxonomy=True,
        include_literals=True,
        avoid_properties=set(),
        additional_preferred_labels_annotations=set(),
        additional_synonyms_annotations=set(),
        memory_reasoner='13351',
    )

# Ontology projection: when enabled, the projected graph is saved into the
# cache directory and used as the ontology file from here on; otherwise the
# configured ontology file is used unchanged.
if not use_projection:
    ontology_file = config['BASIC']['ontology_file']
else:
    print('\nCalculate the ontology projection ...')
    projection.extractProjection()
    onto_projection_file = os.path.join(config['DOCUMENT']['cache_dir'], 'projection.ttl')
    projection.saveProjectionGraph(onto_projection_file)
    ontology_file = onto_projection_file

# Seed entities (classes and individuals): read them from the user-supplied
# file when pre_entity_file is configured; otherwise extract them from the
# ontology and store them in entities.txt inside the cache directory.
if 'pre_entity_file' in config['DOCUMENT']:
    # Use a context manager so the file handle is closed after reading.
    with open(config['DOCUMENT']['pre_entity_file']) as f:
        entities = [line.strip() for line in f]
else:
    print('\nExtract classes and individuals ...')
    projection.extractEntityURIs()
    classes = projection.getClassURIs()
    individuals = projection.getIndividualURIs()
    entities = classes.union(individuals)
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'entities.txt'), 'w') as f:
        for e in entities:
            f.write('%s\n' % e)

# Extract axioms in Manchester Syntax unless pre_axiom_file is set;
# one axiom per line is written to axioms.txt inside the cache directory.
if 'pre_axiom_file' not in config['DOCUMENT']:
    print('\nExtract axioms ...')
    projection.createManchesterSyntaxAxioms()
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt'), 'w') as f:
        for ax in projection.axioms_manchester:
            f.write('%s\n' % ax)

# Annotations come either from a user-supplied file (pre_annotation_file) or
# from the ontology itself. Two structures are filled:
#   1) uri_label:   entity -> pre-processed words of its label
#   2) annotations: [entity, word, ...] lists for the non-label literals,
#                   used later as sentences of the literal document
uri_label, annotations = {}, []

if 'pre_annotation_file' not in config['DOCUMENT']:
    print('\nExtract annotations ...')
    projection.indexAnnotations()
    preferred_labels = projection.entityToPreferredLabels
    lexical_labels = projection.entityToAllLexicalLabels

    # The first preferred label of an entity becomes its label words.
    for entity in entities:
        if entity in preferred_labels and len(preferred_labels[entity]) > 0:
            first_label = next(iter(preferred_labels[entity]))
            uri_label[entity] = pre_process_words(words=first_label.split())

    # Every other lexical label that is not a preferred label becomes an annotation.
    for entity in entities:
        if entity not in lexical_labels:
            continue
        for literal in lexical_labels[entity]:
            if literal is None:
                continue
            if entity in preferred_labels and literal in preferred_labels[entity]:
                continue
            annotations.append([entity] + literal.split())

    # Store preferred labels and annotations in annotations.txt in the cache directory.
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'annotations.txt'), 'w') as f:
        for entity in preferred_labels:
            for literal in preferred_labels[entity]:
                f.write('%s preferred_label %s\n' % (entity, literal))
        for annotation in annotations:
            f.write('%s\n' % ' '.join(annotation))

else:
    # Each line is split on whitespace: subject, predicate, then the literal words.
    with open(config['DOCUMENT']['pre_annotation_file']) as f:
        for line in f:
            tokens = line.strip().split()
            if tokens[1] != 'http://www.w3.org/2000/01/rdf-schema#label':
                annotations.append([tokens[0]] + tokens[2:])
            else:
                uri_label[tokens[0]] = pre_process_words(tokens[2:])


# Build the URI document.
# Two parts: walk sentences, plus axiom sentences when axioms.txt exists in
# the cache directory. Every sentence is a list of string tokens.
walk_sentences, axiom_sentences, URI_Doc = list(), list(), list()
if 'URI_Doc' in config['DOCUMENT'] and config['DOCUMENT']['URI_Doc'] == 'yes':
    print('\nGenerate URI document ...')
    walks_ = get_rdf2vec_walks(onto_file=ontology_file, walker_type=config['DOCUMENT']['walker'],
                               walk_depth=int(config['DOCUMENT']['walk_depth']), classes=entities)
    print('Extracted %d walks for %d seed entities' % (len(walks_), len(entities)))
    walk_sentences += [list(map(str, x)) for x in walks_]

    axiom_file = os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt')
    if os.path.exists(axiom_file):
        # Context manager so the axiom file handle is closed after reading.
        with open(axiom_file) as f:
            for line in f:
                # One axiom per line, tokenised on whitespace.
                axiom_sentences.append(line.strip().split())
    print('Extracted %d axiom sentences' % len(axiom_sentences))
    URI_Doc = walk_sentences + axiom_sentences


def label_item(item):
    """Return the list of words that stands in for *item* in a sentence.

    Resolution order:
      1. Items with an entry in ``uri_label`` use those label words.
      2. Built-in terms (those starting with ``http://www.w3.org``) keep their
         name: the lower-cased part after ``#``. When such a URI has no ``#``
         fragment, its last path segment is used instead of raising
         ``IndexError``.
      3. Other ``http://`` URIs are handed to ``URI_parse`` (per the original
         comment, this derives words from the URI name).
      4. Anything else is returned lower-cased as a single word.
    """
    if item in uri_label:
        return uri_label[item]
    if item.startswith('http://www.w3.org'):
        if '#' in item:
            return [item.split('#')[1].lower()]
        # No fragment separator: fall back to the final path segment.
        return [item.rstrip('/').rsplit('/', 1)[-1].lower()]
    if item.startswith('http://'):
        return URI_parse(uri=item)
    return [item.lower()]


# Build the literal document from two sources:
#   - annotation sentences: the subject's label words followed by the
#     pre-processed literal words
#   - every walk/axiom sentence with each item replaced by its label words
Lit_Doc = []
if config['DOCUMENT'].get('Lit_Doc') == 'yes':
    print('\nGenerate literal document ...')
    for annotation in annotations:
        literal_words = pre_process_words(annotation[1:])
        if len(literal_words) == 0:
            # Nothing left after pre-processing: no sentence for this annotation.
            continue
        Lit_Doc.append(label_item(item=annotation[0]) + literal_words)
    print('Extracted %d annotation sentences' % len(Lit_Doc))

    # Walk sentences first, then axiom sentences, each flattened into label words.
    for sentence in walk_sentences + axiom_sentences:
        Lit_Doc.append([word for item in sentence for word in label_item(item=item)])

# Build the mixture document from every walk/axiom sentence.
#   Mix_Type == 'all':    one mixed sentence per position; that position keeps
#                         its original item, all others become label words
#   Mix_Type == 'random': a single mixed sentence with one randomly chosen
#                         position keeping its original item
Mix_Doc = list()
if 'Mix_Doc' in config['DOCUMENT'] and config['DOCUMENT']['Mix_Doc'] == 'yes':
    print('\nGenerate mixture document ...')
    for sentence in walk_sentences + axiom_sentences:
        if not sentence:
            # An empty sentence (e.g. from a blank line in axioms.txt) has no
            # position to keep; random.randint(0, -1) would raise ValueError.
            # The 'all' mode already yields nothing for it, so skip it.
            continue
        if config['DOCUMENT']['Mix_Type'] == 'all':
            for index in range(len(sentence)):
                mix_sentence = list()
                for i, item in enumerate(sentence):
                    mix_sentence += [item] if i == index else label_item(item=item)
                Mix_Doc.append(mix_sentence)
        elif config['DOCUMENT']['Mix_Type'] == 'random':
            random_index = random.randint(0, len(sentence) - 1)
            mix_sentence = list()
            for i, item in enumerate(sentence):
                mix_sentence += [item] if i == random_index else label_item(item=item)
            Mix_Doc.append(mix_sentence)

# Combine the three documents into the training corpus.
print('URI_Doc: %d, Lit_Doc: %d, Mix_Doc: %d' % (len(URI_Doc), len(Lit_Doc), len(Mix_Doc)))
all_doc = URI_Doc + Lit_Doc + Mix_Doc


print('Time for document construction: %s seconds' % (time.time() - start_time))
# Shuffle in place so sentences from the three documents are interleaved.
random.shuffle(all_doc)


# Save all_doc: one sentence per line, every word followed by a single space.
# The with-statement closes the file, so no explicit close() is needed.
with open(os.path.join(config['DOCUMENT']['cache_dir'], 'document_sentences.txt'), 'w') as f:
    for sentence in all_doc:
        f.write(''.join('%s ' % w for w in sentence) + '\n')


# Learn the language model: fine-tune the pre-trained model when one is
# configured and its file exists, otherwise train a new model on all_doc.
start_time = time.time()
model_cfg = config['MODEL']
if 'pre_train_model' in model_cfg and os.path.exists(model_cfg['pre_train_model']):
    print('\nFine-tune the pre-trained language model ...')
    model_ = gensim.models.Word2Vec.load(model_cfg['pre_train_model'])
    if len(all_doc) > 0:
        # Extend the existing vocabulary with the new corpus, then continue training.
        model_.min_count = int(model_cfg['min_count'])
        model_.build_vocab(all_doc, update=True)
        model_.train(all_doc, total_examples=model_.corpus_count, epochs=int(model_cfg['epoch']))
else:
    print('\nTrain the language model ...')
    # NOTE(review): `size` / `iter` are the keyword names used by this code;
    # confirm they match the installed gensim version.
    word2vec_params = dict(
        size=int(model_cfg['embed_size']),
        window=int(model_cfg['window']),
        workers=multiprocessing.cpu_count(),
        sg=1,
        iter=int(model_cfg['iteration']),
        negative=int(model_cfg['negative']),
        min_count=int(model_cfg['min_count']),
        seed=int(model_cfg['seed']),
    )
    model_ = gensim.models.Word2Vec(all_doc, **word2vec_params)

# Save the full model, then the vectors in textual and in binary format.
# The "2" suffix keeps this configuration's files apart from the other runs.
embedding_path = config['BASIC']['embedding_dir']
model_.save(embedding_path + "2")

model_.wv.save_word2vec_format(embedding_path + "2.txt", binary=False)  # <-- differing name of embeddings
model_.wv.save_word2vec_format(embedding_path + "2.bin", binary=True)

print('Time for learning the language model: %s seconds' % (time.time() - start_time))
print('Model saved. Done!')



INFO: There are 302 triples in the ontology
INFO: Creating ontology graph projection...
INFO: 	Extracting subsumption triples
INFO: 		Time extracting subsumption: 0.06309795379638672 seconds 
INFO: 	Extracting equivalence triples
INFO: 		Time extracting equivalences: 0.021349191665649414 seconds 
INFO: 	Extracting class membership triples.



 Access the ontology ...

Calculate the ontology projection ...


INFO: 		Time extracting class membership: 0.12949681282043457 seconds 
INFO: 	Extracting sameAs triples
INFO: 		Time extracting sameAs: 0.008658885955810547 seconds 
INFO: 	Extracting triples associated to Has_base
INFO: 		Time extracting triples for property: 0.13617682456970215 seconds 
INFO: 	Extracting triples associated to Has_ingredient
INFO: 		Time extracting triples for property: 0.13710308074951172 seconds 
INFO: 	Extracting triples associated to Has_location
INFO: 		Time extracting triples for property: 0.21639418601989746 seconds 
INFO: 	Extracting triples associated to Has_topping
INFO: 		Time extracting triples for property: 0.16829490661621094 seconds 
INFO: 	Extracting triples associated to Is_base_of
INFO: 		Time extracting triples for property: 0.15784287452697754 seconds 
INFO: 	Extracting triples associated to Is_ingredient_of
INFO: 		Time extracting triples for property: 0.1461331844329834 seconds 
INFO: 	Extracting triples associated to Is_located_in
INFO: 		Time e


Extract classes and individuals ...

Extract axioms ...

Extract annotations ...

Generate URI document ...
Extracted 815 walks for 50 seed entities
Extracted 67 axiom sentences

Generate literal document ...
Extracted 7 annotation sentences

Generate mixture document ...

INFO: collecting all words and their counts
INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO: collected 820 word types from a corpus of 8748 raw words and 2653 sentences
INFO: Loading a fresh vocabulary
INFO: effective_min_count=1 retains 820 unique words (100% of original 820, drops 0)
INFO: effective_min_count=1 leaves 8748 word corpus (100% of original 8748, drops 0)
INFO: deleting the raw counts dictionary of 820 items
INFO: sample=0.001 downsamples 64 most-common words
INFO: downsampling leaves estimated 4930 word corpus (56.4% of prior 8748)
INFO: estimated required memory for 820 words and 100 dimensions: 1066000 bytes
INFO: resetting layer weights



URI_Doc: 882, Lit_Doc: 889, Mix_Doc: 882
Time for document construction: 2.684782028198242 seconds

Train the language model ...


INFO: training model with 4 workers on 820 vocabulary and 100 features, using sg=1 hs=0 sample=0.001 negative=25 window=5
DEBUG: job loop exiting, total 1 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 1 : training on 8748 raw words (4926 effective words) took 0.0s, 166259 effective words/s
DEBUG: job loop exiting, total 1 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awai

Time for learning the language model: 0.7870798110961914 seconds
Model saved. Done!


#### Config 3.

OWL2Vec* configuration changes:

> number of iterations in training the language model: 100

> window size for gensim word2vec model: 10

Embedding results saved with "3" at the end of files.

In [17]:
# Using the lab 9 code; the code below is taken directly from OWL2Vec_Standalone.py
# running on ontology_with_data
# embedding_dir = ./output_embedding2/created_ontology.embeddings

import os
import sys
import time
import argparse
import random
import multiprocessing
import gensim
import configparser

sys.path.append('./rdf2vec/')
sys.path.append('./lib/')
from RDF2Vec_Embed import get_rdf2vec_walks
from Label import pre_process_words, URI_parse
from Onto_Projection import Reasoner, OntologyProjection

# Command-line interface. Every option is optional; the values parsed here are
# stored in FLAGS.
parser = argparse.ArgumentParser()
for option_name, fallback, description in (
        ("--ontology_file", None, "The input ontology for embedding"),
        ("--embedding_dir", None, "The output embedding directory"),
        ("--config_file", 'default.cfg', "Configuration file"),
):
    parser.add_argument(option_name, type=str, default=fallback, help=description)
for switch_name, description in (
        ("--URI_Doc", "Using URI document"),
        ("--Lit_Doc", "Using literal document"),
        ("--Mix_Doc", "Using mixture document"),
):
    parser.add_argument(switch_name, help=description, action="store_true")
# parse_known_args returns unrecognised arguments in `unparsed` instead of
# aborting on them.
FLAGS, unparsed = parser.parse_known_args()

# read and combine configurations
# overwrite the parameters in the configuration file by the command parameters
config = configparser.ConfigParser()
config.read(FLAGS.config_file)

# [BASIC] options: a value given on the command line replaces the file's value
for option in ('ontology_file', 'embedding_dir'):
    cli_value = getattr(FLAGS, option)
    if cli_value is not None:
        config['BASIC'][option] = cli_value

# [DOCUMENT] switches: a flag on the command line forces that document type on
for doc_type in ('URI_Doc', 'Lit_Doc', 'Mix_Doc'):
    if getattr(FLAGS, doc_type):
        config['DOCUMENT'][doc_type] = 'yes'

# fall-back locations when the configuration does not name them
if 'cache_dir' not in config['DOCUMENT']:
    config['DOCUMENT']['cache_dir'] = './cache'
# Intermediate files (entities, axioms, annotations, document sentences) are
# written into the cache directory further down, so make sure it exists;
# otherwise the first open(..., 'w') fails with FileNotFoundError on a fresh run.
os.makedirs(config['DOCUMENT']['cache_dir'], exist_ok=True)
if 'embedding_dir' not in config['BASIC']:
    config['BASIC']['embedding_dir'] = os.path.join(config['DOCUMENT']['cache_dir'], 'output')

start_time = time.time()

# The ontology is loaded when a projection is requested, or when any of the
# pre-computed entity / axiom / annotation files is absent from [DOCUMENT].
projection_requested = ('ontology_projection' in config['DOCUMENT']
                        and config['DOCUMENT']['ontology_projection'] == 'yes')
precomputed_keys = ('pre_entity_file', 'pre_axiom_file', 'pre_annotation_file')
if projection_requested or any(key not in config['DOCUMENT'] for key in precomputed_keys):
    print('\n Access the ontology ...')
    projection_settings = dict(
        reasoner=Reasoner.STRUCTURAL,
        only_taxonomy=False,
        bidirectional_taxonomy=True,
        include_literals=True,
        avoid_properties=set(),
        additional_preferred_labels_annotations=set(),
        additional_synonyms_annotations=set(),
        memory_reasoner='13351',
    )
    projection = OntologyProjection(config['BASIC']['ontology_file'], **projection_settings)
else:
    # everything needed is supplied through the pre_* files
    projection = None

# Ontology projection
# `ontology_file` is the graph the walks are generated from: the original
# ontology, or the projection saved in the cache directory when enabled.
if 'ontology_projection' not in config['DOCUMENT'] or config['DOCUMENT']['ontology_projection'] != 'yes':
    ontology_file = config['BASIC']['ontology_file']
else:
    print('\nCalculate the ontology projection ...')
    projection.extractProjection()
    onto_projection_file = os.path.join(config['DOCUMENT']['cache_dir'], 'projection.ttl')
    projection.saveProjectionGraph(onto_projection_file)
    ontology_file = onto_projection_file

# Extract and save seed entities (classes and individuals)
# Or read entities specified by the user
if 'pre_entity_file' in config['DOCUMENT']:
    # one entity URI per line; the context manager closes the handle promptly
    with open(config['DOCUMENT']['pre_entity_file']) as f:
        entities = [line.strip() for line in f]
else:
    print('\nExtract classes and individuals ...')
    projection.extractEntityURIs()
    classes = projection.getClassURIs()
    individuals = projection.getIndividualURIs()
    entities = classes.union(individuals)
    # keep a copy of the seed entities in the cache directory
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'entities.txt'), 'w') as f:
        f.writelines('%s\n' % e for e in entities)

# Extract axioms in Manchester Syntax if pre_axiom_file is not set,
# and store them one per line in <cache_dir>/axioms.txt
if 'pre_axiom_file' not in config['DOCUMENT']:
    print('\nExtract axioms ...')
    projection.createManchesterSyntaxAxioms()
    axioms_path = os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt')
    with open(axioms_path, 'w') as f:
        f.writelines('%s\n' % ax for ax in projection.axioms_manchester)

# If pre_annotation_file is set, directly read annotations
# else, read annotations including rdfs:label and other literals from the ontology
#   Extract annotations: 1) English label of each entity, by rdfs:label or skos:preferredLabel
#                        2) Non-label annotations as sentences of the literal document
# uri_label:   entity URI -> pre-processed label words
# annotations: list of [entity URI, word, word, ...]
uri_label, annotations = dict(), list()

if 'pre_annotation_file' in config['DOCUMENT']:
    # expected line layout: <entity URI> <property URI> <word> <word> ...
    with open(config['DOCUMENT']['pre_annotation_file']) as f:
        for line in f:
            tmp = line.strip().split()
            if not tmp:
                # blank line (e.g. a trailing newline at the end of the file);
                # indexing tmp[1] below would raise IndexError
                continue
            if tmp[1] == 'http://www.w3.org/2000/01/rdf-schema#label':
                uri_label[tmp[0]] = pre_process_words(tmp[2:])
            else:
                annotations.append([tmp[0]] + tmp[2:])

else:
    print('\nExtract annotations ...')
    projection.indexAnnotations()
    # label of an entity: the first of its preferred labels
    for e in entities:
        if e in projection.entityToPreferredLabels and len(projection.entityToPreferredLabels[e]) > 0:
            label = list(projection.entityToPreferredLabels[e])[0]
            uri_label[e] = pre_process_words(words=label.split())
    # every other lexical label becomes an annotation sentence
    for e in entities:
        if e in projection.entityToAllLexicalLabels:
            for v in projection.entityToAllLexicalLabels[e]:
                if (v is not None) and \
                        (not (e in projection.entityToPreferredLabels and v in projection.entityToPreferredLabels[e])):
                    annotation = [e] + v.split()
                    annotations.append(annotation)

    # keep a copy of the preferred labels and annotations in the cache directory
    with open(os.path.join(config['DOCUMENT']['cache_dir'], 'annotations.txt'), 'w') as f:
        for e in projection.entityToPreferredLabels:
            for v in projection.entityToPreferredLabels[e]:
                f.write('%s preferred_label %s\n' % (e, v))
        for a in annotations:
            f.write('%s\n' % ' '.join(a))


# read URI document
# two parts: walks, axioms (if the axiom file exists)
walk_sentences, axiom_sentences, URI_Doc = list(), list(), list()
if 'URI_Doc' in config['DOCUMENT'] and config['DOCUMENT']['URI_Doc'] == 'yes':
    print('\nGenerate URI document ...')
    walks_ = get_rdf2vec_walks(onto_file=ontology_file, walker_type=config['DOCUMENT']['walker'],
                               walk_depth=int(config['DOCUMENT']['walk_depth']), classes=entities)
    print('Extracted %d walks for %d seed entities' % (len(walks_), len(entities)))
    # each walk becomes a sentence of string tokens
    walk_sentences += [list(map(str, x)) for x in walks_]

    # each line of <cache_dir>/axioms.txt becomes one whitespace-tokenised sentence
    axiom_file = os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt')
    if os.path.exists(axiom_file):
        # the context manager closes the file handle once the lines are read
        with open(axiom_file) as f:
            axiom_sentences.extend(line.strip().split() for line in f)
    print('Extracted %d axiom sentences' % len(axiom_sentences))
    URI_Doc = walk_sentences + axiom_sentences


# Some entities have English labels
# Keep the name of built-in properties (those starting with http://www.w3.org)
# Some entities have no labels, then use the words in their URI name
def label_item(item):
    """Return the list of words that stands for `item` in the literal/mixture documents.

    Resolution order:
      1. an entry of the module-level `uri_label` mapping, returned as stored;
      2. a URI starting with ``http://www.w3.org``: its lower-cased fragment
         (the part after ``#``), or, when the URI has no fragment, its
         lower-cased last path segment;
      3. any other ``http://`` URI: whatever `URI_parse` returns for it;
      4. anything else: the item itself, lower-cased.
    """
    if item in uri_label:
        return uri_label[item]
    if item.startswith('http://www.w3.org'):
        if '#' in item:
            return [item.split('#')[1].lower()]
        # No fragment (e.g. http://www.w3.org/XML/1998/namespace): indexing
        # split('#')[1] would raise IndexError, so use the last path segment.
        return [item.rstrip('/').rsplit('/', 1)[-1].lower()]
    if item.startswith('http://'):
        return URI_parse(uri=item)
    return [item.lower()]


# read literal document
# two parts: literals in the annotations (subject's label + literal words)
#            replacing walk/axiom sentences by words in their labels
Lit_Doc = list()
if 'Lit_Doc' in config['DOCUMENT'] and config['DOCUMENT']['Lit_Doc'] == 'yes':
    print('\nGenerate literal document ...')
    # part 1: annotation sentences; annotations left without words are dropped
    for annotation in annotations:
        processed_words = pre_process_words(annotation[1:])
        if len(processed_words) == 0:
            continue
        Lit_Doc.append(label_item(item=annotation[0]) + processed_words)
    print('Extracted %d annotation sentences' % len(Lit_Doc))

    # part 2: walk sentences first, then axiom sentences, each item replaced by its words
    for sentence in walk_sentences + axiom_sentences:
        Lit_Doc.append([word for item in sentence for word in label_item(item=item)])

# read mixture document
# for each axiom/walk sentence, all): for each entity, keep its entity URI, replace the others by label words
#                            random): randomly select one entity, keep its entity URI, replace the others by label words
def _mix_sentence(sentence, keep_index):
    """Return `sentence` with every item except the one at `keep_index` replaced by its label words."""
    mixed = list()
    for i, item in enumerate(sentence):
        mixed += [item] if i == keep_index else label_item(item=item)
    return mixed


Mix_Doc = list()
if 'Mix_Doc' in config['DOCUMENT'] and config['DOCUMENT']['Mix_Doc'] == 'yes':
    print('\nGenerate mixture document ...')
    for sentence in walk_sentences + axiom_sentences:
        if not sentence:
            # An empty sentence has no item to keep; in 'random' mode
            # random.randint(0, -1) would raise ValueError.
            continue
        if config['DOCUMENT']['Mix_Type'] == 'all':
            # one mixed sentence per position
            for index in range(len(sentence)):
                Mix_Doc.append(_mix_sentence(sentence, index))
        elif config['DOCUMENT']['Mix_Type'] == 'random':
            # a single mixed sentence, keeping one randomly chosen position
            random_index = random.randint(0, len(sentence) - 1)
            Mix_Doc.append(_mix_sentence(sentence, random_index))

print('URI_Doc: %d, Lit_Doc: %d, Mix_Doc: %d' % (len(URI_Doc), len(Lit_Doc), len(Mix_Doc)))
# the training corpus is the concatenation of the three documents
all_doc = URI_Doc + Lit_Doc + Mix_Doc


print('Time for document construction: %s seconds' % (time.time() - start_time))
random.shuffle(all_doc)


# Save all_doc: one sentence per line, every word followed by a single space.
# The with-statement closes the file, so no explicit close() is needed.
with open(os.path.join(config['DOCUMENT']['cache_dir'], 'document_sentences.txt'), 'w') as f:
    for sentence in all_doc:
        f.write(''.join('%s ' % w for w in sentence) + '\n')


# learn the language model (train a new model or fine tune the pre-trained model)
start_time = time.time()
model_cfg = config['MODEL']
has_pre_trained = 'pre_train_model' in model_cfg and os.path.exists(model_cfg['pre_train_model'])
if has_pre_trained:
    print('\nFine-tune the pre-trained language model ...')
    model_ = gensim.models.Word2Vec.load(model_cfg['pre_train_model'])
    if len(all_doc) > 0:
        model_.min_count = int(model_cfg['min_count'])
        model_.build_vocab(all_doc, update=True)
        model_.train(all_doc, total_examples=model_.corpus_count, epochs=int(model_cfg['epoch']))
else:
    print('\nTrain the language model ...')
    # NOTE(review): the `size` / `iter` keyword names look like the gensim 3.x
    # spelling - confirm against the installed gensim version.
    model_ = gensim.models.Word2Vec(
        all_doc,
        size=int(model_cfg['embed_size']),
        window=int(model_cfg['window']),
        workers=multiprocessing.cpu_count(),
        sg=1,
        iter=int(model_cfg['iteration']),
        negative=int(model_cfg['negative']),
        min_count=int(model_cfg['min_count']),
        seed=int(model_cfg['seed']),
    )

# All outputs of this configuration share the embedding_dir value plus the
# suffix "3": the gensim model itself, a textual and a binary vector file.
output_prefix = config['BASIC']['embedding_dir'] + "3"
model_.save(output_prefix)

model_.wv.save_word2vec_format(output_prefix + ".txt", binary=False)
model_.wv.save_word2vec_format(output_prefix + ".bin", binary=True)

print('Time for learning the language model: %s seconds' % (time.time() - start_time))
print('Model saved. Done!')



INFO: There are 302 triples in the ontology
INFO: Creating ontology graph projection...
INFO: 	Extracting subsumption triples
INFO: 		Time extracting subsumption: 0.08221006393432617 seconds 
INFO: 	Extracting equivalence triples
INFO: 		Time extracting equivalences: 0.0388340950012207 seconds 
INFO: 	Extracting class membership triples.



 Access the ontology ...

Calculate the ontology projection ...


INFO: 		Time extracting class membership: 0.22005772590637207 seconds 
INFO: 	Extracting sameAs triples
INFO: 		Time extracting sameAs: 0.012752294540405273 seconds 
INFO: 	Extracting triples associated to Has_base
INFO: 		Time extracting triples for property: 0.17971420288085938 seconds 
INFO: 	Extracting triples associated to Has_ingredient
INFO: 		Time extracting triples for property: 0.16765308380126953 seconds 
INFO: 	Extracting triples associated to Has_location
INFO: 		Time extracting triples for property: 0.17580008506774902 seconds 
INFO: 	Extracting triples associated to Has_topping
INFO: 		Time extracting triples for property: 0.14914321899414062 seconds 
INFO: 	Extracting triples associated to Is_base_of
INFO: 		Time extracting triples for property: 0.15640711784362793 seconds 
INFO: 	Extracting triples associated to Is_ingredient_of
INFO: 		Time extracting triples for property: 0.19498181343078613 seconds 
INFO: 	Extracting triples associated to Is_located_in
INFO: 		Time 


Extract classes and individuals ...

Extract axioms ...

Extract annotations ...

Generate URI document ...


INFO: collecting all words and their counts
INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO: collected 210 word types from a corpus of 12427 raw words and 2356 sentences
INFO: Loading a fresh vocabulary
INFO: effective_min_count=1 retains 210 unique words (100% of original 210, drops 0)
INFO: effective_min_count=1 leaves 12427 word corpus (100% of original 12427, drops 0)
INFO: deleting the raw counts dictionary of 210 items
INFO: sample=0.001 downsamples 81 most-common words
INFO: downsampling leaves estimated 5150 word corpus (41.4% of prior 12427)
INFO: estimated required memory for 210 words and 100 dimensions: 273000 bytes
INFO: resetting layer weights
INFO: training model with 4 workers on 210 vocabulary and 100 features, using sg=1 hs=0 sample=0.001 negative=25 window=10
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads


Extracted 716 walks for 50 seed entities
Extracted 67 axiom sentences

Generate literal document ...
Extracted 7 annotation sentences

Generate mixture document ...
URI_Doc: 783, Lit_Doc: 790, Mix_Doc: 783
Time for document construction: 3.5813281536102295 seconds

Train the language model ...


INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 1 : training on 12427 raw words (5167 effective words) took 0.0s, 127666 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 2 : training on 12427 raw words (5139 effective words) took 0.1s, 100094 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, pro

DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 16 : training on 12427 raw words (5148 effective words) took 0.0s, 133218 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 17 : train

INFO: EPOCH - 30 : training on 12427 raw words (5172 effective words) took 0.0s, 158949 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 31 : training on 12427 raw words (5133 effective words) took 0.0s, 141737 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of

DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 1 more threads
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 45 : training on 12427 raw words (5227 effective words) took 0.0s, 136972 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 46 : training on 12427 raw words (5103 effective words) took 0.0s, 138653 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of

INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 60 : training on 12427 raw words (5115 effective words) took 0.0s, 128867 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 61 : training on 12427 raw words (5224 effective words) took 0.0s, 153793 effective words/

DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 75 : training on 12427 raw words (5134 effective words) took 0.0s, 135589 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0

INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 89 : training on 12427 raw words (5197 effective words) took 0.0s, 147975 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 2 more threads
INFO: worker thread finished; awaiting finish of 1 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of 0 more threads
INFO: EPOCH - 90 : training on 12427 raw words (5181 effective words) took 0.0s, 145970 effective words/s
DEBUG: job loop exiting, total 2 jobs
DEBUG: worker exiting, processed 0 jobs
DEBUG: worker exiting, processed 0 jobs
INFO: worker thread finished; awaiting finish of 3 more threads
DEBUG: worker exiting, processed 1 jobs
INFO: worker thread finished; awaiting finish of

Time for learning the language model: 4.703548192977905 seconds
Model saved. Done!


The embeddings for each of the 3 different configurations have been saved in the output_embedding directory. In that directory, a notebook can be found which performs the rest of the ontology embedding task.