First download NCESData.zip from [hobbit](https://hobbitdata.informatik.uni-leipzig.de/NCES_Ontolearn_Data/) and extract it into the Ontolearn repository.

In [2]:
from ontolearn.concept_learner import NCES
from ontolearn.knowledge_base import KnowledgeBase
from owlapy.parser import DLSyntaxParser
from owlapy.render import DLSyntaxObjectRenderer
from ontolearn.metrics import F1, Accuracy, Precision, Recall
import time
import random

In [3]:
def quality(solution, pos, neg):
    f1 = F1().score2; accuracy = Accuracy().score2; precision = Precision().score2; recall = Recall().score2
    instances = set(KB.individuals(solution))
    if isinstance(list(pos)[0], str):
        instances = {ind.get_iri().as_str().split("/")[-1] for ind in instances}
    tp=len(pos.intersection(instances))
    fn=len((instances-pos).difference(neg))
    fp=len((instances-neg).difference(pos))
    tn=len((instances-pos).intersection(neg))
    print("Accuracy: {}%".format(100*accuracy(tp, fn, fp, tn)[-1]))
    print("Precision: {}%".format(100*precision(tp, fn, fp, tn)[-1]))
    print("Recall: {}%".format(100*recall(tp, fn, fp, tn)[-1]))
    print("F1: {}%".format(100*f1(tp, fn, fp, tn)[-1]))

In [4]:
nces = NCES(knowledge_base_path="../NCESData/family/family.owl", learner_name="SetTransformer",
     path_of_embeddings="../NCESData/family/embeddings/ConEx_entity_embeddings.csv", load_pretrained=True, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1, num_heads=4, num_seeds=1, num_inds=32, pretrained_model_name="SetTransformer")



 Loaded pretrained model! 



In [5]:
KB = KnowledgeBase(path=nces.knowledge_base_path)

In [6]:
dl_syntax_renderer = DLSyntaxObjectRenderer()

In [7]:
atomic_classes = [dl_syntax_renderer.render(a) for a in KB.ontology().classes_in_signature()]

In [8]:
atomic_classes

['Brother',
 'Male',
 'PersonWithASibling',
 'Child',
 'Person',
 'Daughter',
 'Female',
 'Father',
 'Parent',
 'Grandchild',
 'Granddaughter',
 'Grandfather',
 'Grandparent',
 'Grandmother',
 'Grandson',
 'Mother',
 'Sister',
 'Son']

In [9]:
dl_parser = DLSyntaxParser(nces.kb_namespace)

In [10]:
brother = dl_parser.parse('Brother')
daughter = dl_parser.parse('Daughter')

#### Input examples can be sets or lists

In [11]:
pos = set(KB.individuals(brother)).union(set(KB.individuals(daughter)))

In [12]:
neg = set(KB.individuals())-set(pos)

#### Prediction with SetTransformer (default model)

In [13]:
t0 = time.time()
concept = nces.fit(pos, neg)
t1 = time.time()
print("\nDuration: ", t1-t0, " seconds")

Prediction:  Son ⊔ Daughter ⊔ PersonWithASibling

Duration:  0.13891911506652832  seconds


In [14]:
quality(concept, pos, neg)

Accuracy: 100.0%
Precision: 100.0%
Recall: 100.0%
F1: 100.0%


### Ensemble prediction

In [14]:
nces.pretrained_model_name = ['SetTransformer','GRU','LSTM']
nces.refresh()
t0 = time.time()
concept = nces.fit(pos, neg)
t1 = time.time()
print("\nDuration: ", t1-t0, " seconds")



 Loaded pretrained model! 



 Loaded pretrained model! 



 Loaded pretrained model! 

Prediction:  Brother ⊔ Daughter

Duration:  0.4016838073730469  seconds


In [15]:
quality(concept, pos, neg)

Accuracy: 100.0%
Precision: 100.0%
Recall: 100.0%
F1: 100.0%


### Complex learning problems, potentially without an exact solution

#### First learning problem

In [16]:
all_individuals = set(KB.individuals())
pos = set(random.sample(list(all_individuals), 150))
remaining = all_individuals-pos
neg = set(random.sample(list(remaining), min(100, len(remaining))))

In [17]:
nces.pretrained_model_name

['SetTransformer', 'GRU', 'LSTM']

In [18]:
t0 = time.time()
concept = nces.fit(pos, neg)
t1 = time.time()
print("\nDuration: ", t1-t0, " seconds")

Prediction:  Person ⊓ (∀ married.(PersonWithASibling ⊔ (∀ hasChild.(¬Sister))))

Duration:  0.34989166259765625  seconds


In [19]:
quality(concept, pos, neg)

Accuracy: 100.0%
Precision: 100.0%
Recall: 100.0%
F1: 100.0%


#### Second learning problem

In [20]:
pos = set(random.sample(list(all_individuals), 80))
remaining = all_individuals-pos
neg = set(random.sample(list(remaining), min(150, len(remaining))))

In [21]:
t0 = time.time()
concept = nces.fit(pos, neg)
t1 = time.time()
print("\nDuration: ", t1-t0, " seconds")

Prediction:  Person ⊔ Male

Duration:  0.3551011085510254  seconds


In [22]:
quality(concept, pos, neg)

Accuracy: 100.0%
Precision: 100.0%
Recall: 100.0%
F1: 100.0%


## Important note

- Each of the synthesized expressions, e.g., Person ⊓ (∀ married.(PersonWithASibling ⊔ (∀ hasChild.(¬Sister)))) are not present in the knowledge base.
- NCES synthesizes solutions by leveraging its experience on the training data.
- The inputs (positive/negative examples) need not be balanced
- NCES can solve multiple learning problems at the same time (through broadcasting on matrix operations in its neural network component)
- Since LSTM and GRU are not permutation-equivariant, we can get different but closely related solutions by shuflling the input examples for these architectures. For this, one needs to instantiate the NCES class with the attribute "sorted_examples=False".