From the main directory "Ontolearn", run the commands for NCES data mentioned [here](https://ontolearn-docs-dice-group.netlify.app/usage/02_installation#download-external-files) to download pretrained models and datasets.

In [1]:
from ontolearn.concept_learner import NCES
from ontolearn.knowledge_base import KnowledgeBase
from owlapy.parser import DLSyntaxParser
from owlapy.render import DLSyntaxObjectRenderer
import sys
sys.path.append("examples/")
from ontolearn.metrics import F1
from quality_functions import quality
import time
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the NCES learner for the family ontology with pretrained weights loaded.
# Only the SetTransformer synthesizer is enabled at this point.
nces = NCES(
    knowledge_base_path="../NCESData/family/family.owl",
    quality_func=F1(),
    num_predictions=100,
    learner_names=["SetTransformer"],
    path_of_embeddings="../NCESData/family/embeddings/ConEx_entity_embeddings.csv",
    load_pretrained=True,
    max_length=48,
    proj_dim=128,
    rnn_n_layers=2,
    drop_prob=0.1,
    num_heads=4,
    num_seeds=1,
    num_inds=32,
)


 Loaded NCES weights!



In [3]:
# Load the knowledge base from the ontology path stored on the NCES instance,
# so the examples below are drawn from the same ontology the learner uses.
KB = KnowledgeBase(path=nces.knowledge_base_path)

In [4]:
# Renderer used below to turn OWL class objects into description-logic (DL) syntax strings.
dl_syntax_renderer = DLSyntaxObjectRenderer()

In [5]:
# Render every class in the ontology signature as its DL-syntax name.
atomic_classes = list(map(dl_syntax_renderer.render, KB.ontology.classes_in_signature()))

In [6]:
atomic_classes

['Brother',
 'Male',
 'PersonWithASibling',
 'Child',
 'Person',
 'Daughter',
 'Female',
 'Father',
 'Parent',
 'Grandchild',
 'Granddaughter',
 'Grandfather',
 'Grandparent',
 'Grandmother',
 'Grandson',
 'Mother',
 'Sister',
 'Son']

In [7]:
# DL-syntax parser bound to the namespace of the knowledge base used by NCES.
# NOTE(review): presumably this is what lets short names such as 'Brother' resolve
# to full IRIs — confirm against the owlapy parser documentation.
dl_parser = DLSyntaxParser(nces.kb_namespace)

In [8]:
# Parse the two named classes whose instances will form the positive examples.
brother, daughter = (dl_parser.parse(class_name) for class_name in ('Brother', 'Daughter'))

#### Input examples can be sets or lists

In [9]:
# Positive examples: every individual that is a Brother or a Daughter.
pos = set(KB.individuals(brother)) | set(KB.individuals(daughter))

In [10]:
# Negative examples: all remaining individuals of the knowledge base.
neg = set(KB.individuals()).difference(pos)

In [11]:
len(neg)

120

#### Prediction with SetTransformer (default model)

In [12]:
# Time one NCES synthesis call and keep the first entry of best_predictions.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the cell runs.
t0 = time.perf_counter()
node = list(nces.fit(pos, neg).best_predictions)[0]
t1 = time.perf_counter()
print("\nDuration: ", t1-t0, " seconds")


Duration:  0.4705181121826172  seconds


In [13]:
# Number of distinct individuals the knowledge base returns for the predicted concept.
len(set(KB.individuals(node.concept)))

82

In [14]:
node

<class 'ontolearn.search.NCESNode'> at 0xc4363b4	PersonWithASibling ⊔ Daughter	Quality:1.0	Length:3	|Indv.|:82

In [15]:
# Evaluate the predicted concept against the positive/negative examples.
# As the output below shows, the helper prints accuracy, precision, recall and F1
# (in percent) and also returns them as a tuple.
quality(KB, node.concept, pos, neg)

Accuracy: 100.0%
Precision: 100.0%
Recall: 100.0%
F1: 100.0%


(100.0, 100.0, 100.0, 100.0)

### Ensemble prediction

In [16]:
# Switch the same NCES instance to an ensemble of all three synthesizers.
nces.learner_names = ['SetTransformer','GRU','LSTM']
nces.refresh() # Loads model weights
# Time one synthesis call with the ensemble and keep the first entry of best_predictions.
# perf_counter() is a monotonic clock intended for measuring elapsed time, unlike
# time.time(), which follows the (adjustable) system wall clock.
t0 = time.perf_counter()
node = list(nces.fit(pos, neg).best_predictions)[0]
t1 = time.perf_counter()
print("\nDuration: ", t1-t0, " seconds")


 Loaded NCES weights!


Duration:  0.43787145614624023  seconds


In [17]:
# Evaluate the ensemble's first prediction on the same Brother/Daughter learning problem.
quality(KB, node.concept, pos, neg)

Accuracy: 100.0%
Precision: 100.0%
Recall: 100.0%
F1: 100.0%


(100.0, 100.0, 100.0, 100.0)

In [18]:
node

<class 'ontolearn.search.NCESNode'> at 0xcaaf344	Brother ⊔ Daughter	Quality:1.0	Length:3	|Indv.|:82

In [19]:
# Inspect the first five predictions kept on the NCES instance after the last fit() call.
nces.best_predictions[:5]

[<class 'ontolearn.search.NCESNode'> at 0xcaaf344	Brother ⊔ Daughter	Quality:1.0	Length:3	|Indv.|:82,
 <class 'ontolearn.search.NCESNode'> at 0xc43610c	Brother ⊔ (Person ⊓ (Daughter ⊔ (∃ hasSibling.Grandfather)))	Quality:1.0	Length:9	|Indv.|:82,
 <class 'ontolearn.search.NCESNode'> at 0xcabfcb0	Brother ⊔ Daughter ⊔ Brother	Quality:1.0	Length:5	|Indv.|:82,
 <class 'ontolearn.search.NCESNode'> at 0xcaa10ac	PersonWithASibling ⊔ Daughter	Quality:1.0	Length:3	|Indv.|:82,
 <class 'ontolearn.search.NCESNode'> at 0xcaa1144	Brother ⊔ Daughter	Quality:1.0	Length:3	|Indv.|:82]

### Complex learning problems, potentially without an exact solution

#### First learning problem

In [20]:
# Draw a random learning problem: up to 150 positives, then up to 100 negatives
# taken from the individuals that were not chosen as positives.
# Both sample sizes are capped at the population size so random.sample() cannot
# raise ValueError on a knowledge base with fewer individuals than requested.
# NOTE: the sampling is unseeded, so the examples (and the scores reported
# further down) change from run to run.
all_individuals = set(KB.individuals())
pos = set(random.sample(list(all_individuals), min(150, len(all_individuals))))
remaining = all_individuals-pos
neg = set(random.sample(list(remaining), min(100, len(remaining))))

In [21]:
nces.learner_names

['SetTransformer', 'GRU', 'LSTM']

In [22]:
# Time one NCES synthesis call on the first random learning problem and keep
# the first entry of best_predictions.
# perf_counter() is used because it is a monotonic clock meant for measuring
# elapsed time; time.time() follows the adjustable system wall clock.
t0 = time.perf_counter()
node = list(nces.fit(pos, neg).best_predictions)[0]
t1 = time.perf_counter()
print("\nDuration: ", t1-t0, " seconds")


Duration:  0.38033485412597656  seconds


In [23]:
node

<class 'ontolearn.search.NCESNode'> at 0xcaeaac8	Person ⊔ Grandson	Quality:0.85227	Length:3	|Indv.|:202

In [24]:
# Evaluate the prediction on the randomly drawn examples; an exact (100% F1)
# solution is not expected to exist for such a learning problem.
quality(KB, node.concept, pos, neg)

Accuracy: 74.25699999999999%
Precision: 74.25699999999999%
Recall: 100.0%
F1: 85.227%


(74.25699999999999, 74.25699999999999, 100.0, 85.227)

#### Second learning problem

In [25]:
# Draw a second random learning problem: up to 80 positives, then up to 150
# negatives from the individuals that were not chosen as positives.
# Both sample sizes are capped at the population size so random.sample() cannot
# raise ValueError on a knowledge base with fewer individuals than requested.
# NOTE: the sampling is unseeded, so the examples change from run to run.
pos = set(random.sample(list(all_individuals), min(80, len(all_individuals))))
remaining = all_individuals-pos
neg = set(random.sample(list(remaining), min(150, len(remaining))))

In [26]:
# Time one NCES synthesis call on the second random learning problem and keep
# the first entry of best_predictions.
# perf_counter() is used because it is a monotonic clock meant for measuring
# elapsed time; time.time() follows the adjustable system wall clock.
t0 = time.perf_counter()
node = list(nces.fit(pos, neg).best_predictions)[0]
t1 = time.perf_counter()
print("\nDuration: ", t1-t0, " seconds")


Duration:  0.3842136859893799  seconds


In [27]:
# Evaluate the prediction on the second randomly drawn problem; as with the
# first one, an exact solution is not expected to exist.
quality(KB, node.concept, pos, neg)

Accuracy: 39.604%
Precision: 39.604%
Recall: 100.0%
F1: 56.738%


(39.604, 39.604, 100.0, 56.738)

## Important note

- None of the synthesized expressions is present in the knowledge base.
- NCES synthesizes solutions by leveraging its experience on the training data.
- The inputs (positive/negative examples) need not be balanced.
- NCES can solve multiple learning problems at the same time (through broadcasting on matrix operations in its neural network component), see nces_notebook1.ipynb
- Since LSTM and GRU are not permutation-equivariant, we can get different but closely related solutions by shuffling the input examples for these architectures. To do this, instantiate the NCES class with the argument "sorted_examples=False", which is the default.