In [1]:
import datetime as dt
from libs.utils.params import Params

# Experiment configuration, built as nested `Params` objects and written to
# "test.json" at the end of this cell.
# NOTE(review): the meaning of each field is defined by the code that consumes
# `params`, which is not visible in this notebook. Comments below either keep
# the original notes or are explicitly marked as assumptions.
params = Params(
    size=Params(
        # Sampling sizes
        size=1000,
        initial=4000,
        decrease=0.9,
        plateau=600
    ),
    # Random state
    # NOTE(review): no fixed seed is set; presumably runs are not reproducible
    # with None — confirm how the consumer treats it.
    seed=None,
    # Thresholds
    threshold=Params(
        adaptative=True,
        opt=0.9,
        initial=0.9,
        min=0.6,
        step=0.05,
        expressive=0.5,
        # Same value as `initial` above.
        current=0.9,
    ),
    max_depth=4,
    max_depth_step=0,
    patterns=Params(
        individuals=True,
        existential=True,
    ),
    # Same name ("toy") as the graph and embeddings loaded in later cells.
    embeddings="toy",
    clustering=Params(
        affinity="euclidean",
        linkage="ward",
    ),
    metric="harmonic",
    # NOTE(review): the later `ind.find(allow_neg=True)` call does not pass
    # these two values, so they are not applied to that call.
    max_axioms=2,
    min_gain=0.08,
    allow_child=False,
    sort_axioms=False,
    others=Params(
        keep=True,
        n=8, # Max number of candidates to keep
        threshold=0.9, # Share of the optimal score (original note says "%"; the value is a 0-1 fraction)
    ),
    halting=Params(
        min_size=30,
        max_rec_steps=40,
        max_clustering_steps=100,
        max_extracted_depth=15,
        # Evaluates to 115_343_360. The original note said "gigabytes": that is
        # 110 GiB only if the consumer counts in KiB; in bytes it is 110 MiB.
        # TODO confirm the unit expected by the consumer.
        memory_limit=110*1024**2 # gigabytes (see note above)
    ),
    extra=Params(
        active=True,
        n=100,
        reset_classes=True,
        depth=20,
        threshold=0.15
    ),
    record=Params(
        save_taxonomy=True,
        checkpoints=True,
        checkpoint_every=100,
        dirname="results/taxonomy/auto",
        # str.format template. It references `halting.max_clustering_steps`
        # and a `timestamp` field; no `timestamp` key is defined in this
        # configuration, so it must be supplied when the template is formatted.
        name_pattern="taxonomy_{halting.max_clustering_steps}s_{timestamp:%m%d_%Hh%M}"
    ),
    display=True
)
# Disabled: would derive `record.taxname` from `record.name_pattern`.
#params.record.taxname = params.record.name_pattern.format(**params)

# Persist the configuration to "test.json" (relative to the working directory).
params.save("test.json")

In [2]:
from libs.graph import KnowledgeGraph

# Build the knowledge graph from the "toy" source (presumably a dataset
# directory resolved by `from_dir` — verify the lookup path). The recorded
# progress bar below reports 316114 triples.
kg = KnowledgeGraph.from_dir("toy")

Triples: 100%|█████████████████████████████████████████████████████████████| 316114/316114 [00:01<00:00, 158467.89it/s]


In [3]:
from libs.embeddings import load

# Load the "toy" embeddings; `E` exposes `.shape`, so it is array-like.
E = load("toy")

# Recorded output: (54795, 50) — presumably one 50-dimensional row per entity;
# TODO confirm the row/entity alignment with `kg`.
E.shape

(54795, 50)

In [15]:
# The two class identifiers whose members are compared.
ca = "dbo:TennisPlayer"
cb = "dbo:Organisation"

# For each class, collect the head entity of up to 100 triples whose relation
# is `kg.isaid` and whose tail is that class.
# NOTE(review): the naming is crossed — `B` is built from `ca` and `A` from
# `cb`. The later `lae.Inducer(B, A, ...)` call passes `B` first, so the
# bindings must stay as they are unless both places are changed together.
B = [
    head
    for head, _rel, _tail in kg.find_triples(
        r=kg.isaid, t=kg.ent.to_id(ca), max_results=100
    )
]
A = [
    head
    for head, _rel, _tail in kg.find_triples(
        r=kg.isaid, t=kg.ent.to_id(cb), max_results=100
    )
]

In [17]:
import libs.axiom_extraction as lae
from importlib import reload

# Re-import the module and rebind `lae` to the module object `reload` returns,
# so the cell uses the current on-disk version of the code.
lae = reload(lae)

# `B` is passed first and `A` second; `B` was built from `ca` and `A` from `cb`
# in the previous cell. The roles of the two positional arguments are defined
# by `Inducer` — presumably (positives, negatives); TODO confirm.
ind = lae.Inducer(B, A, kg, threshold=0)

# Recorded output: "Inducer(entities=200, axioms=2062)".
print(ind)

# Only `allow_neg` is passed, so `find` runs with its own defaults for the
# rest (the `help(ind.find)` output in this notebook lists max_axioms=3,
# min_gain=0.05, keep_n=5). The values in `params` are not forwarded here.
res = ind.find(allow_neg=True)
# Bare last expression: displays `res` as the cell output.
res

Inducer(entities=200, axioms=2062)
Finding axioms

Step 0/3: 1 axioms to improve
Improving __empty__...
Coverage too low (0.00<0.85). Adding OR clauses...
Specificity too low (0.00<0.85). Adding AND clauses...
...8248 results found

Step 1/3: 2 axioms to improve
Improving ¬∃dbo:broadcastNetwork.dbo:Organisation...
...0 results found
Improving ¬∃dbo:broadcastNetwork.dbo:Organisation...
...0 results found


0,1,2,3,4
axiom,cov,spe,sco,
0,,,,
¬∃dbo:broadcastNetwork.dbo:Organisation,1.00,0.99,0.99,0.0
¬∃dbo:broadcastNetwork.dbo:Organisation,1.00,0.99,0.99,0.0


In [6]:
# Interactive introspection of the `find` signature.
# NOTE(review): the execution count (In [6]) is lower than that of the cell
# that creates `ind` (In [17]), so this output comes from an earlier kernel
# state; re-run top to bottom before relying on it.
help(ind.find)

Help on method find in module libs.axiom_extraction.inducer:

find(max_axioms=3, min_gain=0.05, keep_n=5, forbidden=None, **kwargs) method of libs.axiom_extraction.inducer.Inducer instance



In [9]:
# Display `res`.
# NOTE(review): the recorded output is an empty table with execution count
# In [9], which is lower than the In [17] run that produced the two-row table;
# it appears to be stale — re-run to refresh.
res

0,1,2,3
axiom,cov,spe,sco
