In [1]:
# Work from the repository root so that the `src.*` imports and the relative
# `local/...` paths used by this notebook resolve.
# The move is guarded on the presence of the `src` package directory, so
# re-running this cell does not climb one more level each time.
import os

if not os.path.isdir('src'):
    os.chdir('..')
print(os.getcwd())

/home/janek/mag/reasonable-embeddings


In [2]:
import numpy as np
import pandas as pd
import torch as T
import torch.nn.functional as F
import torch.nn as nn
from sklearn import metrics
from sklearn.decomposition import PCA
from tqdm import tqdm

from src.reasoner_mod import *
from src.simplefact import *
from src.simplefact.syntax import *
from src.reasoner import *
from src.generate import *
from src.utils import *
from src.vis import *
import random
# Single seed for the whole notebook; it also feeds the NumPy generator that
# drives dataset generation.
seed = 2022
# Seed the global stdlib and torch RNGs as well, so that any imported helper
# drawing from them (rather than from an explicit generator) is reproducible.
random.seed(seed)
T.manual_seed(seed)
# Value returned by the project's `timestr()` helper, captured once at startup.
ts = timestr()

In [3]:
# Ontology files (relative to local/out/dataset/) and the short labels printed
# for them; both lists are in the same order.
onto_paths = [
    'african-wildlife.ofn',
    'exchangemodel-noimport.ofn',
    'stuff-noimport.ofn',
    'swo-noimport.ofn',
    'ontodt-noimport.ofn',
    'pizza.ofn',
]
onto_names = ['wildlife', 'demacare', 'stuff', 'swo', 'ontodt', 'pizza']

# Load each ontology and build a reasoner from it, keeping `ontos` and `facts`
# in the same order as `onto_paths`.
ontos, facts = [], []
for path in onto_paths:
    onto = load_ofn(f'local/out/dataset/{path}', silent=True)
    fact = Reasoner.from_onto(onto, timeout=None)
    ontos.append(onto)
    facts.append(fact)
    print(path, '\t', onto)

african-wildlife.ofn 	 <Onto 30 concepts, 5 roles, 49 axioms>
exchangemodel-noimport.ofn 	 <Onto 260 concepts, 53 roles, 369 axioms>
stuff-noimport.ofn 	 <Onto 192 concepts, 46 roles, 314 axioms>


FaCT++.Kernel: Reasoner for the SROIQ(D) Description Logic, 64-bit
Copyright (C) Dmitry Tsarkov, 2002-2017. Version 1.7.0-SNAPSHOT (01 January 2017)


swo-noimport.ofn 	 <Onto 4067 concepts, 39 roles, 7140 axioms>
ontodt-noimport.ofn 	 <Onto 405 concepts, 8 roles, 921 axioms>
pizza.ofn 	 <Onto 99 concepts, 4 roles, 287 axioms>


In [4]:
# Test split. The generator created here is shared by all dataset-building
# cells, so their results depend on being run in order.
rng = np.random.default_rng(seed)
idx_te, X_te, y_te = [], [], []
for i, (onto, fact, onto_name) in enumerate(zip(ontos, facts, onto_names)):
    X_te_onto, y_te_onto = make_dataset(
        onto, fact, rng,
        n_queries=3000, min_query_size=15, max_query_size=20,
    )
    # "balance" is the mean of this ontology's labels.
    print(onto_name, '- balance', np.round(np.mean(y_te_onto), 4))
    # Tag every sample with the position of its ontology in `ontos`.
    idx_te.extend([i] * len(X_te_onto))
    X_te.extend(X_te_onto)
    y_te.extend(y_te_onto)

# Shuffle the three parallel sequences together; keep both the bundle and the
# unpacked names.
data_te = jointshuffle(rng, idx_te, X_te, y_te)
idx_te, X_te, y_te = data_te

wildlife - balance 0.0623
demacare - balance 0.0303
stuff - balance 0.2253
swo - balance 0.2147
ontodt - balance 0.0457
pizza - balance 0.3677


In [5]:
# Validation split, generated with the same make_dataset settings as the test
# split and continuing from the current state of the shared `rng`.
idx_vl, X_vl, y_vl = [], [], []
for i, (onto, fact, onto_name) in enumerate(zip(ontos, facts, onto_names)):
    X_vl_onto, y_vl_onto = make_dataset(
        onto, fact, rng,
        n_queries=3000, min_query_size=15, max_query_size=20,
    )
    # "balance" is the mean of this ontology's labels.
    print(onto_name, '- balance', np.round(np.mean(y_vl_onto), 4))
    # Tag every sample with the position of its ontology in `ontos`.
    idx_vl.extend([i] * len(X_vl_onto))
    X_vl.extend(X_vl_onto)
    y_vl.extend(y_vl_onto)

# Shuffle the three parallel sequences together; keep both the bundle and the
# unpacked names.
data_vl = jointshuffle(rng, idx_vl, X_vl, y_vl)
idx_vl, X_vl, y_vl = data_vl

wildlife - balance 0.0637
demacare - balance 0.0337
stuff - balance 0.2313
swo - balance 0.201
ontodt - balance 0.0463
pizza - balance 0.3543


In [6]:
# Training split: generate raw samples per ontology and report the label mean
# ("balance") of each before any reduction.
idx_tr, X_tr, y_tr = [], [], []
for i, (onto, fact, onto_name) in enumerate(zip(ontos, facts, onto_names)):
    X_tr_onto, y_tr_onto = make_dataset(onto, fact, rng, 16000, 10, 14)
    print(onto_name, '- balance', np.round(np.mean(y_tr_onto), 4))
    # Tag every sample with the position of its ontology in `ontos`.
    idx_tr.extend([i] * len(X_tr_onto))
    X_tr.extend(X_tr_onto)
    y_tr.extend(y_tr_onto)

# NOTE(review): reduce_dataset is defined elsewhere; presumably it trims the
# training data to 14000 samples per ontology using the test split as a
# reference - confirm against its implementation.
data_tr = reduce_dataset([idx_tr, X_tr, y_tr], len(ontos), 14000, data_te)
idx_tr, X_tr, y_tr = data_tr[0], data_tr[1], data_tr[2]

# Group the reduced labels by ontology index in one pass, then take the mean
# of each group.
labels_by_onto = {}
for onto_idx, label in zip(idx_tr, y_tr):
    labels_by_onto.setdefault(onto_idx, []).append(label)
mean_values = {k: np.mean(labels_by_onto[k]) for k in sorted(labels_by_onto)}

# Report the balance again, now for the reduced training data.
for i in sorted(mean_values):
    print(f"{onto_names[i]} - balance: {mean_values[i]:.4f}")

# Shuffle the three parallel sequences together; keep both the bundle and the
# unpacked names.
data_tr = jointshuffle(rng, idx_tr, X_tr, y_tr)
idx_tr, X_tr, y_tr = data_tr

wildlife - balance 0.0753
demacare - balance 0.0368
stuff - balance 0.231
swo - balance 0.2389
ontodt - balance 0.0551
pizza - balance 0.3744
wildlife - balance: 0.0623
demacare - balance: 0.0303
stuff - balance: 0.2253
swo - balance: 0.2146
ontodt - balance: 0.0456
pizza - balance: 0.3676


In [14]:
# Write the ontologies together with the train / validation / test bundles to
# a single JSON file.
dataset_bundle = [ontos, data_tr, data_vl, data_te]
save_test_dataset(dataset=dataset_bundle, path='local/out/dataset/dataExp6-9.json')