In [1]:
cd ..

/home/janek/reasonable-embeddings


In [2]:
import numpy as np
import pandas as pd
import torch as T
import torch.nn.functional as F
import torch.nn as nn
import random
from sklearn import metrics
from src.reasoner_mod import *
from src.generate import load_dataset, prepare_data, count_elements, reduce_dataset, make_dataset
from src.reasoner import *
from src.utils import *
from src.vis import *


In [3]:
seed = 2022
dataset_path = 'local/out/dataset/sub-100.json'
ts = timestr()
train_onto, test_onto, data_tr, data_vl, data_te = load_dataset(dataset_path)
rng = np.random.default_rng(seed)

In [4]:
data_tr, data_vl, data_te_tr, data_te_vl, idx_te_te, X_te_te, y_te_te = prepare_data(data_tr, data_vl, data_te, seed, 6)

In [None]:
for i, onto in enumerate(train_onto):
    fact = Reasoner.from_onto(onto, timeout=None)
    queries, answers = make_dataset(onto, fact, rng, 400 - data_vl[0].count(i), 7 , 9)
    data_vl[1].extend(queries)
    data_vl[0].extend(len(queries)*[i])
    data_vl[2].extend(answers)

In [5]:
data_tr = reduce_dataset(data_tr, len(train_onto), 1600, data_vl)

In [6]:
print(np.mean(data_tr[2]))
print(np.mean(data_vl[2]))
print()
print(np.mean(data_te_tr[2]))
print(np.mean(data_te_vl[2]))
print(np.mean(y_te_te))

0.188546875
0.191374246405936

0.2203348305430788
0.18243243243243243
0.18631643249847282


In [7]:
emb_size = 10
hidden_size = 16
epoch_count = 10
test_epoch_count = 10
batch_size = 32

T.manual_seed(seed)
trained_reasoner = ModifiedReasonerHead(emb_size=emb_size, hidden_size=hidden_size)
encoders = [ModifiedEmbeddingLayer.from_onto(onto, emb_size=emb_size) for onto in train_onto]

print(f'created reasoner with {paramcount(trained_reasoner)} parameters')
print(f'created {len(encoders)} encoders with {paramcount(encoders[0])} parameters each')
# train_logger = train_mod(data_tr, data_vl, trained_reasoner, encoders, epoch_count=epoch_count,
                        #   batch_size=batch_size, identities_weight=0.02, identitity_weight_decay=0.96)

created reasoner with 3283 parameters
created 40 encoders with 1270 parameters each


In [8]:
import optuna
from sklearn.metrics import precision_recall_curve, roc_auc_score, accuracy_score
from sklearn.metrics import auc

def objective(trial):
    T.manual_seed(seed)
    random.seed(seed)
    trained_reasoner = ModifiedReasonerHead(emb_size=emb_size, hidden_size=hidden_size)
    encoders = [ModifiedEmbeddingLayer.from_onto(onto, emb_size=emb_size) for onto in train_onto]

    id_weight = trial.suggest_int('identity_weight', 10, 100)
    decay= trial.suggest_int('decay',900, 999)

    id_weight/=1000
    decay/=1000
    y, Y = train_mod(data_tr, data_vl, trained_reasoner, encoders, epoch_count=15,
                            batch_size=batch_size, identities_weight=id_weight, identitity_weight_decay=decay)
    
    prec, rec, _ = precision_recall_curve(y, Y)
    
    print('prc: ', auc(rec, prec))
    print('roc: ', roc_auc_score(y,Y))
    K = np.array(Y) > 0.5
    print('acc: ',accuracy_score(y,K))

    return auc(rec, prec)*10 + roc_auc_score(y,Y)*6 + accuracy_score(y,K)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Wyświetl najlepsze hiperparametry oraz wartość dokładności
print('Najlepsze hiperparametry:')
print(study.best_params)
print('Wartość średniej dokładności dla najlepszych hiperparametrów:')
print(study.best_value)

[I 2024-07-29 19:38:21,402] A new study created in memory with name: no-name-0368d66f-0d76-424c-a79c-d272e41ed5f1


train epoch 00/10 | batch 2001/2000 | loss 0.8458 | val loss 0.6940 | acc 0.5032 | f1 0.2387 | prec 0.1689 | recall 0.4071 | roc auc 0.4492 | pr auc 0.1681 | elapsed 36.51s
train epoch 01/10 | batch 2001/2000 | loss 0.6651 | val loss 0.4598 | acc 0.8397 | f1 0.3128 | prec 0.8708 | recall 0.1906 | roc auc 0.6908 | pr auc 0.4937 | elapsed 108.06s
train epoch 02/10 | batch 2001/2000 | loss 0.4498 | val loss 0.3689 | acc 0.8513 | f1 0.5233 | prec 0.6769 | recall 0.4265 | roc auc 0.8183 | pr auc 0.6260 | elapsed 118.30s
train epoch 03/10 | batch 2001/2000 | loss 0.3241 | val loss 0.3292 | acc 0.8754 | f1 0.6310 | prec 0.7283 | recall 0.5565 | roc auc 0.8585 | pr auc 0.7142 | elapsed 98.95s
train epoch 04/10 | batch 2001/2000 | loss 0.2450 | val loss 0.3047 | acc 0.8856 | f1 0.6769 | prec 0.7367 | recall 0.6260 | roc auc 0.8820 | pr auc 0.7681 | elapsed 93.74s
train epoch 05/10 | batch 2001/2000 | loss 0.1949 | val loss 0.2875 | acc 0.8953 | f1 0.7128 | prec 0.7507 | recall 0.6785 | roc auc 

[W 2024-07-29 19:54:47,886] Trial 0 failed with parameters: {'identity_weight': 11, 'decay': 914} because of the following error: TypeError('cannot unpack non-iterable TrainingLogger object').
Traceback (most recent call last):
  File "/home/janek/miniconda3/envs/reasoner/lib/python3.9/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_6247/587486989.py", line 14, in objective
    y, Y = train_mod(data_tr, data_vl, trained_reasoner, encoders, epoch_count=10,
TypeError: cannot unpack non-iterable TrainingLogger object
[W 2024-07-29 19:54:47,888] Trial 0 failed with value None.


train epoch 10/10 | batch 2001/2000 | loss 0.1075 | val loss 0.2673 | acc 0.9065 | f1 0.7636 | prec 0.7396 | recall 0.7892 | roc auc 0.9332 | pr auc 0.8663 | elapsed 90.02s


TypeError: cannot unpack non-iterable TrainingLogger object

In [None]:
## --- TESTING
trained_test_encoders = {}
T.manual_seed(seed)
test_logger = TrainingLogger(validate=True, metrics=batch_stats)

for reasoner_name, reasoner in [('trained reasoner', trained_reasoner)]:
    print(reasoner_name)
    T.manual_seed(seed)
    trained_test_encoders[reasoner_name] = test_encoders = [EmbeddingLayer.from_onto(onto, emb_size=emb_size) for onto in test_onto]
    train_mod(data_te_tr, data_te_vl, reasoner, test_encoders, epoch_count=test_epoch_count, batch_size=batch_size, run_name=reasoner_name, freeze_reasoner=True, logger=test_logger)

trained reasoner
train epoch 00/10 | batch 849/848 | loss 0.7694 | val loss 0.7728 | acc 0.2077 | f1 0.3440 | prec 0.2077 | recall 1.0000 | roc auc 0.4990 | pr auc 0.2375 | elapsed 7.20s
train epoch 01/10 | batch 849/848 | loss 0.7689 | val loss 0.7724 | acc 0.2077 | f1 0.3440 | prec 0.2077 | recall 1.0000 | roc auc 0.5104 | pr auc 0.2452 | elapsed 12.84s
train epoch 02/10 | batch 849/848 | loss 0.7677 | val loss 0.7720 | acc 0.2077 | f1 0.3440 | prec 0.2077 | recall 1.0000 | roc auc 0.5223 | pr auc 0.2537 | elapsed 12.80s
train epoch 03/10 | batch 849/848 | loss 0.7664 | val loss 0.7715 | acc 0.2077 | f1 0.3440 | prec 0.2077 | recall 1.0000 | roc auc 0.5342 | pr auc 0.2620 | elapsed 12.35s
train epoch 04/10 | batch 849/848 | loss 0.7651 | val loss 0.7711 | acc 0.2077 | f1 0.3440 | prec 0.2077 | recall 1.0000 | roc auc 0.5454 | pr auc 0.2704 | elapsed 12.64s
train epoch 05/10 | batch 849/848 | loss 0.7638 | val loss 0.7706 | acc 0.2077 | f1 0.3440 | prec 0.2077 | recall 1.0000 | roc au

In [None]:
with T.no_grad():
    # idx_te, X_te, y_te = data_te
    _, _, Y_te_good = eval_batch(trained_reasoner, trained_test_encoders['trained reasoner'], X_te_te, y_te_te, idx_te_te)
    # _, _, Y_te_rand = eval_batch(random_reasoner, trained_test_encoders['random reasoner'], X_te, y_te, idx_te)

In [None]:
base = 'local/out/exp/' + ts
mkdirp(base)
print(base)

T.save(trained_reasoner.state_dict(), base+'/reasoner.pt')
df_tr = pd.DataFrame(train_logger.history_tr)
df_vl = pd.DataFrame(train_logger.history_vl)
df_tr.to_csv(base+'/train.csv', index=False)
df_vl.to_csv(base+'/valid.csv', index=False)
plot_train_history(df_tr, df_vl, save=base+'/train.png')

test_history_by_onto = pd.DataFrame(test_logger.history_vl_by_onto)
test_history = pd.DataFrame(test_logger.history_vl)
test_results = pd.DataFrame(dict(idx_te=idx_te_te, y_te=y_te_te, Y_te_good=Y_te_good))
test_history.to_csv(base+'/test.csv', index=False)
test_history_by_onto.to_csv(base+'/test-grouped.csv', index=False)
test_results.to_csv(base+'/test-results.csv', index=False)
plot_test_history(test_history, test_history_by_onto)
print(report(test_onto, y_te_te, np.array(Y_te_good), idx_te_te))
# print(report(test_onto, y_te, np.array(Y_te_rand), idx_te))

local/out/exp/20240716T183807


NameError: name 'train_logger' is not defined

In [None]:
import random

In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input = encoder.concepts[ int(np.round( random.random() * encoder.n_concepts , 0) - 1) ]
    losses.append( F.l1_loss(input, trained_reasoner.and_nn(im(input, input))).item() )


print(input)
print(trained_reasoner.and_nn(im( input, input)))
print(np.mean(losses))

tensor([-0.0853,  0.0049,  0.0044,  0.1040, -0.2791, -0.0971, -0.0545, -0.2373,
         0.0291, -0.1781], grad_fn=<SelectBackward0>)
tensor([ 0.0283, -0.1301,  0.1543, -0.1816,  0.1470, -0.1504, -0.1508, -0.1428,
         0.1558,  0.1439], grad_fn=<AddBackward0>)
0.14532663188874723


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input = encoder.concepts[ int(np.round( random.random() * encoder.n_concepts , 0) - 1) ]
    losses.append(F.l1_loss(trained_reasoner.and_nn(im(trained_reasoner.bot_concept[0], input)), trained_reasoner.bot_concept[0]).item())
print(trained_reasoner.bot_concept[0])
print(trained_reasoner.and_nn(im(trained_reasoner.bot_concept[0], input)))
print(np.mean(losses))

tensor([ 0.8487,  0.3154, -0.0030, -0.4652,  0.9116, -0.0422,  0.2849,  0.1790,
        -0.4642, -0.3235], grad_fn=<SelectBackward0>)
tensor([ 0.5006,  0.4196, -0.2333, -0.1082,  0.1215,  0.4181,  0.3491,  0.4251,
        -0.4716, -0.1767], grad_fn=<AddBackward0>)
0.27251186221838


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input = encoder.concepts[ int(np.round( random.random() * encoder.n_concepts , 0) - 1) ]
    losses.append(F.l1_loss(trained_reasoner.and_nn(im(trained_reasoner.top_concept[0], input)), input).item())

print(input)
print(trained_reasoner.and_nn(im(trained_reasoner.top_concept[0], input)))
print(np.mean(losses))

tensor([ 0.0393, -0.1274, -0.2614, -0.1177, -0.0585, -0.0664,  0.1144, -0.2802,
         0.1001,  0.0221], grad_fn=<SelectBackward0>)
tensor([-0.2182, -0.3193,  0.2273, -0.1661,  0.1478, -0.1756, -0.1602, -0.2823,
         0.2452,  0.2916], grad_fn=<AddBackward0>)
0.1793355718255043


In [None]:
losses = []
for _ in range(20):

    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input = encoder.concepts[ int(np.round( random.random() * encoder.n_concepts , 0) - 1) ]
    output = trained_reasoner.and_nn(im(trained_reasoner.not_nn(input), input))
    losses.append(F.l1_loss(trained_reasoner.bot_concept[0], output).item())

print(trained_reasoner.bot_concept[0])
print(output)
print(np.mean(losses))

tensor([ 0.8487,  0.3154, -0.0030, -0.4652,  0.9116, -0.0422,  0.2849,  0.1790,
        -0.4642, -0.3235], grad_fn=<SelectBackward0>)


tensor([ 0.0291, -0.0860,  0.1372, -0.2300,  0.1739, -0.1169, -0.1536, -0.1260,
         0.1137,  0.0972], grad_fn=<AddBackward0>)
0.36536308377981186


In [None]:
print(trained_reasoner.bot_concept[0])
output = trained_reasoner.not_nn(trained_reasoner.top_concept[0])
print(output)
print(F.l1_loss(trained_reasoner.bot_concept[0], output))

tensor([ 0.8487,  0.3154, -0.0030, -0.4652,  0.9116, -0.0422,  0.2849,  0.1790,
        -0.4642, -0.3235], grad_fn=<SelectBackward0>)
tensor([ 0.8487,  0.3152, -0.0028, -0.4649,  0.9116, -0.0422,  0.2849,  0.1790,
        -0.4640, -0.3235], grad_fn=<SqueezeBackward3>)
tensor(9.1464e-05, grad_fn=<L1LossBackward0>)


In [None]:
print(trained_reasoner.top_concept[0])
output = trained_reasoner.not_nn(trained_reasoner.bot_concept[0])
print(output)
print(F.l1_loss(trained_reasoner.top_concept[0], output))

tensor([ 0.0286, -0.2194,  0.0205,  0.4425,  0.1463,  0.3542,  1.5009,  0.0721,
        -0.2669,  0.2835], grad_fn=<SelectBackward0>)
tensor([ 0.0288, -0.2195,  0.0202,  0.4425,  0.1462,  0.3544,  1.5013,  0.0722,
        -0.2671,  0.2835], grad_fn=<SqueezeBackward3>)
tensor(0.0001, grad_fn=<L1LossBackward0>)


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input1 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]

    input2 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]

    input3 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]

    losses.append(F.l1_loss(trained_reasoner.and_nn(im(input1, trained_reasoner.and_nn(im(input2, input3)))), trained_reasoner.and_nn(im(trained_reasoner.and_nn(im(input1, input2)), input3))).item())

print(trained_reasoner.and_nn(im(input1, trained_reasoner.and_nn(im(input2, input3)))))
print(trained_reasoner.and_nn(im(trained_reasoner.and_nn(im(input1, input2)), input3)))
print(np.mean(losses))

tensor([ 0.1326, -0.0631,  0.0128, -0.1697,  0.1839,  0.0128, -0.0778, -0.0954,
        -0.0058,  0.0982], grad_fn=<AddBackward0>)
tensor([ 0.0459,  0.0285,  0.0899, -0.2081,  0.1915, -0.0053, -0.0852, -0.0492,
        -0.0394,  0.0515], grad_fn=<AddBackward0>)
0.03770795250311494


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input1 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]
    input2 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]

    losses.append( F.l1_loss(trained_reasoner.and_nn(im(input1, input2)), trained_reasoner.and_nn(im(input2, input1))).item())

print(trained_reasoner.and_nn(im(input1, input2)))
print(trained_reasoner.and_nn(im(input2, input1)))

print(np.mean(losses))

tensor([ 0.0574,  0.0155,  0.0813, -0.1991,  0.1782,  0.0296, -0.0436,  0.0211,
        -0.0320,  0.0349], grad_fn=<AddBackward0>)
tensor([ 0.1004, -0.0202,  0.0521, -0.2140,  0.1707, -0.0263, -0.0697,  0.0596,
        -0.1071,  0.0224], grad_fn=<AddBackward0>)
0.029833172308281065


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input1 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]
    losses.append(F.l1_loss(input1, trained_reasoner.and_nn(im(input1, trained_reasoner.top_concept[0]))).item())

print(input1)
print(trained_reasoner.and_nn(im(input1, trained_reasoner.top_concept[0])))

print(np.mean(losses))

tensor([ 0.1733,  0.2138, -0.0319, -0.2493,  0.1992,  0.0884,  0.0452,  0.0611,
        -0.2770,  0.0685], grad_fn=<SelectBackward0>)
tensor([ 0.0836,  0.1108,  0.0115, -0.2417,  0.0640,  0.0104,  0.0819,  0.1758,
        -0.1955, -0.0690], grad_fn=<AddBackward0>)
0.1997889805585146


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input1 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]
    losses.append( T.sigmoid(trained_reasoner.sub_nn(im(input1, trained_reasoner.top_concept[0]))).item())
print(np.mean(losses))

0.9999014347791672


In [None]:
losses = []
for _ in range(20):
    encoder = trained_test_encoders['trained reasoner'][int(np.round(random.random() * (len(trained_test_encoders['trained reasoner'] ) - 1) , 0))]
    input1 = encoder.concepts[int(np.round(random. random() * encoder.n_concepts, 0) - 1)]
    losses.append(T.sigmoid(trained_reasoner.sub_nn(im(trained_reasoner.bot_concept[0], trained_reasoner.bot_concept[0]))).item())
print(np.mean(losses))

1.0
