In [None]:
import itertools
import json

import logictensornetworks as ltn
import numpy as np
import openpyxl
import pandas as pd
import tensorflow as tf

In [None]:
# Gold-standard label table, indexed by its first column.
# Row 225 is removed because the data for that record is lost.
target = pd.read_csv(
    '../Haoming/target_result/GOLD_multiclass.csv', index_col=0
).drop(index=225)

# Split on the 'train' / 'test' indicator columns, then discard both indicators
# so that only the label columns remain.
train_target = target.loc[target['train'] == 1].drop(columns=['train', 'test'])
test_target = target.loc[target['test'] == 1].drop(columns=['train', 'test'])

In [None]:
# Feature matrix indexed by its first column; missing values are replaced by 0.
feature_matrix = pd.read_csv(
    '../Haoming/additional/_feature_matrix_all_sections_.csv', index_col=0
).fillna(0)

# Align the feature rows with the train / test label rows by index.
train_feature = feature_matrix.loc[train_target.index]
test_feature = feature_matrix.loc[test_target.index]

# Column labels of the feature matrix.
features = train_feature.columns

In [None]:
# Label columns: every column of `target` except the last two.
# NOTE(review): presumably the last two columns are 'train' and 'test' — confirm
# against the CSV column order.
entities = target.columns.values[0:-2]

# Expand every entity into one class name per status. Iterating over `entities`
# directly (instead of a hard-coded range(32)) keeps this in step with however
# many label columns the table actually has.
class_name = []
for i, name in enumerate(entities):
    class_name.extend(
        name + suffix for suffix in ('_absent', '_questionable', '_present')
    )
    # Only entities at odd positions receive an additional "_unmentioned" class.
    if i % 2 == 1:
        class_name.append(name + '_unmentioned')

In [None]:
def _prefix_relationships(relationships, prefix):
    """Turn a ``{key: [value, ...]}`` mapping into prefixed entities and pairs.

    Parameters
    ----------
    relationships : dict
        Mapping from a string key to an iterable of string values.
    prefix : str
        Text prepended to every key and value (e.g. ``"rxn_"``).

    Returns
    -------
    (list, list)
        ``prefixed_values`` — each distinct prefixed value, in first-seen order;
        ``pairs`` — one ``(prefix + key, prefix + value)`` tuple per mapping entry.
    """
    seen = set()
    prefixed_values = []
    pairs = []
    for key, values in relationships.items():
        for value in values:
            prefixed = prefix + value
            # De-duplicate on the *prefixed* name. The previous check compared the
            # unprefixed value against a list of prefixed names and never matched.
            if prefixed not in seen:
                seen.add(prefixed)
                prefixed_values.append(prefixed)
            pairs.append((prefix + key, prefixed))
    return prefixed_values, pairs


# NOTE(review): only the mapping *values* are collected as entities; the keys
# appear in the relation pairs but not in the entity lists — confirm intended.
with open('../Haoming/info_previously_extracted_from_ontologies/rxcui_ingredient.json', 'r') as fp:
    rxn_relationships = json.load(fp)

rxn_entities, rxn_relations = _prefix_relationships(rxn_relationships, "rxn_")

with open('../Haoming/info_previously_extracted_from_ontologies/snomed_parents_inferred.json', 'r') as fp:
    sct_relationships = json.load(fp)

sct_entities, sct_relations = _prefix_relationships(sct_relationships, "sct_")


In [None]:
# Status suffix -> label code in the target table.
# NOTE(review): 'present' was previously filtered with -1, exactly like 'absent'
# (an apparent copy-paste slip), so "_present" pairs duplicated the "_absent"
# ones. 1 is ASSUMED to be the 'present' code — confirm against the label data.
STATUS_CODES = {
    'absent': -1,
    'questionable': 0,
    'present': 1,
    'unmentioned': 3,
}

# Collect (class name, feature) pairs: a feature is linked to "<entity>_<status>"
# when its column sum over the training rows carrying that status is non-zero.
parents = []
for name in entities:
    # Compute the per-feature "non-zero sum" flags once per status rather than
    # re-summing a column for every (feature, status) combination.
    active = {}
    for status, code in STATUS_CODES.items():
        row_ids = train_target[train_target[name] == code].index
        active[status] = train_feature.loc[row_ids].sum() != 0

    for value in features:
        for status in STATUS_CODES:
            if active[status][value]:
                parents.append((name + '_' + status, value))

In [None]:
# Combine the feature-derived pairs with both ontology relation lists and keep
# each distinct (row-wise) pair once.
parents = np.unique(parents + rxn_relations + sct_relations, axis=0)

# Combine the class names with both ontology entity lists and keep each distinct
# name once.
entities = np.unique(class_name + rxn_entities + sct_entities)

# Every ordered pair of entities (including an entity paired with itself).
all_relationships = list(itertools.product(entities, repeat=2))

In [None]:
import threading
import logging
import math
from numba import jit, cuda

# Shared state for the threaded filtering of `all_relationships`.
thread_num = 4       # number of worker threads
threads = []         # Thread objects that have been created
not_parents = []     # output container for the workers

# Whole-number slice length handed to each worker.
array_len = math.floor(len(all_relationships) / thread_num)


# Serialises writes to the shared `not_parents` list from worker threads.
_not_parents_lock = threading.Lock()


def helper_fun(sub_array):
    """Append to ``not_parents`` every pair of ``sub_array`` absent from ``parents``.

    Parameters
    ----------
    sub_array : sequence of tuple
        Candidate pairs to check.

    Notes
    -----
    Reads the module-level ``parents`` and extends the module-level
    ``not_parents`` list in place; nothing is returned.
    """
    # Compare whole pairs through a set of tuples. `item not in parents` on a 2-D
    # NumPy array is an element-wise "any match" test, so it would treat a pair as
    # known whenever a single element lined up with some row.
    known_pairs = {tuple(pair) for pair in parents}
    result = [item for item in sub_array if tuple(item) not in known_pairs]

    # Debug output: first filtered pair, if there is one.
    if result:
        print(result[0])

    # Store the result. The former np.concatenate call built a new array and
    # discarded it, leaving `not_parents` unchanged.
    with _not_parents_lock:
        not_parents.extend(result)

def setNoParents():
    """Run ``helper_fun`` over ``all_relationships`` with ``thread_num`` threads.

    ``all_relationships`` is cut into ``thread_num`` consecutive slices of
    ``array_len`` items; the last slice also takes any remainder so that no pair
    is skipped. Each created thread is appended to the module-level ``threads``
    list, and the function returns once all threads started here have finished.

    This is a plain Python function. The previous ``@cuda.jit`` decorator was
    removed because the body creates Python threads and slices Python lists,
    which is host-side code and not something a CUDA kernel can contain.
    """
    total = len(all_relationships)
    started = []

    for i in range(thread_num):
        start_index = array_len * i
        # The final worker runs to the end of the list: array_len is a floored
        # quotient, so up to thread_num - 1 trailing items would otherwise be lost.
        end_index = total if i == thread_num - 1 else array_len * (i + 1)
        sub_array = all_relationships[start_index:end_index]
        print("Main    : create and start thread %d." % i)
        thread = threading.Thread(target=helper_fun, args=(sub_array,))
        threads.append(thread)
        started.append(thread)
        thread.start()

    # Join only the threads started by this call, so a repeated call does not
    # depend on what is already stored in `threads`.
    for index, thread in enumerate(started):
        print("Main    : before joining thread %d." % index)
        thread.join()
        print("Main    : thread %d done" % index)


# Direct call: a CUDA launch configuration (blocks / threads per block) only
# applies to compiled kernels, not to a plain function.
setNoParents()


In [None]:
# Length of the embedding vector created for each entity.
embedding_size = 1

# Binary predicates over pairs of embeddings, each an MLP with two hidden layers
# of 8 units.
Parent = ltn.Predicate.MLP([embedding_size, embedding_size], hidden_layer_sizes=[8,8])
Ancestor = ltn.Predicate.MLP([embedding_size, embedding_size], hidden_layer_sizes=[8,8])

# One trainable constant per entity, initialised uniformly in [0, 1).
# Fixed: the comprehension iterated over the undefined name `entites`.
g_e = {
    l: ltn.Constant(np.random.uniform(low=0, high=1, size=embedding_size), trainable=True)
    for l in entities
}

In [None]:
# --- Connectives -----------------------------------------------------------
Not = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.Not_Std()
)
And = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.And_Prod()
)
Or = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.Or_ProbSum()
)
Implies = ltn.Wrapper_Connective(
    ltn.fuzzy_ops.Implies_Reichenbach()
)

# --- Quantifiers (both constructed with p=5) ---------------------------------
Forall = ltn.Wrapper_Quantifier(
    ltn.fuzzy_ops.Aggreg_pMeanError(p=5),
    semantics="forall",
)
Exists = ltn.Wrapper_Quantifier(
    ltn.fuzzy_ops.Aggreg_pMean(p=5),
    semantics="exists",
)

# --- Aggregator used to combine a list of formulas ---------------------------
formula_aggregator = ltn.Wrapper_Formula_Aggregator(
    ltn.fuzzy_ops.Aggreg_pMeanError(p=5)
)

In [None]:
@tf.function
def axioms():
    """Aggregate the Parent/Ancestor formulas over all entity constants.

    Three variables ("a", "b", "c") are built from every constant in ``g_e``;
    six quantified formulas over ``Parent`` and ``Ancestor`` are evaluated and
    combined with ``formula_aggregator``.

    Returns
    -------
    The ``.tensor`` attribute of the aggregated formula result.
    """
    constants = list(g_e.values())
    a = ltn.Variable.from_constants("a", constants)
    b = ltn.Variable.from_constants("b", constants)
    c = ltn.Variable.from_constants("c", constants)

    # Parent(a, b) -> Ancestor(a, b)
    parent_implies_ancestor = Forall(
        (a, b), Implies(Parent([a, b]), Ancestor([a, b]))
    )
    # not Parent(a, a)
    parent_irreflexive = Forall(a, Not(Parent([a, a])))
    # not Ancestor(a, a)
    ancestor_irreflexive = Forall(a, Not(Ancestor([a, a])))
    # Parent(a, b) -> not Parent(b, a)
    parent_asymmetric = Forall(
        (a, b), Implies(Parent([a, b]), Not(Parent([b, a])))
    )
    # Parent(a, b) and Ancestor(b, c) -> Ancestor(a, c)
    ancestor_chain = Forall(
        (a, b, c),
        Implies(And(Parent([a, b]), Ancestor([b, c])), Ancestor([a, c])),
        p=6
    )
    # Ancestor(a, b) -> Parent(a, b) or exists c: Ancestor(a, c) and Parent(c, b)
    via_intermediate = Exists(
        c, And(Ancestor([a, c]), Parent([c, b])), p=6
    )
    ancestor_decomposition = Forall(
        (a, b),
        Implies(Ancestor([a, b]), Or(Parent([a, b]), via_intermediate))
    )

    formulas = [
        parent_implies_ancestor,
        parent_irreflexive,
        ancestor_irreflexive,
        parent_asymmetric,
        ancestor_chain,
        ancestor_decomposition,
    ]
    return formula_aggregator(formulas).tensor

In [None]:
# Value returned by axioms() before any optimisation step has been applied.
initial_sat_level = axioms()
print("Initial sat level %.5f" % initial_sat_level)

In [None]:
# Everything the optimiser updates: the weights of both predicates plus the
# tensors of the entity constants.
trainable_variables = (
    Parent.trainable_variables
    + Ancestor.trainable_variables
    + ltn.as_tensors(list(g_e.values()))
)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

In [None]:
# Gradient-based training: the loss is 1 minus the value returned by axioms().
for epoch in range(3000):
    with tf.GradientTape() as tape:
        loss_value = 1. - axioms()

    gradients = tape.gradient(loss_value, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    # Report every 200th epoch; axioms() is evaluated again here, i.e. after
    # the update that was just applied.
    if not epoch % 200:
        print("Epoch %d: Sat Level %.3f"%(epoch, axioms()))

# `epoch` still holds the index of the last iteration at this point.
print("Training finished at Epoch %d with Sat Level %.3f"%(epoch, axioms()))