In [1]:
import sys
sys.path.append('../../src/meta_rule/')

from lnn_operators import and_lukasiewicz, or_lukasiewicz, negation
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

torch.manual_seed(100)

<torch._C.Generator at 0x7f845827ddf0>

## Train and Val

In [17]:
# train and val

df_train_val = pd.read_csv("data/train.csv")
# df_train_val = df_train_val.loc[22:25]

features_train_val = np.array([np.fromstring(s[1:-1], dtype=np.float, sep=', ') for s in df_train_val.Features.values])

#to train a xor we need its truth table
X_train_val = torch.from_numpy(features_train_val).float()
print(X_train_val, X_train_val.shape)
#the target values for each row in the truth table (xor)
Y_train_val = torch.from_numpy(df_train_val.Label.values).float()
print(Y_train_val, Y_train_val.shape)
# mention_labels (cannot convert string explicitly)
mention_labels_train_val = df_train_val.Mention_label.values
print(mention_labels_train_val)

x_train, x_val, y_train, y_val, m_labels_train, m_labels_val = \
    train_test_split(X_train_val, Y_train_val, mention_labels_train_val, test_size=0.2,train_size=0.8, random_state=100)


tensor([[0.5158, 0.0000, 0.1579, 0.4390, 1.0000, 1.0000],
        [0.4815, 0.0000, 0.0000, 0.4980, 0.8569, 0.4000],
        [0.4815, 0.0000, 0.0000, 0.4292, 0.8430, 0.5000],
        ...,
        [0.9176, 0.6667, 0.5882, 0.9091, 0.1000, 1.0000],
        [0.6235, 0.4000, 0.3704, 0.8776, 0.1000, 0.0000],
        [0.6449, 0.4000, 0.4348, 0.8339, 0.1000, 0.0000]]) torch.Size([31553, 6])
tensor([0., 0., 0.,  ..., 0., 0., 0.]) torch.Size([31553])
['GMT;Greenwich Mean Time' 'GMT;UTC+02:00' 'GMT;UTC+08:00' ...
 'John Adams;John Adams School' 'John Adams;USS John Adams (SSBN-620) 1'
 'John Adams;USS John Adams (1799) 1']


## Test

In [15]:
# test
df_test = pd.read_csv("data/test.csv")

features_test = np.array([np.fromstring(s[1:-1], dtype=np.float, sep=', ') for s in df_test.Features.values])

x_test = torch.from_numpy(features_test).float()
print(x_test, x_test.shape)
y_test = torch.from_numpy(df_test.Label.values).float()
print(y_test, y_test.shape)
m_labels_test = df_test.Mention_label.values
print(m_labels_test)

tensor([[0.5962, 0.0000, 0.1538, 0.6732, 1.0000, 1.0000],
        [0.4259, 0.0000, 0.0278, 0.4125, 0.6727, 1.0000],
        [0.5722, 0.0000, 0.0741, 0.6346, 0.6726, 1.0000],
        ...,
        [0.0000, 0.5000, 0.5556, 0.7310, 0.1000, 0.0000],
        [0.4256, 0.3333, 0.3846, 0.5935, 0.1000, 0.0000],
        [0.6444, 0.3333, 0.3333, 0.5635, 0.1000, 1.0000]]) torch.Size([19048, 6])
tensor([0., 0., 0.,  ..., 0., 0., 0.]) torch.Size([19048])
['U.S.;United States' 'U.S.;National Register of Historic Places'
 'U.S.;United States Census Bureau' ... 'Pluto;HMS Pluto'
 'Pluto;Terry Pluto 1' 'Pluto;The Pluto Files']


In [16]:
print('train', x_train.shape)
print('val', x_test.shape)

train torch.Size([25242, 6])
val torch.Size([19048, 6])


# Train PureNameLNN

In [18]:
from RuleLNN_nway import *

# Sanity Check
model = PureNameLNN(0.9, 2, False)
print(model(x_train, m_labels_train))

tensor([[4.4205e-08],
        [7.0606e-05],
        [8.5324e-08],
        ...,
        [8.8844e-04],
        [6.9631e-05],
        [1.2467e-04]], grad_fn=<SWhereBackward>)




In [23]:
# Sanity Check
model = PureNameLNN(0.9, 2, False)
print(model(x_train, m_labels_train))

loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

def evaluate(eval_model, x_eval, y_eval, m_labels_eval):
    eval_model.eval()
    with torch.no_grad():
        val_pred = eval_model(x_eval, m_labels_eval)
        loss = loss_fn(val_pred, y_eval)
        val_pred_ = val_pred > 0.5
        print("val loss", loss)
        prec, recall, f1, _ = precision_recall_fscore_support(y_eval, val_pred_, average='macro')
        print("f1 w/ 0.5 threshold", f1)
    return loss, f1, val_pred
    

tensor([[6.7677e-09],
        [1.1838e-05],
        [1.5784e-08],
        ...,
        [3.0863e-04],
        [8.4752e-07],
        [2.4594e-05]], grad_fn=<SWhereBackward>)


In [24]:
best_pred = None
best_val_f1, best_val_loss = 0, 1000

for iter in range(200):

    model.train(True)
    optimizer.zero_grad()

    yhat = model(x_train, m_labels_train)
    loss = loss_fn(yhat, y_train)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    loss.backward()
    optimizer.step()

    val_loss, val_f1, val_pred = evaluate(model, x_val, y_val, m_labels_val)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_f1 = val_f1
        best_pred = val_pred
        torch.save(model.state_dict(), "best_PureNameLNN.pt")


Iteration 0: 0.06329169124364853
val loss tensor(0.0636)
f1 w/ 0.5 threshold 0.737399375850204
Iteration 1: 0.05505715310573578
val loss tensor(0.0569)
f1 w/ 0.5 threshold 0.7567702980472765
Iteration 2: 0.048817068338394165
val loss tensor(0.0534)
f1 w/ 0.5 threshold 0.7665207560125251
Iteration 3: 0.0443258099257946
val loss tensor(0.0480)
f1 w/ 0.5 threshold 0.7681632327104296
Iteration 4: 0.03993341699242592
val loss tensor(0.0467)
f1 w/ 0.5 threshold 0.7737535768060018
Iteration 5: 0.038260553032159805
val loss tensor(0.0447)
f1 w/ 0.5 threshold 0.7749194994701232
Iteration 6: 0.036917369812726974
val loss tensor(0.0443)
f1 w/ 0.5 threshold 0.7671011854647755
Iteration 7: 0.03630094230175018
val loss tensor(0.0439)
f1 w/ 0.5 threshold 0.7730893318635716
Iteration 8: 0.035539813339710236
val loss tensor(0.0435)
f1 w/ 0.5 threshold 0.782575032275846
Iteration 9: 0.03473767638206482
val loss tensor(0.0426)
f1 w/ 0.5 threshold 0.7858804542682787
Iteration 10: 0.03422999382019043
val l

Iteration 86: 0.02262766845524311
val loss tensor(0.0268)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 87: 0.02261422947049141
val loss tensor(0.0268)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 88: 0.022601181641221046
val loss tensor(0.0268)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 89: 0.022588621824979782
val loss tensor(0.0267)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 90: 0.022576697170734406
val loss tensor(0.0267)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 91: 0.02256515994668007
val loss tensor(0.0267)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 92: 0.022554049268364906
val loss tensor(0.0267)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 93: 0.02254355140030384
val loss tensor(0.0267)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 94: 0.02253328636288643
val loss tensor(0.0266)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 95: 0.02252349816262722
val loss tensor(0.0266)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 96: 0.0225142054

f1 w/ 0.5 threshold 0.7962631694885645
Iteration 170: 0.02218233235180378
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 171: 0.022180013358592987
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 172: 0.022177761420607567
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 173: 0.02217555232346058
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 174: 0.022173341363668442
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 175: 0.022171147167682648
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 176: 0.022169072180986404
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 177: 0.022166887298226357
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 178: 0.02216479554772377
val loss tensor(0.0262)
f1 w/ 0.5 threshold 0.7962631694885645
Iteration 179: 0.022162754088640213
val loss tensor(0.0261)
f1 w/ 0.5 thr

In [22]:
# tune on val set

print("Val -- The best f1 is {} w/ naive threshold 0.5".format(best_val_f1))

best_tuned_threshold = 0.5
best_tuned_f1 = best_val_f1

for threshold_ in np.linspace(0.0, 1.0, num=1000):
    y_val_preds = best_pred >= threshold_
    prec, recall, f1, _ = precision_recall_fscore_support(y_val, y_val_preds, average='macro')
    if f1 > best_tuned_f1:
        best_tuned_threshold = threshold_
        best_tuned_f1 = f1
print("Val -- After tuning, the best f1 is {} w/ threshold {}".format(best_tuned_f1, best_tuned_threshold))

Val -- The best f1 is 0.7998889248676659 w/ naive threshold 0.5
Val -- After tuning, the best f1 is 0.8415493457427705 w/ threshold 0.993993993993994


In [26]:
bestModel = PureNameLNN(0.9, 2, False)
bestModel.load_state_dict(torch.load("best_PureNameLNN.pt"))
bestModel.eval()

with torch.no_grad():
    test_pred = bestModel(x_test, m_labels_test)
    test_pred = test_pred >= best_tuned_threshold
    prec, recall, f1, _ = precision_recall_fscore_support(y_test, test_pred, average='macro')
    print("Test -- f1 is {} w/ threshold {}".format(f1, best_tuned_threshold))

Test -- f1 is 0.8843884222621656 w/ threshold 0.993993993993994


## write the output 

In [54]:
from collections import defaultdict
rows = []
question_rows_map = defaultdict(list)

for i, (pred, true_label) in enumerate(zip(test_pred, y_test)):
    pred = pred.data.tolist()[0]
    true_label = true_label.tolist()
    if pred:
        row = df_test.iloc[i]
        men_entity_label = '_'.join(m_labels_test[i].split(';')[-1].split())
        question_rows_map[row.Question].append(('http://dbpedia.org/resource/{}'.format(men_entity_label), 1.0))
#         print(row.Question, question_rows_map[row.Question])

for key, value in question_rows_map.items():
    rows.append([key, [value]])

df_output = pd.DataFrame(rows)
df_output.head()
df_output.to_csv("output/purename_nway_alpha09.csv")

In [148]:
for name, mod in bestModel.named_children():
    print("========={}=========".format(name))
    if type(mod) == nn.ModuleList:
        for each_op in mod:
            print(each_op.AND.cdd())
    else:
        print(mod, mod.cdd())

(tensor(8.6251, grad_fn=<SelectBackward>), tensor([19.7417, 10.8246], grad_fn=<SliceBackward>))
(tensor(8.4585, grad_fn=<SelectBackward>), tensor([19.1825, 11.0743], grad_fn=<SliceBackward>))
(tensor(7.0526, grad_fn=<SelectBackward>), tensor([16.0262,  8.7171], grad_fn=<SliceBackward>))
(tensor(4.1313, grad_fn=<SelectBackward>), tensor([7.7373, 6.6682], grad_fn=<SliceBackward>))
and_lukasiewicz(
  (cdd): cdd_lnn()
) (tensor(2.8722, grad_fn=<SelectBackward>), tensor([6.9262, 3.3761], grad_fn=<SliceBackward>))


In [118]:
# beta, argument_wts = bestModel.predicate_and.cdd()
# print("beta (post-training): " + str(beta.item()))
# print("argument weights (post-training): " + str(argument_wts.detach()))

# for each_op in bestModel.sim_disjunction_or_ops:
#     #lets check the LNN conjunction parameters post-training
#     #do these look different from the pre-training settings?
#     beta, argument_wts = each_op.AND.cdd()
#     print("beta (post-training): " + str(beta.item()))
#     print("argument weights (post-training): " + str(argument_wts.detach()))

beta (post-training): 2.8722004890441895
argument weights (post-training): tensor([6.9262, 3.3761])
beta (post-training): 8.625083923339844
argument weights (post-training): tensor([19.7417, 10.8246])
beta (post-training): 8.45850658416748
argument weights (post-training): tensor([19.1825, 11.0743])
beta (post-training): 7.052649021148682
argument weights (post-training): tensor([16.0262,  8.7171])
beta (post-training): 4.1312785148620605
argument weights (post-training): tensor([7.7373, 6.6682])


# Other models

In [None]:
## ContextLNN

In [14]:
# Sanity Check
model = ContextLNN(0.8, 2, False)
print(model(x_train, m_labels_train))

loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

def evaluate(eval_model, x_eval, y_eval, m_labels_eval):
    eval_model.eval()
    with torch.no_grad():
        val_pred = eval_model(x_eval, m_labels_eval)
        loss = loss_fn(val_pred, y_eval)
        val_pred_ = val_pred > 0.5
        print("val loss", loss)
        prec, recall, f1, _ = precision_recall_fscore_support(y_eval, val_pred_, average='macro')
        print("f1 w/ 0.5 threshold", f1)
    return loss, f1, val_pred


best_pred = None
best_val_f1, best_val_loss = 0, 10000

for iter in range(200):

    model.train(True)
    optimizer.zero_grad()

    yhat = model(x_train, m_labels_train)
    loss = loss_fn(yhat, y_train)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    loss.backward()
    optimizer.step()

    val_loss, val_f1, val_pred = evaluate(model, x_val, y_val, m_labels_val)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_f1 = val_f1
        best_pred = val_pred
        torch.save(model.state_dict(), "best_ContextLNN.pt")


tensor([[2.9200e-04],
        [1.5054e-07],
        [9.9943e-01],
        ...,
        [2.6379e-04],
        [1.0349e-05],
        [2.5376e-04]], grad_fn=<SWhereBackward>)
Iteration 0: 0.2859273850917816
val loss tensor(0.2682)
f1 w/ 0.5 threshold 0.5848549075800495
Iteration 1: 0.2410481721162796
val loss tensor(0.2264)
f1 w/ 0.5 threshold 0.5960588363785881
Iteration 2: 0.20673899352550507
val loss tensor(0.1875)
f1 w/ 0.5 threshold 0.6139278208619465
Iteration 3: 0.1752336174249649
val loss tensor(0.1554)
f1 w/ 0.5 threshold 0.6279422651402675
Iteration 4: 0.1513768881559372
val loss tensor(0.1329)
f1 w/ 0.5 threshold 0.6470300283758199
Iteration 5: 0.1327628642320633
val loss tensor(0.1150)
f1 w/ 0.5 threshold 0.6585167134759234
Iteration 6: 0.11826548725366592
val loss tensor(0.1046)
f1 w/ 0.5 threshold 0.6729966604011413
Iteration 7: 0.10744988918304443
val loss tensor(0.0941)
f1 w/ 0.5 threshold 0.6797538720201313
Iteration 8: 0.09956284612417221
val loss tensor(0.0883)
f1 w/ 0.

Iteration 87: 0.04431500658392906
val loss tensor(0.0404)
f1 w/ 0.5 threshold 0.7727933173225794
Iteration 88: 0.04427635297179222
val loss tensor(0.0404)
f1 w/ 0.5 threshold 0.7727933173225794
Iteration 89: 0.04423997551202774
val loss tensor(0.0404)
f1 w/ 0.5 threshold 0.7727933173225794
Iteration 90: 0.04420547932386398
val loss tensor(0.0403)
f1 w/ 0.5 threshold 0.7727933173225794
Iteration 91: 0.044172924011945724
val loss tensor(0.0403)
f1 w/ 0.5 threshold 0.7727933173225794
Iteration 92: 0.04414163529872894
val loss tensor(0.0402)
f1 w/ 0.5 threshold 0.7727933173225794
Iteration 93: 0.044053349643945694
val loss tensor(0.0402)
f1 w/ 0.5 threshold 0.7758931189010344
Iteration 94: 0.043939944356679916
val loss tensor(0.0401)
f1 w/ 0.5 threshold 0.7758931189010344
Iteration 95: 0.04386923462152481
val loss tensor(0.0401)
f1 w/ 0.5 threshold 0.7758931189010344
Iteration 96: 0.04381221905350685
val loss tensor(0.0400)
f1 w/ 0.5 threshold 0.7758931189010344
Iteration 97: 0.04376303777

Iteration 177: 0.03976478427648544
val loss tensor(0.0368)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 178: 0.039749402552843094
val loss tensor(0.0368)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 179: 0.03973430022597313
val loss tensor(0.0368)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 180: 0.03972012177109718
val loss tensor(0.0368)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 181: 0.03970617800951004
val loss tensor(0.0366)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 182: 0.039692845195531845
val loss tensor(0.0366)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 183: 0.03968033939599991
val loss tensor(0.0365)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 184: 0.03966781497001648
val loss tensor(0.0364)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 185: 0.03964666277170181
val loss tensor(0.0364)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 186: 0.0396248921751976
val loss tensor(0.0364)
f1 w/ 0.5 threshold 0.7960179928064651
Iteration 187: 0.03

In [21]:
# tune on val set

print("Val -- The best f1 is {} w/ naive threshold 0.5".format(best_val_f1))

best_tuned_threshold = 0.5
best_tuned_f1 = best_val_f1

for threshold_ in np.linspace(0.0, 1.0, num=1000):
    y_val_preds = best_pred >= threshold_
    prec, recall, f1, _ = precision_recall_fscore_support(y_val, y_val_preds, average='macro')
    if f1 > best_tuned_f1:
        best_tuned_threshold = threshold_
        best_tuned_f1 = f1
print("Val -- After tuning, the best f1 is {} w/ threshold {}".format(best_tuned_f1, best_tuned_threshold))


bestModel = ContextLNN(0.9, 2, False)
bestModel.load_state_dict(torch.load("best_ContextLNN.pt"))
bestModel.eval()

with torch.no_grad():
    test_pred = bestModel(x_test, m_labels_test)
    test_pred = test_pred >= best_tuned_threshold
    prec, recall, f1, _ = precision_recall_fscore_support(y_test, test_pred, average='macro')
    print("Test -- f1 is {} w/ threshold {}".format(f1, best_tuned_threshold))
    print("prec, recall, f1", prec, recall, f1)

Val -- The best f1 is 0.7998889248676659 w/ naive threshold 0.5


  _warn_prf(average, modifier, msg_start, len(result))


Val -- After tuning, the best f1 is 0.8415493457427705 w/ threshold 0.993993993993994


IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [153]:
for name, mod in bestModel.named_children():
    print("========={}=========".format(name))
    if name == 'sim_disjunction_or_ops':
        for each_op in mod:
            print(each_op.AND.cdd())
    elif name == 'predicate_and_ops':
        for each_op in mod:
            print(each_op.cdd())
    else:
        print(mod, mod.cdd())

(tensor(5.8912, grad_fn=<SelectBackward>), tensor([12.1745,  8.3585], grad_fn=<SliceBackward>))
(tensor(8.8607, grad_fn=<SelectBackward>), tensor([19.9964, 11.0276], grad_fn=<SliceBackward>))
(tensor(7.7926, grad_fn=<SelectBackward>), tensor([17.1775,  9.7577], grad_fn=<SliceBackward>))
(tensor(4.7304, grad_fn=<SelectBackward>), tensor([8.7144, 7.7403], grad_fn=<SliceBackward>))
(tensor(5.3318, grad_fn=<SelectBackward>), tensor([15.9246,  6.5601], grad_fn=<SliceBackward>))
(tensor(1.8147, grad_fn=<SelectBackward>), tensor([2.9074, 2.0872], grad_fn=<SliceBackward>))


In [None]:
# Complex

In [154]:
# Sanity Check
model = ComplexRuleLNN(0.8, 2, False)
print(model(x_train, m_labels_train))

loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

def evaluate(eval_model, x_eval, y_eval, m_labels_eval):
    eval_model.eval()
    with torch.no_grad():
        val_pred = eval_model(x_eval, m_labels_eval)
        loss = loss_fn(val_pred, y_eval)
        val_pred_ = val_pred > 0.5
        print("val loss", loss)
        prec, recall, f1, _ = precision_recall_fscore_support(y_eval, val_pred_, average='macro')
        print("f1 w/ 0.5 threshold", f1)
    return loss, f1, val_pred


best_pred = None
best_val_f1, best_val_loss = 0, 10000

for iter in range(200):

    model.train(True)
    optimizer.zero_grad()

    yhat = model(x_train, m_labels_train)
    loss = loss_fn(yhat, y_train)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    loss.backward()
    optimizer.step()

    val_loss, val_f1, val_pred = evaluate(model, x_val, y_val, m_labels_val)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_f1 = val_f1
        best_pred = val_pred
        torch.save(model.state_dict(), "best_complex.pt")


tensor([[9.6863e-03],
        [3.4976e-04],
        [1.0000e+00],
        ...,
        [3.7682e-04],
        [3.5048e-04],
        [3.5691e-04]], grad_fn=<RsubBackward1>)
Iteration 0: 0.9573822617530823
val loss tensor(0.8644)
f1 w/ 0.5 threshold 0.5389048163593573
Iteration 1: 0.7983121871948242
val loss tensor(0.7087)
f1 w/ 0.5 threshold 0.5500277705571773
Iteration 2: 0.6616075038909912
val loss tensor(0.5831)
f1 w/ 0.5 threshold 0.5611543117468767
Iteration 3: 0.5385414958000183
val loss tensor(0.4938)
f1 w/ 0.5 threshold 0.5713164135102224
Iteration 4: 0.45025917887687683
val loss tensor(0.4145)
f1 w/ 0.5 threshold 0.5832352587535825
Iteration 5: 0.3785131275653839
val loss tensor(0.3597)
f1 w/ 0.5 threshold 0.5946052934809039
Iteration 6: 0.3310108780860901
val loss tensor(0.3109)
f1 w/ 0.5 threshold 0.6049645533978529
Iteration 7: 0.2867151200771332
val loss tensor(0.2659)
f1 w/ 0.5 threshold 0.6171883180043028
Iteration 8: 0.25164327025413513
val loss tensor(0.2293)
f1 w/ 0.5 t

val loss tensor(0.0646)
f1 w/ 0.5 threshold 0.7502145857328353
Iteration 85: 0.06548275798559189
val loss tensor(0.0645)
f1 w/ 0.5 threshold 0.7502145857328353
Iteration 86: 0.0653853565454483
val loss tensor(0.0644)
f1 w/ 0.5 threshold 0.7502145857328353
Iteration 87: 0.06529440730810165
val loss tensor(0.0643)
f1 w/ 0.5 threshold 0.7502145857328353
Iteration 88: 0.06520730257034302
val loss tensor(0.0642)
f1 w/ 0.5 threshold 0.7502145857328353
Iteration 89: 0.06512439996004105
val loss tensor(0.0641)
f1 w/ 0.5 threshold 0.7528387439472204
Iteration 90: 0.06504955887794495
val loss tensor(0.0641)
f1 w/ 0.5 threshold 0.7528387439472204
Iteration 91: 0.0649760365486145
val loss tensor(0.0640)
f1 w/ 0.5 threshold 0.7528387439472204
Iteration 92: 0.06483722478151321
val loss tensor(0.0639)
f1 w/ 0.5 threshold 0.7528387439472204
Iteration 93: 0.06472838670015335
val loss tensor(0.0639)
f1 w/ 0.5 threshold 0.7528387439472204
Iteration 94: 0.06463892012834549
val loss tensor(0.0638)
f1 w/ 0.

val loss tensor(0.0580)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 169: 0.059609394520521164
val loss tensor(0.0579)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 170: 0.05957398936152458
val loss tensor(0.0579)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 171: 0.0595400333404541
val loss tensor(0.0578)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 172: 0.059509534388780594
val loss tensor(0.0578)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 173: 0.059479229152202606
val loss tensor(0.0577)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 174: 0.05945027247071266
val loss tensor(0.0577)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 175: 0.0594213642179966
val loss tensor(0.0576)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 176: 0.05939330905675888
val loss tensor(0.0576)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 177: 0.05936622619628906
val loss tensor(0.0576)
f1 w/ 0.5 threshold 0.7610366540467318
Iteration 178: 0.05933916196227074
val loss tensor(0.0

In [155]:
# tune on val set

print("Val -- The best f1 is {} w/ naive threshold 0.5".format(best_val_f1))

best_tuned_threshold = 0.5
best_tuned_f1 = best_val_f1

for threshold_ in np.linspace(0.0, 1.0, num=1000):
    y_val_preds = best_pred >= threshold_
    prec, recall, f1, _ = precision_recall_fscore_support(y_val, y_val_preds, average='macro')
    if f1 > best_tuned_f1:
        best_tuned_threshold = threshold_
        best_tuned_f1 = f1
print("Val -- After tuning, the best f1 is {} w/ threshold {}".format(best_tuned_f1, best_tuned_threshold))


bestModel = ComplexRuleLNN(0.8, 2, False)
bestModel.load_state_dict(torch.load("best_complex.pt"))
bestModel.eval()

with torch.no_grad():
    test_pred = bestModel(x_test, m_labels_test)
    test_pred = test_pred >= best_tuned_threshold
    prec, recall, f1, _ = precision_recall_fscore_support(y_test, test_pred, average='macro')
    print("Test -- f1 is {} w/ threshold {}".format(f1, best_tuned_threshold))

Val -- The best f1 is 0.7638835403228394 w/ naive threshold 0.5
Val -- After tuning, the best f1 is 0.7856135050703569 w/ threshold 0.992992992992993
Test -- f1 is 0.8164090967900519 w/ threshold 0.992992992992993


In [164]:
for name1, mod1 in bestModel.named_children():
    for name, mod in mod1.named_children():
        print("========={}=========".format(name))
        if name == 'sim_disjunction_or_ops':
            for each_op in mod:
                print(each_op.AND.cdd())
        elif name == 'predicate_and_ops':
            for each_op in mod:
                print(each_op.cdd())
        elif 'and' in name:
            print(name, mod.cdd())
        else:
            print(name, mod.AND.cdd())
#     if 'and' in name1:
#         print(name1, mod1.cdd())
#     elif 'or' in name1:
#         print(name1, mod1.AND.cdd())

(tensor(5.1019, grad_fn=<SelectBackward>), tensor([10.4697,  8.0609], grad_fn=<SliceBackward>))
(tensor(5.7117, grad_fn=<SelectBackward>), tensor([ 8.2640, 11.7477], grad_fn=<SliceBackward>))
(tensor(6.4294, grad_fn=<SelectBackward>), tensor([12.9908,  8.3084], grad_fn=<SliceBackward>))
(tensor(3.8584, grad_fn=<SelectBackward>), tensor([6.8998, 6.3426], grad_fn=<SliceBackward>))
predicate_and (tensor(4.6245, grad_fn=<SelectBackward>), tensor([13.1790,  5.6998], grad_fn=<SliceBackward>))
(tensor(6.3645, grad_fn=<SelectBackward>), tensor([13.3272,  9.0812], grad_fn=<SliceBackward>))
(tensor(6.9162, grad_fn=<SelectBackward>), tensor([14.1029,  9.5137], grad_fn=<SliceBackward>))
(tensor(6.0238, grad_fn=<SelectBackward>), tensor([13.0749,  7.8272], grad_fn=<SliceBackward>))
(tensor(4.1672, grad_fn=<SelectBackward>), tensor([6.7004, 7.7094], grad_fn=<SliceBackward>))
(tensor(4.4623, grad_fn=<SelectBackward>), tensor([12.3697,  5.6580], grad_fn=<SliceBackward>))
(tensor(4.3978, grad_fn=<Selec

In [165]:
for name, param in bestModel.named_parameters():
    print(name, param)

pureNameRule.sim_disjunction_or_ops.0.AND.cdd.gamma Parameter containing:
tensor([[0.6911]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.0.AND.cdd.mu Parameter containing:
tensor([[1.5432, 0.1545, 0.8296]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.1.AND.cdd.gamma Parameter containing:
tensor([[0.4595]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.1.AND.cdd.mu Parameter containing:
tensor([[-0.3108,  1.7886,  1.6416]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.2.AND.cdd.gamma Parameter containing:
tensor([[0.6444]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.2.AND.cdd.mu Parameter containing:
tensor([[ 1.9484, -1.4607,  2.6722]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.3.AND.cdd.gamma Parameter containing:
tensor([[0.1029]], requires_grad=True)
pureNameRule.sim_disjunction_or_ops.3.AND.cdd.mu Parameter containing:
tensor([[0.4294, 0.0292, 0.2391]], requires_grad=True)
pureNameRule.predicate_and.cdd.gamma P

In [109]:

for each_op in bestModel.sim_disjunction_or_ops:
    #lets check the LNN conjunction parameters post-training
    #do these look different from the pre-training settings?
    beta, argument_wts = each_op.AND.cdd()
    print("beta (post-training): " + str(beta.item()))
    print("argument weights (post-training): " + str(argument_wts.detach()))

for each_op in bestModel.sim_disjunction_or_ops:
    #lets check the LNN conjunction parameters post-training
    #do these look different from the pre-training settings?
    beta, argument_wts = each_op.AND.cdd()
    print("beta (post-training): " + str(beta.item()))
    print("argument weights (post-training): " + str(argument_wts.detach()))

beta (post-training): 4.196691513061523
argument weights (post-training): tensor([7.9233, 7.2595])
beta (post-training): 3.902745246887207
argument weights (post-training): tensor([6.9828, 6.3890])
beta (post-training): 4.169642925262451
argument weights (post-training): tensor([7.6434, 7.0587])
beta (post-training): 4.3188300132751465
argument weights (post-training): tensor([7.2704, 8.2539])
beta (post-training): 4.196691513061523
argument weights (post-training): tensor([7.9233, 7.2595])
beta (post-training): 3.902745246887207
argument weights (post-training): tensor([6.9828, 6.3890])
beta (post-training): 4.169642925262451
argument weights (post-training): tensor([7.6434, 7.0587])
beta (post-training): 4.3188300132751465
argument weights (post-training): tensor([7.2704, 8.2539])


## Code for XOR example

In [136]:
#to train a xor we need its truth table
x = torch.from_numpy(np.array([[0, 0], \
                               [0, 1], \
                               [1, 0], \
                               [1, 1]])).float()

#the target values for each row in the truth table (xor)
y = torch.from_numpy(np.array([[0], \
                               [1], \
                               [1], \
                               [0]])).float()

In [140]:
class xorLNN(nn.Module):
    def __init__(self, alpha, arity, slack):
        super(xorLNN, self).__init__()
        self.op_and1 = and_lukasiewicz(alpha, arity, slack)
        self.op_and2 = and_lukasiewicz(alpha, arity, slack)
        self.op_or = or_lukasiewicz(alpha, arity, slack)
    
    def forward(self, x):
        x0 = x[:,0].view(-1,1)
        print(x0)
        x1 = x[:,1].view(-1,1)
        print(x1)
        print(torch.cat((x0, negation(x1)), 1))
        yhat = self.op_or(torch.cat((self.op_and1(torch.cat((x0, negation(x1)), 1)), \
                            self.op_and2(torch.cat((negation(x0), x1), 1))), 1))
        return yhat

In [141]:
model = xorLNN(0.8, 2, False)
model(x)

tensor([[0.],
        [0.],
        [1.],
        [1.]])
tensor([[0.],
        [1.],
        [0.],
        [1.]])
tensor([[0., 1.],
        [0., 0.],
        [1., 1.],
        [1., 0.]])


tensor([[4.6349e-04],
        [9.9932e-01],
        [9.9967e-01],
        [4.6349e-04]], grad_fn=<RsubBackward1>)

In [25]:
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [26]:
for iter in range(100):
    model.train()
    optimizer.zero_grad()

    yhat = model(x)
    loss = loss_fn(yhat, y)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    
    loss.backward()
    optimizer.step()


Iteration 0: 0.00041395798325538635
Iteration 1: 0.0003413597878534347
Iteration 2: 0.00027990160742774606
Iteration 3: 0.00022847841319162399
Iteration 4: 0.00018598556926008314
Iteration 5: 0.00015122962940949947
Iteration 6: 0.0001230025663971901
Iteration 7: 0.00010024568473454565
Iteration 8: 8.197502756956965e-05
Iteration 9: 6.734087219228968e-05
Iteration 10: 5.5627755500609055e-05
Iteration 11: 4.625439760275185e-05
Iteration 12: 3.872895103995688e-05
Iteration 13: 3.267884312663227e-05
Iteration 14: 2.7761290766648017e-05
Iteration 15: 2.3767666789353825e-05
Iteration 16: 2.051913361356128e-05
Iteration 17: 1.7821967048803344e-05
Iteration 18: 1.5646355677745305e-05
Iteration 19: 1.3783679605694488e-05
Iteration 20: 1.2263739336049184e-05
Iteration 21: 1.0967321941279806e-05
Iteration 22: 9.89442560239695e-06
Iteration 23: 8.970543603936676e-06
Iteration 24: 8.165872714016587e-06
Iteration 25: 7.480413842131384e-06
Iteration 26: 6.899264008097816e-06
Iteration 27: 6.407521595

In [28]:
#this is a hyperparameter
alpha = 0.8

op_and1 = and_lukasiewicz(alpha, 2, False)
op_and2 = and_lukasiewicz(alpha, 2, False)
op_or = or_lukasiewicz(alpha, 2, False)

#to train a xor we need its truth table
x = torch.from_numpy(np.array([[0, 0], \
                               [0, 1], \
                               [1, 0], \
                               [1, 1]])).float()

#the target values for each row in the truth table (xor)
y = torch.from_numpy(np.array([[0], \
                               [1], \
                               [1], \
                               [0]])).float()

loss_fn = nn.BCELoss()
optimizer = optim.Adam([{'params': op_or.parameters()}, \
                        {'params': op_and1.parameters()}, \
                        {'params': op_and2.parameters()}], lr=0.1)

for iter in range(100):
    op_or.train()
    op_and1.train()
    op_and2.train()
    optimizer.zero_grad()

    x0 = x[:,0].view(-1,1)
    x1 = x[:,1].view(-1,1)
    yhat = op_or(torch.cat((op_and1(torch.cat((x0, negation(x1)), 1)), \
                            op_and2(torch.cat((negation(x0), x1), 1))), 1))
    loss = loss_fn(yhat, y)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    loss.backward()
    optimizer.step()

#check to see output of xor post-training
x0 = x[:,0].view(-1,1)
x1 = x[:,1].view(-1,1)
yhat = op_or(torch.cat((op_and1(torch.cat((x0, negation(x1)), 1)), \
                        op_and2(torch.cat((negation(x0), x1), 1))), 1))
check_values = torch.cat((yhat, y), 1)
print("------- Checking outputs (left) vs ground truth (right): -----")
print(check_values.detach())

#LNN parameters: post-training (we have 3 sets of beta, argument weights)
print("--------------- LNN Parameters (post-training) ---------------")
beta_or, argument_wts_or = op_or.AND.cdd()
beta_and1, argument_wts_and1 = op_and1.cdd()
beta_and2, argument_wts_and2 = op_and2.cdd()

np.set_printoptions(precision=3, suppress=True)
print("OR (beta, argument weights): " \
      + str(np.around(beta_or.item(), decimals=3)) + " " \
      + str(argument_wts_or.detach().numpy()))
print("AND1 (beta, argument weights): " \
      + str(np.around(beta_and1.item(), decimals=3)) + " " \
      + str(argument_wts_and1.detach().numpy()))
print("AND2 (beta, argument weights): " \
      + str(np.around(beta_and2.item(), decimals=3)) + " " \
      + str(argument_wts_and2.detach().numpy()))

Iteration 0: 0.0005070384358987212
Iteration 1: 0.0004226093296892941
Iteration 2: 0.00035011349245905876
Iteration 3: 0.0002885941066779196
Iteration 4: 0.0002370504371356219
Iteration 5: 0.00019430331303738058
Iteration 6: 0.00015917410200927407
Iteration 7: 0.00013051435234956443
Iteration 8: 0.0001072355080395937
Iteration 9: 8.839827933115885e-05
Iteration 10: 7.324236503336579e-05
Iteration 11: 6.0992642829660326e-05
Iteration 12: 5.106781463837251e-05
Iteration 13: 4.3065447243861854e-05
Iteration 14: 3.6598037695512176e-05
Iteration 15: 3.130791810690425e-05
Iteration 16: 2.6941725081996992e-05
Iteration 17: 2.3410046196659096e-05
Iteration 18: 2.048934402409941e-05
Iteration 19: 1.8030596038443036e-05
Iteration 20: 1.6003998098312877e-05
Iteration 21: 1.4275432477006689e-05
Iteration 22: 1.2785292710759677e-05
Iteration 23: 1.1593181625357829e-05
Iteration 24: 1.0550087608862668e-05
Iteration 25: 9.641104952606838e-06
Iteration 26: 8.851335223880596e-06
Iteration 27: 8.1807766

In [None]:
# arity should be 2

In [150]:
# class PureNameLNN(nn.Module):
#     def __init__(self, alpha, sim_arity=4, rule_arity=2, slack=None):
#         super(PureNameLNN, self).__init__()
#         self.threshold = 0.5
        
#         self.sim_disjunction_or = or_lukasiewicz(alpha, sim_arity, slack)
    
#     def forward(self, x, mention_labels=None):
#         """
#             x: scores['jw'], scores['jacc'], scores['lev'], scores['spacy'], 
#                normalized_ref_scores[ref_idx], normalized_ctx_scores[ctx_idx]
#         """
#         yhat = None
        
#         # RULE 1: lookup predicate
#         lookup_features = x[:,5]
#         print("lookup_features", lookup_features)
        
#         # RULE 2: similarity predicate(mention==label AND Jacc(m, lb) AND Lev(m, lb) AND Jaro(m, lb))
#         feature_list = []
#         # rule 2 (1) mention==label
#         mentions = np.array([m[0].lower() for m in mention_labels])
#         labels = np.array([m[1].lower() for m in mention_labels])
#         exact_match_features = torch.from_numpy(np.array(mentions == labels).astype(float)).float()
#         feature_list.append(exact_match_features)
#         print("exact_match_features", exact_match_features)
        
#         # rule 2 (2)-(4) Jaro(m, lb) AND Jacc(m, lb) AND Lev(m, lb))
#         sim_features = x[:, 0:3]
#         print(sim_features)

#         return yhat