In [1]:
import sys
sys.path.append('../../src/meta_rule/')

from lnn_operators import and_lukasiewicz, or_lukasiewicz, negation
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

In [2]:
df = pd.read_csv("train.csv")
# df = df.loc[22:25]

In [3]:
features = np.array([np.fromstring(s[1:-1], dtype=np.float, sep=', ') for s in df.Features.values])

In [4]:
#to train a xor we need its truth table
X = torch.from_numpy(features).float()
print(X)
#the target values for each row in the truth table (xor)
Y = torch.from_numpy(df.Label.values).float()
print(Y)
# mention_labels (cannot convert string explicitly)
mention_labels = df.Mention_label.values
print(mention_labels)

tensor([[0.5158, 0.0000, 0.1579, 0.4390, 1.0000, 1.0000],
        [0.4815, 0.0000, 0.0000, 0.4980, 0.8569, 0.4000],
        [0.4815, 0.0000, 0.0000, 0.4292, 0.8430, 0.5000],
        ...,
        [0.5225, 0.2308, 0.2564, 0.7010, 0.2003, 0.7100],
        [0.5743, 0.5000, 0.4082, 0.8074, 0.2003, 0.8200],
        [0.9379, 0.6000, 0.6897, 0.7939, 0.1000, 0.8200]])
tensor([0., 0., 0.,  ..., 0., 0., 0.])
['GMT;Greenwich Mean Time' 'GMT;UTC+02:00' 'GMT;UTC+08:00' ...
 'Houses Of Parliament;List of people who have spoken to both Houses of the United Kingdom Parliament'
 'Houses Of Parliament;Statue of Margaret Thatcher, Houses of Parliament'
 'Houses Of Parliament;Houses of Parliament Act 1837']


In [5]:
mention_labels.shape

(26755,)

In [6]:
x_train, x_test, y_train, y_test, m_labels_train, m_labels_test = train_test_split(X, Y, mention_labels, test_size=0.2,train_size=0.8)

In [7]:
x_train

tensor([[0.8480, 0.2500, 0.2400, 0.7001, 0.1076, 1.0000],
        [0.4423, 0.0000, 0.0769, 0.3746, 0.2174, 0.7500],
        [0.9059, 0.0000, 0.5294, 0.3579, 0.1502, 1.0000],
        ...,
        [0.5569, 0.0000, 0.2051, 0.4970, 0.4131, 0.5000],
        [0.6450, 0.0000, 0.1081, 0.3771, 0.3007, 0.6000],
        [0.6111, 0.0000, 0.3333, 0.6454, 0.2704, 0.0000]])

In [8]:
print('train', x_train.shape)
print('val', x_test.shape)

train torch.Size([21404, 6])
val torch.Size([5351, 6])


In [9]:
class PureNameLNN(nn.Module):
    def __init__(self, alpha, arity, slack=None):
        super(PureNameLNN, self).__init__()
        self.threshold = 0.5
        
        self.sim_disjunction_or_ops = nn.ModuleList([or_lukasiewicz(alpha, arity, slack) for i in range(4)])
        self.predicate_and = and_lukasiewicz(alpha, arity, slack)
    
    def forward(self, x, mention_labels=None):
        """
            x: scores['jw'], scores['jacc'], scores['lev'], scores['spacy'], 
               normalized_ref_scores[ref_idx], normalized_ctx_scores[ctx_idx]
        """
        yhat = None
        
        ####### RULE 1: lookup predicate #######
        lookup_features = x[:,4].view(-1, 1)
#         print("lookup_features", lookup_features)
        
        
        ####### RULE 2: similarity predicate(mention==label AND Jacc(m, lb) AND Lev(m, lb) AND Jaro(m, lb)) #######
        feature_list = []
        # rule 2 (1) mention==label
        mentions = np.array([m[0].lower() for m in mention_labels])
        labels = np.array([m[1].lower() for m in mention_labels])
        exact_match_features = torch.from_numpy(np.array(mentions == labels).astype(float)).float().view(-1,1)
        feature_list.append(exact_match_features)
#         print("exact_match_features", exact_match_features)
        
        # rule 2 (2) Jacc(mention, label)
        jacc_features = x[:, 1].view(-1,1)
#         jacc_features = torch.clamp(jacc_features, min=self.threshold, max=1.0)
        jacc_features_ = torch.where(jacc_features>=self.threshold, jacc_features, torch.zeros_like(jacc_features))
        feature_list.append(jacc_features_)
#         print("jacc_features", jacc_features)
#         print("jacc_features*mask", jacc_features_)
        
        # rule 2 (3) Lev(mention, label)
        lev_features = x[:, 2].view(-1,1)
        lev_features_ = torch.where(lev_features>=self.threshold, lev_features, torch.zeros_like(lev_features))
        feature_list.append(lev_features_)
#         print("lev_features", lev_features)
#         print("lev_features*mask", lev_features_)
        
        # rule 2 (4) Jaro(mention, label)
        jaro_features = x[:, 0].view(-1,1)
        jaro_features_ = torch.where(jaro_features>=self.threshold, jaro_features, torch.zeros_like(jaro_features))
        feature_list.append(jaro_features_)
#         print("jaro_features", jaro_features)
#         print("jaro_features*mask", jaro_features_)
        
        # disjunction of (1) to (4)
        disjunction_result = feature_list[0]
        for i in range(0, 3):
            disjunction_result = self.sim_disjunction_or_ops[i](torch.cat((disjunction_result, feature_list[i+1]), 1))
#             print("disjunction_result", disjunction_result)
        
        # RULE 1 + RULE 2
        yhat = self.predicate_and(torch.cat((lookup_features, disjunction_result), 1))
#         print('yhat', yhat)
        return yhat

In [343]:
# Sanity Check
model = PureNameLNN(0.8, 2, False)
print(model(x_train, m_labels_train))

loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

def evaluate(eval_model, x_test, y_test, m_labels_test):
    eval_model.eval()
    with torch.no_grad():
        test_pred = eval_model(x_test, m_labels_test)
        loss = loss_fn(test_pred, y_test)
        test_pred_ = test_pred > 0.5
        print("val loss", loss)
        prec, recall, f1, _ = precision_recall_fscore_support(y_test, test_pred_, average='macro')
        print("f1 w/ 0.5 threshold", f1)
    return loss, f1, test_pred
    

tensor([[6.1936e-04],
        [7.5843e-04],
        [3.6290e-04],
        ...,
        [4.9775e-06],
        [4.8795e-04],
        [6.4969e-04]], grad_fn=<SWhereBackward>)




In [344]:
best_pred = None
best_val_f1, best_val_loss = 0, 10000

for iter in range(100):

    model.train(True)
    optimizer.zero_grad()

    yhat = model(x_train, m_labels_train)
    loss = loss_fn(yhat, y)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    loss.backward()
    optimizer.step()

    val_loss, val_f1, test_pred = evaluate(model, x_test, y_test, m_labels_test)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_f1 = val_f1
        best_pred = test_pred
    

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


Iteration 0: 0.31788986921310425
val loss tensor(0.1379)
f1 0.6267516883007955
Iteration 1: 0.29160672426223755
val loss tensor(0.1152)
f1 0.6425400850238854
Iteration 2: 0.2677815556526184
val loss tensor(0.0993)
f1 0.6619434842645824
Iteration 3: 0.24897222220897675
val loss tensor(0.0876)
f1 0.679162468121619
Iteration 4: 0.2356056421995163
val loss tensor(0.0803)
f1 0.6903493737964728
Iteration 5: 0.2250492423772812
val loss tensor(0.0744)
f1 0.6985336672659015
Iteration 6: 0.21670205891132355
val loss tensor(0.0694)
f1 0.707385029876227
Iteration 7: 0.20997031033039093
val loss tensor(0.0642)
f1 0.7153339942863262
Iteration 8: 0.2028883397579193
val loss tensor(0.0588)
f1 0.7349749657805633
Iteration 9: 0.19772924482822418
val loss tensor(0.0555)
f1 0.7471649971649972
Iteration 10: 0.19393067061901093
val loss tensor(0.0533)
f1 0.7514973032527512
Iteration 11: 0.19061918556690216
val loss tensor(0.0522)
f1 0.753717767038504
Iteration 12: 0.18781499564647675
val loss tensor(0.0510)

In [357]:
# y_test and test_pred

print("Naive 0.5 threshold best f1:", best_val_f1)

best_tuned_threshold = 0.5
best_tuned_f1 = best_val_f1

for threshold_ in np.linspace(0.0, 1.0, num=10000):
    y_test_preds = test_pred >= threshold_
    prec, recall, f1, _ = precision_recall_fscore_support(y_test, y_test_preds, average='macro')
    if f1 > best_tuned_f1:
        best_tuned_threshold = threshold_
        best_tuned_f1 = f1
print("After tuning, the best f1 is {} w/ threshold {}".format(best_tuned_f1, best_tuned_threshold))

Naive 0.5 threshold best f1: 0.7540401334829849
After tuning, the best f1 is 0.8993889254489047 w/ threshold 0.9962996299629964


## Code for XOR example

In [136]:
#to train a xor we need its truth table
x = torch.from_numpy(np.array([[0, 0], \
                               [0, 1], \
                               [1, 0], \
                               [1, 1]])).float()

#the target values for each row in the truth table (xor)
y = torch.from_numpy(np.array([[0], \
                               [1], \
                               [1], \
                               [0]])).float()

In [140]:
class xorLNN(nn.Module):
    def __init__(self, alpha, arity, slack):
        super(xorLNN, self).__init__()
        self.op_and1 = and_lukasiewicz(alpha, arity, slack)
        self.op_and2 = and_lukasiewicz(alpha, arity, slack)
        self.op_or = or_lukasiewicz(alpha, arity, slack)
    
    def forward(self, x):
        x0 = x[:,0].view(-1,1)
        print(x0)
        x1 = x[:,1].view(-1,1)
        print(x1)
        print(torch.cat((x0, negation(x1)), 1))
        yhat = self.op_or(torch.cat((self.op_and1(torch.cat((x0, negation(x1)), 1)), \
                            self.op_and2(torch.cat((negation(x0), x1), 1))), 1))
        return yhat

In [141]:
model = xorLNN(0.8, 2, False)
model(x)

tensor([[0.],
        [0.],
        [1.],
        [1.]])
tensor([[0.],
        [1.],
        [0.],
        [1.]])
tensor([[0., 1.],
        [0., 0.],
        [1., 1.],
        [1., 0.]])


tensor([[4.6349e-04],
        [9.9932e-01],
        [9.9967e-01],
        [4.6349e-04]], grad_fn=<RsubBackward1>)

In [25]:
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [26]:
for iter in range(100):
    model.train()
    optimizer.zero_grad()

    yhat = model(x)
    loss = loss_fn(yhat, y)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    
    loss.backward()
    optimizer.step()


Iteration 0: 0.00041395798325538635
Iteration 1: 0.0003413597878534347
Iteration 2: 0.00027990160742774606
Iteration 3: 0.00022847841319162399
Iteration 4: 0.00018598556926008314
Iteration 5: 0.00015122962940949947
Iteration 6: 0.0001230025663971901
Iteration 7: 0.00010024568473454565
Iteration 8: 8.197502756956965e-05
Iteration 9: 6.734087219228968e-05
Iteration 10: 5.5627755500609055e-05
Iteration 11: 4.625439760275185e-05
Iteration 12: 3.872895103995688e-05
Iteration 13: 3.267884312663227e-05
Iteration 14: 2.7761290766648017e-05
Iteration 15: 2.3767666789353825e-05
Iteration 16: 2.051913361356128e-05
Iteration 17: 1.7821967048803344e-05
Iteration 18: 1.5646355677745305e-05
Iteration 19: 1.3783679605694488e-05
Iteration 20: 1.2263739336049184e-05
Iteration 21: 1.0967321941279806e-05
Iteration 22: 9.89442560239695e-06
Iteration 23: 8.970543603936676e-06
Iteration 24: 8.165872714016587e-06
Iteration 25: 7.480413842131384e-06
Iteration 26: 6.899264008097816e-06
Iteration 27: 6.407521595

In [28]:
#this is a hyperparameter
alpha = 0.8

op_and1 = and_lukasiewicz(alpha, 2, False)
op_and2 = and_lukasiewicz(alpha, 2, False)
op_or = or_lukasiewicz(alpha, 2, False)

#to train a xor we need its truth table
x = torch.from_numpy(np.array([[0, 0], \
                               [0, 1], \
                               [1, 0], \
                               [1, 1]])).float()

#the target values for each row in the truth table (xor)
y = torch.from_numpy(np.array([[0], \
                               [1], \
                               [1], \
                               [0]])).float()

loss_fn = nn.BCELoss()
optimizer = optim.Adam([{'params': op_or.parameters()}, \
                        {'params': op_and1.parameters()}, \
                        {'params': op_and2.parameters()}], lr=0.1)

for iter in range(100):
    op_or.train()
    op_and1.train()
    op_and2.train()
    optimizer.zero_grad()

    x0 = x[:,0].view(-1,1)
    x1 = x[:,1].view(-1,1)
    yhat = op_or(torch.cat((op_and1(torch.cat((x0, negation(x1)), 1)), \
                            op_and2(torch.cat((negation(x0), x1), 1))), 1))
    loss = loss_fn(yhat, y)

    print("Iteration " + str(iter) + ": " + str(loss.item()))
    loss.backward()
    optimizer.step()

#check to see output of xor post-training
x0 = x[:,0].view(-1,1)
x1 = x[:,1].view(-1,1)
yhat = op_or(torch.cat((op_and1(torch.cat((x0, negation(x1)), 1)), \
                        op_and2(torch.cat((negation(x0), x1), 1))), 1))
check_values = torch.cat((yhat, y), 1)
print("------- Checking outputs (left) vs ground truth (right): -----")
print(check_values.detach())

#LNN parameters: post-training (we have 3 sets of beta, argument weights)
print("--------------- LNN Parameters (post-training) ---------------")
beta_or, argument_wts_or = op_or.AND.cdd()
beta_and1, argument_wts_and1 = op_and1.cdd()
beta_and2, argument_wts_and2 = op_and2.cdd()

np.set_printoptions(precision=3, suppress=True)
print("OR (beta, argument weights): " \
      + str(np.around(beta_or.item(), decimals=3)) + " " \
      + str(argument_wts_or.detach().numpy()))
print("AND1 (beta, argument weights): " \
      + str(np.around(beta_and1.item(), decimals=3)) + " " \
      + str(argument_wts_and1.detach().numpy()))
print("AND2 (beta, argument weights): " \
      + str(np.around(beta_and2.item(), decimals=3)) + " " \
      + str(argument_wts_and2.detach().numpy()))

Iteration 0: 0.0005070384358987212
Iteration 1: 0.0004226093296892941
Iteration 2: 0.00035011349245905876
Iteration 3: 0.0002885941066779196
Iteration 4: 0.0002370504371356219
Iteration 5: 0.00019430331303738058
Iteration 6: 0.00015917410200927407
Iteration 7: 0.00013051435234956443
Iteration 8: 0.0001072355080395937
Iteration 9: 8.839827933115885e-05
Iteration 10: 7.324236503336579e-05
Iteration 11: 6.0992642829660326e-05
Iteration 12: 5.106781463837251e-05
Iteration 13: 4.3065447243861854e-05
Iteration 14: 3.6598037695512176e-05
Iteration 15: 3.130791810690425e-05
Iteration 16: 2.6941725081996992e-05
Iteration 17: 2.3410046196659096e-05
Iteration 18: 2.048934402409941e-05
Iteration 19: 1.8030596038443036e-05
Iteration 20: 1.6003998098312877e-05
Iteration 21: 1.4275432477006689e-05
Iteration 22: 1.2785292710759677e-05
Iteration 23: 1.1593181625357829e-05
Iteration 24: 1.0550087608862668e-05
Iteration 25: 9.641104952606838e-06
Iteration 26: 8.851335223880596e-06
Iteration 27: 8.1807766

In [None]:
# arity should be 2

In [150]:
# class PureNameLNN(nn.Module):
#     def __init__(self, alpha, sim_arity=4, rule_arity=2, slack=None):
#         super(PureNameLNN, self).__init__()
#         self.threshold = 0.5
        
#         self.sim_disjunction_or = or_lukasiewicz(alpha, sim_arity, slack)
    
#     def forward(self, x, mention_labels=None):
#         """
#             x: scores['jw'], scores['jacc'], scores['lev'], scores['spacy'], 
#                normalized_ref_scores[ref_idx], normalized_ctx_scores[ctx_idx]
#         """
#         yhat = None
        
#         # RULE 1: lookup predicate
#         lookup_features = x[:,5]
#         print("lookup_features", lookup_features)
        
#         # RULE 2: similarity predicate(mention==label AND Jacc(m, lb) AND Lev(m, lb) AND Jaro(m, lb))
#         feature_list = []
#         # rule 2 (1) mention==label
#         mentions = np.array([m[0].lower() for m in mention_labels])
#         labels = np.array([m[1].lower() for m in mention_labels])
#         exact_match_features = torch.from_numpy(np.array(mentions == labels).astype(float)).float()
#         feature_list.append(exact_match_features)
#         print("exact_match_features", exact_match_features)
        
#         # rule 2 (2)-(4) Jaro(m, lb) AND Jacc(m, lb) AND Lev(m, lb))
#         sim_features = x[:, 0:3]
#         print(sim_features)

#         return yhat