## Build result tables

In [1]:
import pandas as pd

In [110]:
# Read the raw result log into memory, one list entry per line
# (line endings are kept, exactly as readlines() returns them).
with open("output_norm.txt") as f:
    lines = list(f)

In [112]:
import re
# Column names, in the same order as the fields of each log line.
columns = ['model', 'binary', 'alpha', 'precision', 'recall', 'f1', 'threshold']
rows = []
for line in lines:
    # Skip blank lines (e.g. a trailing empty line at the end of the log);
    # they would otherwise raise IndexError in the float conversion below.
    if not line.strip():
        continue
    # Fields are separated by '; '; for each field keep only the text after
    # the last '=' (presumably "key=value" pairs -- the log file is not shown here).
    items = [item.split('=')[-1].strip() for item in line.split('; ')]
    # Positions 2..6 (alpha, precision, recall, f1, threshold) are numeric;
    # 'model' and 'binary' are left as strings.
    for i in range(2, 7):
        items[i] = float(items[i])
    print(items)
    rows.append(items)

print(rows)
df = pd.DataFrame(rows, columns=columns)
df.to_csv("output_norm.csv")

['purename', 'False', 0.85, 0.845, 0.7584, 0.7993, 0.7378388489499601]
['context', 'False', 0.85, 0.877, 0.7896, 0.831, 0.5]
['complex', 'False', 0.85, 0.878, 0.7913, 0.8324, 0.991991991991992]
['purename', 'False', 0.9, 0.8346, 0.7581, 0.7945, 0.7070707070707072]
['context', 'False', 0.9, 0.877, 0.7896, 0.831, 0.5]
['complex', 'False', 0.9, 0.8346, 0.7581, 0.7945, 0.7777777777777778]
['purename', 'False', 0.95, 0.8346, 0.7581, 0.7945, 0.7474747474747475]
['context', 'False', 0.95, 0.876, 0.792, 0.8319, 0.9964964964964965]
['complex', 'False', 0.95, 0.878, 0.7913, 0.8324, 0.994994994994995]
[['purename', 'False', 0.85, 0.845, 0.7584, 0.7993, 0.7378388489499601], ['context', 'False', 0.85, 0.877, 0.7896, 0.831, 0.5], ['complex', 'False', 0.85, 0.878, 0.7913, 0.8324, 0.991991991991992], ['purename', 'False', 0.9, 0.8346, 0.7581, 0.7945, 0.7070707070707072], ['context', 'False', 0.9, 0.877, 0.7896, 0.831, 0.5], ['complex', 'False', 0.9, 0.8346, 0.7581, 0.7945, 0.7777777777777778], ['puren

In [3]:
import argparse
from collections import defaultdict
import sys
sys.path.append('../../../')
from el_evaluation import *


# Notebook configuration expressed as an argparse parser; the defaults below are
# the values the notebook runs with unless arguments are supplied.
parser = argparse.ArgumentParser(description="main training script for training lnn entity linking models")
parser.add_argument("--train_data", type=str, default="./data/train.csv", help="train csv")
parser.add_argument("--test_data", type=str, default="./data/test.csv", help="test csv")
parser.add_argument("--checkpoint_name", type=str, default="checkpoint/best_model.pt", help="checkpoint path")
# Help text fixed: it used to be a copy of the --checkpoint_name help ("checkpoint path").
parser.add_argument("--output_file_name", type=str, default="output/purename_nway_alpha095.csv", help="output csv path")
parser.add_argument("--model_name", type=str, default="purename", help="which model we choose")
# args for dividing the corpus
parser.add_argument('--alpha', type=float, default=0.95, help='alpha for LNN')
parser.add_argument('--num_epoch', type=int, default=200, help='training epochs for LNN')
# store_true means the flag defaults to False; the help text now matches that
# and the import branch at the bottom of this cell.
parser.add_argument("--use_binary", action="store_true", help="use the binary RuleLNN models; if not set, the n-way models are used")
# Dummy option: presumably absorbs the "-f <connection file>" argument that
# Jupyter passes to the kernel, so parse_args() does not fail inside a notebook.
parser.add_argument("-f")
args = parser.parse_args()

# Forced override: the notebook always uses the binary models, regardless of the flag.
args.use_binary = True

# Star-import the model classes from the module that matches the chosen variant.
if args.use_binary:
    from RuleLNN_binary import *
else:
    from RuleLNN_nway import *

In [4]:
def _parse_feature_column(feature_strings):
    """Parse strings such as "[0.1, 0.2, 0.3]" into a float array, one row per string."""
    # s[1:-1] drops the first and last character (the surrounding brackets).
    # The builtin `float` replaces the `np.float` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24 (AttributeError on current releases).
    return np.array([np.fromstring(s[1:-1], dtype=float, sep=', ') for s in feature_strings])


df_train_val = pd.read_csv(args.train_data)
df_test = pd.read_csv(args.test_data)

# train
features_train_val = _parse_feature_column(df_train_val.Features.values)
X_train_val = torch.from_numpy(features_train_val).float()
Y_train_val = torch.from_numpy(df_train_val.Label.values).float()
mention_labels_train_val = df_train_val.Mention_label.values
questions_train_val = df_train_val.Question.values
# 80/20 train/validation split with a fixed seed so the split is reproducible.
x_train, x_val, y_train, y_val, m_labels_train, m_labels_val, ques_train, ques_val = \
    train_test_split(X_train_val, Y_train_val, mention_labels_train_val, questions_train_val,
                     test_size=0.2, train_size=0.8, random_state=100)

# test
features_test = _parse_feature_column(df_test.Features.values)
x_test = torch.from_numpy(features_test).float()
y_test = torch.from_numpy(df_test.Label.values).float()
m_labels_test = df_test.Mention_label.values


In [9]:
# Number of distinct questions in the test frame. The value is computed but not
# shown, because a notebook cell only displays its last expression.
len(set(df_test["Question"].values))
# Preview of the first rows of the test frame (this is what the cell displays).
df_test.head()

Unnamed: 0.1,Unnamed: 0,Question,Mention_label,Features,Label
0,0,Who was the wife of U.S. president Lincoln?,U.S.;United States,"[0.5961538553237915, 0.0, 0.15384615384615385,...",0
1,1,Who was the wife of U.S. president Lincoln?,U.S.;National Register of Historic Places,"[0.42592594027519226, 0.0, 0.02777777777777779...",0
2,2,Who was the wife of U.S. president Lincoln?,U.S.;United States Census Bureau,"[0.5722222328186035, 0.0, 0.07407407407407407,...",0
3,3,Who was the wife of U.S. president Lincoln?,U.S.;Hispanic (U.S. Census),"[0.6439393758773804, 0.0, 0.18181818181818177,...",0
4,4,Who was the wife of U.S. president Lincoln?,U.S.;United States Navy,"[0.5833333134651184, 0.0, 0.11111111111111116,...",0


In [74]:
def pick_model(model_name, alpha):
    """Create the LNN model that corresponds to `model_name`.

    Args:
        model_name: "purename", "context" or "complex".
        alpha: passed through as the first constructor argument of the model.

    Returns:
        A freshly constructed model, or None (after printing a message) when
        `model_name` matches none of the known names.
    """
    # Constructors are wrapped in lambdas so that only the class that is
    # actually requested gets looked up and instantiated.
    # NOTE(review): the fixed `2, False` arguments are defined by the RuleLNN
    # classes, which are not visible from this cell.
    known_models = (
        ("purename", lambda: PureNameLNN(alpha, 2, False)),
        ("context", lambda: ContextLNN(alpha, 2, False)),
        ("complex", lambda: ComplexRuleLNN(alpha, 2, False)),
    )
    for candidate, build in known_models:
        if model_name == candidate:
            return build()

    print("WRONG name input")
    return None


def get_qald_metrics(val_pred, val_y, val_m_labels, ques_val,
                     gold_file_path='../../../data/gt_sparql.csv', limit=410):
    """Score thresholded predictions against the gold file via `compute_metrics`.

    Args:
        val_pred: 0/1 predictions after applying a threshold, one entry per
            candidate; each entry must support `.data.tolist()`.
        val_y: gold labels; kept for signature compatibility, not used here.
        val_m_labels: label strings aligned with `val_pred`; the text after the
            last ';' is taken as the entity name.
        ques_val: question of each candidate, aligned with `val_pred`.
        gold_file_path: CSV with the gold annotations; needs 'Question' and
            'Entities' columns. Defaults to the previously hard-coded path.
        limit: forwarded to `compute_metrics` (previously hard-coded to 410).

    Returns:
        (precision, recall, f1) read from metrics['macro']['named'].
    """
    # Collect, per question, a (resource URL, score) pair for every positive prediction.
    question_rows_map = defaultdict(list)
    for i, pred in enumerate(val_pred):
        pred = pred.data.tolist()[0]
        if pred:
            # Entity name = text after the last ';', whitespace replaced by '_'.
            men_entity_label = '_'.join(val_m_labels[i].split(';')[-1].split())
            question_rows_map[ques_val[i]].append(('http://dbpedia.org/resource/{}'.format(men_entity_label), 1.0))

    # One row per question; the list of pairs is wrapped in an outer list,
    # exactly as before.
    rows = [[key, [value]] for key, value in question_rows_map.items()]

    # pred
    predictions = pd.DataFrame(rows, columns=['Question', 'Entities'])
    predictions['Classes'] = str([])
    predictions = predictions.set_index('Question')

    # gold
    benchmark = pd.read_csv(gold_file_path)
    benchmark = benchmark.set_index('Question')
    benchmark = benchmark.replace(np.nan, '', regex=True)
    benchmark['Entities'] = benchmark['Entities'].astype(object)

    metrics = compute_metrics(benchmark=benchmark, predictions=predictions, limit=limit, is_qald_gt=True, eval='full')

    scores = metrics['macro']['named']
    prec, recall, f1 = scores['precision'], scores['recall'], scores['f1']
    print(prec, recall, f1)
    return prec, recall, f1


def evaluate(eval_model, x_eval, y_eval, m_labels_eval, ques_eval, loss_fn, threshold=0.5):
    """Run `eval_model` on one data split and report its loss and F1.

    Args:
        eval_model: model called as `eval_model(x_eval, m_labels_eval)`.
        x_eval: model inputs for the split.
        y_eval: gold labels, passed to `loss_fn` and to `get_qald_metrics`.
        m_labels_eval: mention labels aligned with `x_eval`.
        ques_eval: questions aligned with `x_eval`.
        loss_fn: callable invoked as `loss_fn(outputs, y_eval)`.
        threshold: outputs strictly greater than this count as positive.

    Returns:
        (loss, f1, outputs): the loss value, the F1 returned by
        `get_qald_metrics`, and the raw (un-thresholded) model outputs.
    """
    eval_model.eval()
    # No gradients are needed while scoring.
    with torch.no_grad():
        outputs = eval_model(x_eval, m_labels_eval)
        loss = loss_fn(outputs, y_eval)
        is_positive = outputs > threshold
        print("val loss", loss)
        _, _, f1 = get_qald_metrics(is_positive, y_eval, m_labels_eval, ques_eval)
        print("f1 is {} w/ threshold {} ".format(f1, threshold))

    return loss, f1, outputs

In [75]:
# Build the model selected by the notebook arguments and score it on the
# validation split with evaluate()'s default threshold (0.5).
# NOTE(review): no checkpoint is loaded in this cell, so the model is evaluated
# exactly as pick_model() constructs it -- confirm this is intended.
model = pick_model(args.model_name, args.alpha)
loss_fn = nn.BCELoss()
loss, f1, val_pred = evaluate(model, x_val, y_val, m_labels_val, ques_val, loss_fn)

val loss tensor(0.0571)
0.4792 0.5417 0.5085
f1 is 0.5085 w/ threshold 0.5 


In [77]:
# Sweep the decision threshold over 100 evenly spaced values in [0.99, 1.0] and
# keep the one with the highest validation F1, reusing the raw validation
# outputs stored in `val_pred`.
best_pred = val_pred
# NOTE(review): best_val_loss is assigned here but never read in this cell.
best_val_f1, best_val_loss = 0, 1000
# Fallback threshold; it is only kept if no swept value reaches an F1 above 0.
best_tuned_threshold = 0.5
best_tuned_f1 = best_val_f1
for threshold_ in np.linspace(0.99, 1.0, num=100):
    # Inclusive comparison (>=) here, whereas evaluate() uses a strict '>'.
    y_val_preds = best_pred >= threshold_
    # get_qald_metrics prints one "precision recall f1" line per call.
    prec, recall, f1 = get_qald_metrics(y_val_preds, y_val, m_labels_val, ques_val)
    # Strict '>' keeps the first (smallest) threshold among ties.
    if f1 > best_tuned_f1:
        best_tuned_threshold = threshold_
        best_tuned_f1 = f1
print("Val -- After tuning, the best f1 is {} w/ threshold {}".format(best_tuned_f1, best_tuned_threshold))

0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5 0.5652 0.5306
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.5424
0.5111 0.5778 0.

In [66]:
# Use a plain array, like the train/validation questions, so that the positional
# ques_val[i] lookups inside get_qald_metrics do not depend on the DataFrame index.
ques_test = df_test.Question.values

# NOTE(review): the threshold is a hard-coded literal that looks like one of the
# values swept by np.linspace(0.99, 1.0, num=100), not best_tuned_threshold --
# confirm it still matches the model being evaluated.
evaluate(model, x_test, y_test, m_labels_test, ques_test, loss_fn, threshold=.9994949494949495)

val loss tensor(0.0524)
0.8058 0.7326 0.7674
f1 is 0.7674 w/ threshold 0.9994949494949495 


(tensor(0.0524),
 0.7674,
 tensor([[9.9950e-01],
         [5.2919e-14],
         [2.0628e-04],
         ...,
         [2.0741e-10],
         [5.3063e-20],
         [2.0741e-10]]))

In [None]:
# train model and evaluate
# print(model(x_train, m_labels_train))
# best_tuned_threshold = train(model, x_train, y_train, m_labels_train, ques_train, x_val, y_val, m_labels_val, ques_val, args.checkpoint_name, args.num_epoch)
# test_pred = test(x_test, y_test, m_labels_test, best_tuned_threshold, args.alpha, args.checkpoint_name, args.model_name)
# write_output(df_test, m_labels_test, test_pred, args.output_file_name)

# print(args.use_binary)

In [4]:
import sys
sys.path.append('../../../')

In [5]:
from el_evaluation import *

In [15]:
# Inputs for the file-based evaluation: the gold annotations CSV and one prediction CSV.
# This cell must be executed before any cell that reads `gold_file_path`.
gold_file_path = '../../../data/gt_sparql.csv'
prediction_file = 'output/purename_nway_alpha095.csv' 


In [32]:
def eval(gold_file_path, prediction_file, limit=410):
    """Compute precision, recall and F1 of a prediction CSV against a gold CSV.

    NOTE: this function shadows Python's builtin `eval` for the rest of the
    notebook; the name is kept so that existing callers keep working.

    Args:
        gold_file_path: CSV with the gold annotations; needs 'Question' and
            'Entities' columns.
        prediction_file: CSV with the predictions; needs 'Question',
            'Entities' and 'Classes' columns.
        limit: forwarded to `compute_metrics` (previously hard-coded to 410).

    Returns:
        (precision, recall, f1) read from metrics['macro']['named'].

    Raises:
        KeyError: if the prediction CSV lacks the 'Entities' or 'Classes' column.
    """
    # gold
    benchmark = (
        pd.read_csv(gold_file_path)
        .set_index('Question')
        .replace(np.nan, '', regex=True)
    )
    benchmark['Entities'] = benchmark['Entities'].astype(object)

    # pred
    predictions = pd.read_csv(prediction_file).set_index('Question')
    # Explicit schema check. It replaces two self-assignments whose only effect
    # was to raise KeyError when one of these columns was missing.
    missing = [col for col in ('Entities', 'Classes') if col not in predictions.columns]
    if missing:
        raise KeyError("prediction file {} is missing column(s): {}".format(prediction_file, missing))

    metrics = compute_metrics(benchmark=benchmark, predictions=predictions, limit=limit, is_qald_gt=True, eval='full')

    scores = metrics['macro']['named']
    prec, recall, f1 = scores['precision'], scores['recall'], scores['f1']
    print(prec, recall, f1)
    return prec, recall, f1

In [33]:

import glob, os
import pandas as pd

# Evaluate every prediction CSV under output/ against the gold file and collect
# one summary row per file.
rows = []
columns = ['filename', 'alpha', 'precision', 'recall', 'f1']
for prediction_file in sorted(glob.glob("output/*.csv")):
    print("=====", prediction_file,"=====")
    prec, rec, f1 = eval(gold_file_path, prediction_file)
    # Strip directory and extension with os.path instead of split('/') and
    # [:-4], so this also works when glob returns the platform's own path
    # separator (backslashes on Windows).
    filename = os.path.splitext(os.path.basename(prediction_file))[0]
    # Last two characters of the file stem, kept as a string
    # (e.g. "85" for a stem ending in "alpha085").
    alpha = filename[-2:]
    print(filename, alpha, prec, rec, f1)
    rows.append([filename, alpha, prec, rec, f1])
df = pd.DataFrame(rows, columns=columns)
df.to_csv('eval_results.csv')

===== output/complex_binary_alpha085.csv =====


NameError: name 'gold_file_path' is not defined