In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render inline figures in 'retina' (high-resolution) format.
%config InlineBackend.figure_format = 'retina'

In [None]:
from baseline.coref_resolution import *

In [None]:
# The three JSONL split files, handed to generate_data positionally in the
# order train, dev, test (going by the file names).
split_paths = (
    '../data/pwc_split_on_labeled/train.jsonl',
    '../data/pwc_split_on_labeled/dev.jsonl',
    '../data/pwc_split_on_labeled/test.jsonl',
)

# X and y are indexed by split name ("train", "dev") further down the notebook.
# NOTE(review): the contents of `vec` and `pairs` are not visible here - confirm
# against generate_data.
vec, X, y, pairs = generate_data(*split_paths)

In [None]:
from sklearn.linear_model import LogisticRegressionCV
from yellowbrick.classifier import DiscriminationThreshold

In [None]:
# Class-balanced, L2-penalised logistic regression whose regularisation
# strength is picked by 5-fold cross-validation scored on F1.
lr = LogisticRegressionCV(cv=5, scoring="f1", penalty="l2", class_weight="balanced")

# Only ConvergenceWarning is silenced, and only for the duration of the fit.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning)
    lr.fit(X["train"], y["train"])
logging.info("Fitted Logistic Regression ..")

# Hard label predictions for the dev split.
pred = lr.predict(X["dev"])

In [None]:
# Class-probability estimates for the dev split; later cells read column 1
# of this array as the score that is thresholded.
pred_prob = lr.predict_proba(X["dev"])

In [None]:
from dygie.training.thresholding_f1_metric import *

In [None]:
# Thresholded-F1 metric object used in the following cells.
# NOTE(review): `bins=100` presumably sets how many candidate thresholds are
# tracked - confirm against BinaryThresholdF1.
metric = BinaryThresholdF1(bins=100)

In [None]:
import torch

In [None]:
# Wrap the dev-set scores (column 1 of pred_prob) and the gold labels as
# float32 tensors with a leading dimension of size 1, i.e. shape (1, N).
# torch.as_tensor converts the NumPy data directly; the previous
# torch.Tensor([ndarray]) form first built a Python list of arrays, which is
# the slow conversion path and triggers a UserWarning in recent PyTorch.
p = torch.as_tensor(pred_prob[:, 1], dtype=torch.float32).unsqueeze(0)
g = torch.as_tensor(np.asarray(y["dev"]), dtype=torch.float32).unsqueeze(0)

In [None]:
# Feed the dev-set score tensor `p` and gold-label tensor `g` to the metric;
# whatever the call returns is echoed as the cell output.
metric(p, g)

In [None]:
# Derive precision, recall and F1 from the counts stored on `metric`:
#   precision = matched / predicted, recall = matched / total,
#   f1 = 2 * precision * recall / (precision + recall).
# NOTE(review): `f1_metric._prf_divide` is a private helper that this notebook
# never imports by name - presumably it arrives through one of the star-imports
# and guards against division by zero; confirm.
precision = f1_metric._prf_divide(metric.matched_counts, metric.predicted_counts)
recall = f1_metric._prf_divide(metric.matched_counts, metric.total_counts)
f1 = f1_metric._prf_divide(2*precision*recall , (precision + recall))

In [None]:
# Echo the metric's own summary value.
# NOTE(review): reset=False presumably leaves the accumulated counts in place - confirm.
metric.get_metric(reset=False)

In [None]:
# Plot the precision, recall and F1 values derived from the thresholding
# metric's counts. Each curve carries a label and a legend is drawn so the
# three lines can be told apart (same convention as the sklearn-based sweep
# plot later in the notebook). The x axis is the position within each array.
plt.plot(precision, label="precision")
plt.plot(recall, label="recall")
plt.plot(f1, label="f1")
plt.legend()
plt.show()

In [None]:
# Element-wise absolute gap between precision and recall; values near zero
# mark the positions where the two curves cross.
np.abs(precision - recall)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
from sklearn.metrics import *
from tqdm import tqdm

In [None]:
# Sweep 100 evenly spaced cut-offs in [0, 1] over the dev-set scores and
# record binary precision / recall / F1 at each one.
pred_prob = lr.predict_proba(X['dev'])
precision, recall, f1 = [], [], []
for cutoff in tqdm(np.linspace(0, 1, 100)):
    # Strictly-greater comparison: a score equal to the cut-off is labelled 0.
    pred = (pred_prob[:, 1] > cutoff).astype(int)
    prec_at_cutoff, rec_at_cutoff, f1_at_cutoff, _support = precision_recall_fscore_support(
        y['dev'], pred, average='binary'
    )
    precision.append(prec_at_cutoff)
    recall.append(rec_at_cutoff)
    f1.append(f1_at_cutoff)

In [None]:
# Draw the three sweep curves on one set of axes, each with its own legend
# entry; the x axis is the index of the cut-off within the sweep.
for curve, curve_name in ((precision, "precision"), (recall, "recall"), (f1, "f1")):
    plt.plot(curve, label=curve_name)
plt.legend()
plt.show()

In [None]:
# ROC curve for the dev split, scored with column 1 of pred_prob: false-positive
# rates, true-positive rates and the score thresholds they correspond to.
fpr, tpr, thresholds = roc_curve(y["dev"], pred_prob[:, 1])

In [None]:
# ROC curve (solid blue) against the chance diagonal (dashed red),
# with both axes clipped to the unit interval.
plt.plot(fpr, tpr, 'b')
plt.plot([0, 1], [0, 1], 'r--')
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.show()

In [None]:
# Pick the ROC point that maximises TPR + (1 - FPR), i.e. sensitivity plus
# specificity, and look up the score threshold that produced it.
optimal_idx = (tpr + (1 - fpr)).argmax()
optimal_threshold = thresholds[optimal_idx]

In [None]:
# Echo the threshold selected from the ROC curve.
optimal_threshold

In [None]:
# Per-threshold difference between true- and false-positive rate
# (Youden's J statistic) for inspection.
tpr - fpr

In [None]:
# Dev-set classification report when a pair is predicted positive for scores
# strictly above 0.7.
# NOTE(review): 0.7 is hard-coded; it is not derived from `optimal_threshold`
# in this notebook - confirm where the value comes from.
print(classification_report(y["dev"], pred_prob[:, 1] > 0.7))

In [None]:
from inspect import signature

# Precision-recall curve for the dev split, scored with column 1 of pred_prob.
# The thresholds are bound to a real name rather than `_`, because assigning
# to `_` in IPython shadows the built-in "last output" variable.
precision, recall, pr_thresholds = precision_recall_curve(y['dev'], pred_prob[:, 1])

# Pass `step` to fill_between only when the installed matplotlib accepts it.
if 'step' in signature(plt.fill_between).parameters:
    step_kwargs = {'step': 'post'}
else:
    step_kwargs = {}

plt.step(recall, precision, color='b', alpha=0.2, where='post')
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
# Finish with show() so the cell renders the figure instead of echoing the
# return value of the last pyplot call.
plt.show()

In [None]:
import numpy.ma as ma

In [None]:
# Two small masked arrays for experimenting with masked division:
# `a` hides its middle element, `b` hides its last element.
a = ma.masked_array([1, 2, 3], mask=[False, True, False])
b = ma.masked_array([2, 10, 100], mask=[False, False, True])

In [None]:
# Index of the smallest entry of the masked quotient a/b.
# NOTE(review): masked entries are presumably excluded from the search - confirm
# against the numpy.ma argmin documentation.
np.argmin(a/b)

In [None]:
# Echo the masked quotient to see which positions end up masked after dividing
# two arrays with different masks.
a/b

In [None]:
from dygie.data.dataset_readers.entity_linking_reader import PwCLinkerReader

In [None]:
# Generate entity pairs from the dev split file.
# Note: this rebinds `pairs`, replacing the value returned by generate_data
# at the top of the notebook.
pairs = PwCLinkerReader.generate_pairs('../data/pwc_split_on_labeled/dev.jsonl')

In [None]:
import json

# Export every dev pair as one JSON object per line with the fields
# premise (pair[0]), hypothesis (pair[1]) and label (pair[2]).
# The `with` block closes (and flushes) the file even if serialising a pair
# raises; the previous explicit open()/close() leaked the handle in that case.
with open('../data/pwc_split_on_labeled/dev_entity_pairs.jsonl', 'w') as out_file:
    for pair in pairs:
        record = {"premise": pair[0], "hypothesis": pair[1], "label": pair[2]}
        out_file.write(json.dumps(record) + '\n')

In [None]:
# Load the stored dev predictions, one JSON object per line.
# Reading inside a `with` block closes the file handle as soon as parsing is
# done; the previous open() inside the comprehension was never closed.
with open('../outputs/pwc_outputs/experiment_linker/train_sampled/20190806-202943/dev_predict.jsonl') as predictions_file:
    predictions = [json.loads(line) for line in predictions_file]

In [None]:
# Pull the 'label' field out of every prediction record, keeping file order.
pred_labels = [record['label'] for record in predictions]

In [None]:
from sklearn.metrics import *

In [None]:
# Gold labels: 1 when the third field of a pair is 'Entailment', otherwise 0.
gold_labels = [1 if x[2] == 'Entailment' else 0 for x in pairs]

# labels=[0, 1] pins the matrix to 2x2 even when one class never occurs in
# the gold or predicted labels, so the four-way unpacking cannot fail on a
# smaller matrix. Row = gold label, column = predicted label.
tn, fp, fn, tp = confusion_matrix(gold_labels, pred_labels, labels=[0, 1]).ravel()

In [None]:
# One independent, initially empty bucket per (predicted, gold) label
# combination, inserted in the order (0,0), (0,1), (1,0), (1,1).
examples = dict(
    ((pred_label, gold_label), [])
    for pred_label in (0, 1)
    for gold_label in (0, 1)
)

In [None]:
# File every pair under its (predicted label, gold label) bucket; the gold
# label is 1 exactly when the pair's third field is 'Entailment'.
for idx, pair in enumerate(pairs):
    predicted = pred_labels[idx]
    gold = int(pair[2] == 'Entailment')
    examples[(predicted, gold)].append(pair)

In [None]:
# Print the pairs in the (predicted=1, gold=1) bucket whose first or second
# field mentions "stanford", ignoring case.
for example in examples[(1, 1)]:
    if 'stanford' in example[0].lower() or 'stanford' in example[1].lower():
        print(example)