In [1]:
import pickle
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score, average_precision_score
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:

perplexions_path = '../data/unified_clean_ant_pairs_formated_def.pickle'
test_dataset_path = '../data/ant_test.txt'

# The pickle holds a mapping of the form (child, parent): perplexity.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open(perplexions_path, 'rb') as f:
    ppls = pickle.load(f)

# Tidy the keys: cut the first element at its first '(' and trim stray
# commas/spaces from both elements; the perplexity values are kept untouched.
ppls_clean = {
    (key[0].split('(')[0].strip(', '), key[1].strip(', ')): ppl
    for key, ppl in ppls.items()
}


In [3]:

# NOTE(review): absolute, machine-specific path - consider pointing at the
# shared data directory with a relative path so the notebook runs elsewhere.
perplexions_path = '/home/LLM_Taxonomy/LexicalEntailment/data/ant_pairs_formated_def_ppls.pickle'
test_dataset_path = '../data/ant_test.txt'

# Second perplexity source, same layout: (child, parent): perplexity.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open(perplexions_path, 'rb') as f:
    ppls = pickle.load(f)

# Same key clean-up as for the first source: truncate the first element at
# '(' and strip surrounding commas/spaces from both elements.
ppls_clean2 = {
    (pair[0].split('(')[0].strip(', '), pair[1].strip(', ')): value
    for pair, value in ppls.items()
}


In [4]:
 # Inspect the cleaned pair -> perplexity mapping from the first pickle.
 # NOTE(review): this displays the whole dict; a small slice would keep the output short.
 ppls_clean

{('cracked up at', 'listened to'): 50.164817810058594,
 ('listened to', 'cracked up at'): 978.0380859375,
 ('came to', 'knew the location of'): 83.42024993896484,
 ('knew the location of', 'came to'): 166.94618225097656,
 ('disrespected', 'had'): 206.806640625,
 ('had', 'disrespected'): 1397.477294921875,
 ('verified', 'thought about'): 3369.3388671875,
 ('thought about', 'verified'): 9824.828125,
 ('disliked', 'thought about'): 1128.14111328125,
 ('thought about', 'disliked'): 40.16008758544922,
 ('conformed to', 'knew'): 1586.60205078125,
 ('knew', 'conformed to'): 1854.51904296875,
 ('tensed', 'adjusted'): 546.844970703125,
 ('adjusted', 'tensed'): 254.29197692871094,
 ('admitted', 'evaluated'): 11651.5712890625,
 ('evaluated', 'admitted'): 160.1088409423828,
 ('weakened', 'changed'): 804.2686157226562,
 ('changed', 'weakened'): 792.29248046875,
 ('was killed by', 'dealt with'): 235.72515869140625,
 ('dealt with', 'was killed by'): 2291.089111328125,
 ('vexed', 'interacted with'): 1

In [5]:
# Look up the same key in both perplexity sources to compare their values.
ppls_clean[('was the same as', 'was compared to')], ppls_clean2[('was the same as', 'was compared to')], 

(169.66836547851562, 69.92713165283203)

In [6]:
# Same comparison for the reversed pair.
ppls_clean[('was compared to', 'was the same as')], ppls_clean2[('was compared to', 'was the same as')], 

(71.4636459350586, 15.823185920715332)

In [7]:
# Declared but never filled in this cell; kept so that any cell expecting
# these names still finds them.
pairs = []
non_reversed = []
not_found = []

# Label assigned to each directional category; lines with any other
# category are ignored.
category_to_label = {
    'directional_entailment': 1,      # (child, parent)
    'directional_non-entailment': 0,  # (parent, child)
}

data = []

with open(test_dataset_path, 'r', encoding='utf-8') as f:
    # Stream the file instead of materialising it with readlines().
    for line in f:
        line = line.rstrip('\n')
        if not line.strip():
            # A blank (e.g. trailing) line would otherwise break the
            # three-way unpacking below.
            continue
        ex1, ex2, category = line.split('\t')
        # Each example is "<left>,<verb>,<right>"; only the verb is used here.
        _, v1, _ = ex1.split(',')
        _, v2, _ = ex2.split(',')
        v1 = v1.strip(' ')
        v2 = v2.strip(' ')
        if category in category_to_label:
            data.append((v1, v2, category_to_label[category]))

# Gold labels aligned one-to-one with `data`.
y_true = [elem[2] for elem in data]

In [8]:
# Peek at the last five (v1, v2, label) triples parsed from the test file.
data[-5:]

[('was compared to', 'was equal to', 0),
 ('was the same as', 'was compared to', 1),
 ('was compared to', 'was the same as', 0),
 ('was identical to', 'was compared to', 1),
 ('was compared to', 'was identical to', 0)]

In [9]:
def count_binary(data, ppls_clean, thr=0):
    """Print ROC AUC and average precision of a hard-threshold predictor.

    A pair is predicted as 1 when
    ``ppls_clean[(child, parent)] - ppls_clean[(parent, child)] < thr``
    and as 0 otherwise. Pairs for which either direction is missing from
    ``ppls_clean`` are predicted as 0.

    The gold labels are taken from ``data`` itself, so the result does not
    depend on whatever a notebook-level ``y_true`` happens to hold.

    Args:
        data: iterable of ``(child, parent, label)`` triples.
        ppls_clean: mapping from ``(child, parent)`` tuples to perplexity.
        thr: decision threshold on the forward-minus-backward difference.

    Returns:
        None. The two metrics are printed.
    """
    labels = []
    y_pred = []

    for child, parent, label in data:
        labels.append(label)

        # Both directions are needed; indexing the reverse key blindly
        # would raise KeyError when only the forward pair is present.
        if (child, parent) not in ppls_clean or (parent, child) not in ppls_clean:
            y_pred.append(0)
            continue

        forward_ppl = ppls_clean[(child, parent)]
        backward_ppl = ppls_clean[(parent, child)]
        y_pred.append(1 if forward_ppl - backward_ppl < thr else 0)

    print('ROC AUC score: ', roc_auc_score(labels, y_pred))
    print('Average precision: ', average_precision_score(labels, y_pred))

def count_diff(data, ppls_clean, low_thr=-100000, high_thr=100000):
    """Score pairs by the clipped backward-minus-forward perplexity difference.

    For every ``(child, parent, label)`` triple the score is
    ``ppls_clean[(parent, child)] - ppls_clean[(child, parent)]`` clipped to
    ``[low_thr, high_thr]``. Pairs for which either direction is missing
    get the neutral score 0. Scores are L1-normalised before evaluation.

    The gold labels are taken from ``data`` itself, so the result does not
    depend on whatever a notebook-level ``y_true`` happens to hold.

    Args:
        data: iterable of ``(child, parent, label)`` triples.
        ppls_clean: mapping from ``(child, parent)`` tuples to perplexity.
        low_thr: lower clipping bound for the difference.
        high_thr: upper clipping bound for the difference.

    Returns:
        Tuple ``(roc_auc, average_precision)``.
    """
    labels = []
    scores = []

    for child, parent, label in data:
        labels.append(label)

        # Both directions are needed; indexing the reverse key blindly
        # would raise KeyError when only the forward pair is present.
        if (child, parent) not in ppls_clean or (parent, child) not in ppls_clean:
            scores.append(0)
            continue

        forward_ppl = ppls_clean[(child, parent)]
        backward_ppl = ppls_clean[(parent, child)]
        scores.append(np.clip(backward_ppl - forward_ppl, low_thr, high_thr))

    # normalize() expects a 2-D array, hence the wrapping list and the [0].
    scores = preprocessing.normalize(np.array([scores]), norm='l1')[0]
    roc_auc = roc_auc_score(labels, scores)
    ap = average_precision_score(labels, scores)
    return roc_auc, ap

def count_frac(data, ppls_clean,low_thr=-100000, high_thr=100000):
    """Score pairs by the clipped backward/forward perplexity ratio.

    For every ``(child, parent, label)`` triple the score is
    ``ppls_clean[(parent, child)] / ppls_clean[(child, parent)]`` clipped to
    ``[low_thr, high_thr]``. Pairs for which either direction is missing
    get the neutral ratio 1. Scores are max-normalised before evaluation.

    The gold labels are taken from ``data`` itself, so the result does not
    depend on whatever a notebook-level ``y_true`` happens to hold.

    Args:
        data: iterable of ``(child, parent, label)`` triples.
        ppls_clean: mapping from ``(child, parent)`` tuples to perplexity.
        low_thr: lower clipping bound for the ratio.
        high_thr: upper clipping bound for the ratio.

    Returns:
        Tuple ``(roc_auc, average_precision)``.
    """
    labels = []
    scores = []

    for child, parent, label in data:
        labels.append(label)

        # Both directions are needed; indexing the reverse key blindly
        # would raise KeyError when only the forward pair is present.
        if (child, parent) not in ppls_clean or (parent, child) not in ppls_clean:
            scores.append(1)
            continue

        forward_ppl = ppls_clean[(child, parent)]
        backward_ppl = ppls_clean[(parent, child)]
        scores.append(np.clip(backward_ppl / forward_ppl, low_thr, high_thr))

    # normalize() expects a 2-D array, hence the wrapping list and the [0].
    scores = preprocessing.normalize(np.array([scores]), norm='max')[0]
    roc_auc = roc_auc_score(labels, scores)
    ap = average_precision_score(labels, scores)
    return roc_auc, ap

def count_frac_diff(data, ppls_clean):
    """Score pairs by the mean of the normalised perplexity ratio and difference.

    For every ``(child, parent, label)`` triple two raw scores are computed
    from ``backward = ppls_clean[(parent, child)]`` and
    ``forward = ppls_clean[(child, parent)]``: the ratio ``backward / forward``
    and the difference ``backward - forward``. Each score vector is
    max-normalised and the two are averaged.

    Pairs for which either direction is missing receive the neutral value of
    each score: ratio 1 and difference 0.

    The gold labels are taken from ``data`` itself, so the result does not
    depend on whatever a notebook-level ``y_true`` happens to hold.

    Args:
        data: iterable of ``(child, parent, label)`` triples.
        ppls_clean: mapping from ``(child, parent)`` tuples to perplexity.

    Returns:
        Tuple ``(roc_auc, average_precision)``.
    """
    labels = []
    frac_scores = []
    diff_scores = []

    for child, parent, label in data:
        labels.append(label)

        # Both directions are needed; indexing the reverse key blindly
        # would raise KeyError when only the forward pair is present.
        if (child, parent) not in ppls_clean or (parent, child) not in ppls_clean:
            frac_scores.append(1)
            # "No evidence" for a difference is 0, not 1.
            diff_scores.append(0)
            continue

        forward_ppl = ppls_clean[(child, parent)]
        backward_ppl = ppls_clean[(parent, child)]
        frac_scores.append(backward_ppl / forward_ppl)
        diff_scores.append(backward_ppl - forward_ppl)

    # normalize() expects a 2-D array, hence the wrapping list and the [0].
    frac_scores = preprocessing.normalize(np.array([frac_scores]), norm='max')[0]
    diff_scores = preprocessing.normalize(np.array([diff_scores]), norm='max')[0]
    y_pred = (frac_scores + diff_scores) / 2

    roc_auc = roc_auc_score(labels, y_pred)
    ap = average_precision_score(labels, y_pred)
    return roc_auc, ap

In [10]:
# Hard-decision baseline: predict 1 when forward - backward perplexity < thr (default 0).
count_binary(data, ppls_clean)

ROC AUC score:  0.5064846416382253
Average precision:  0.5032848944857078


In [11]:
# Continuous score: clipped backward - forward perplexity difference -> (ROC AUC, average precision).
count_diff(data, ppls_clean)

(0.5273515125394588, 0.5418063835160282)

In [12]:
# Continuous score: backward / forward perplexity ratio, clipped to [0, 10].
count_frac(data, ppls_clean, 0, 10)

(0.5195224172675278, 0.519158271643136)

In [13]:
# Continuous score: mean of the max-normalised ratio and difference.
count_frac_diff(data, ppls_clean)

(0.5281650339549674, 0.5423642265155525)

In [34]:
# Column-wise records for the classifier experiments: raw perplexities in
# both directions plus the gold label of each pair.
df = {'forward': [], 'backward': [], 'target': []}

for child, parent, label in data:
    # Keep a pair only when both directions have a perplexity; checking just
    # the forward key would raise KeyError on a missing reverse key.
    if (child, parent) not in ppls_clean or (parent, child) not in ppls_clean:
        continue

    forward_ppl = ppls_clean[(child, parent)]
    backward_ppl = ppls_clean[(parent, child)]
    df['forward'].append(forward_ppl)
    df['backward'].append(backward_ppl)
    df['target'].append(label)

In [35]:
# Turn the collected forward/backward/target lists into a DataFrame.
X = pd.DataFrame(df)

In [36]:
feature_cols = ['forward', 'backward', 'frac', 'diff', 'mul']

# Start again from the raw perplexities in `df` so this cell is idempotent:
# the normalisation below overwrites 'forward'/'backward', and re-running the
# cell on an already-normalised X would derive features from scaled values.
X = pd.DataFrame(df)

# Derived features, all computed from the raw perplexities.
X['frac'] = X['backward'] / X['forward']
X['diff'] = X['backward'] - X['forward']
X['mul'] = X['backward'] * X['forward']
X['sign'] = X['frac'] < 0.5  # boolean flag: backward is less than half of forward

# NOTE(review): Normalizer rescales each ROW (sample) to unit norm, not each
# column; if per-feature scaling was intended, a column-wise scaler is needed.
scaler = preprocessing.Normalizer()
X[feature_cols] = scaler.fit_transform(X[feature_cols])

In [38]:
# Everything except 'diff' is dropped, so the model sees a single feature.
dropping = ['target', 'mul', 'sign', 'frac', 'forward', 'backward']
features = X.drop(columns=dropping)
# Kept under its own name: rebinding the notebook-level `y_true` here would
# change what the count_* functions defined earlier score against.
target = X['target']

logreg = LogisticRegression(C=1, max_iter=1000, fit_intercept=False)
logreg.fit(features, target)

# NOTE(review): the metrics below are computed on the same rows the model was
# fitted on, so they measure fit, not generalisation.
probas = logreg.predict_proba(features)
y_pred = probas[:, 1]  # probability of the positive class

roc_auc = roc_auc_score(target, y_pred)
ap = average_precision_score(target, y_pred)

roc_auc, ap

(0.49965350460786445, 0.5077852708148656)

In [39]:
# Fitted coefficient(s) and intercept of the model currently bound to `logreg`.
logreg.coef_, logreg.intercept_

(array([[1.17111725]]), array([0.]))

In [373]:
from sklearn.linear_model import SGDClassifier

In [386]:
# Everything except 'diff' is dropped, so the model sees a single feature.
dropping = ['target', 'mul', 'sign', 'backward', 'forward', 'frac']
features = X.drop(columns=dropping)
# Kept under its own name: rebinding the notebook-level `y_true` here would
# change what the count_* functions defined earlier score against.
target = X['target']

# SGD shuffles the training data; a fixed random_state makes the reported
# numbers reproducible across runs.
logreg = SGDClassifier(loss='log_loss', tol=1e-15, random_state=0)
logreg.fit(features, target)

# NOTE(review): the metrics below are computed on the same rows the model was
# fitted on, so they measure fit, not generalisation.
probas = logreg.predict_proba(features)
y_pred = probas[:, 1]  # probability of the positive class

roc_auc = roc_auc_score(target, y_pred)
ap = average_precision_score(target, y_pred)

roc_auc, ap

(0.6920013421334584, 0.6951447181974137)

In [327]:
# Only the label is dropped: the tree sees every feature column in X.
dropping = ['target']
features = X.drop(columns=dropping)
# Kept under its own name: rebinding the notebook-level `y_true` here would
# change what the count_* functions defined earlier score against.
target = X['target']

# A fixed random_state makes the fitted tree, and therefore the reported
# numbers, reproducible across runs.
tree_clf = DecisionTreeClassifier(max_depth=5, random_state=0)
tree_clf.fit(features, target)

# NOTE(review): the metrics below are computed on the same rows the tree was
# fitted on, so they measure fit, not generalisation.
probas = tree_clf.predict_proba(features)
y_pred = probas[:, 1]  # probability of the positive class

roc_auc = roc_auc_score(target, y_pred)
ap = average_precision_score(target, y_pred)

roc_auc, ap

(0.717876254913848, 0.6959064805460624)

(0.6556946303223027, 0.655444325087572)

In [None]:

# Label used in this cell for each directional category. The score below is
# forward - backward perplexity, and non-entailment is the positive class (1).
label_by_category = {
    'directional_entailment': 0,      # (child, parent)
    'directional_non-entailment': 1,  # (parent, child)
}

y_true_2 = []
y_pred_2 = []

with open(test_dataset_path, 'r', encoding='utf-8') as f:
    # Stream the file instead of materialising it with readlines().
    for line in f:
        line = line.rstrip('\n')
        if not line.strip():
            # A blank (e.g. trailing) line would otherwise break the
            # three-way unpacking below.
            continue
        ex1, ex2, category = line.split('\t')
        s11, v1, s12 = ex1.split(',')
        s21, v2, s22 = ex2.split(',')

        # Only pairs whose left and right parts are identical in both
        # examples are evaluated, and only for the two directional categories.
        if s11 != s21 or s12 != s22 or category not in label_by_category:
            continue

        v1 = v1.strip(' ')
        v2 = v2.strip(' ')
        # Both directions are needed to form the difference.
        if (v1, v2) not in ppls_clean or (v2, v1) not in ppls_clean:
            continue

        y_pred_2.append(ppls_clean[(v1, v2)] - ppls_clean[(v2, v1)])
        y_true_2.append(label_by_category[category])


# normalize() expects a 2-D array, hence the wrapping list and the [0] below.
normalized_y_pred = preprocessing.normalize([np.array(y_pred_2)])
print('ROC AUC score: ', roc_auc_score(y_true_2, normalized_y_pred[0]))
print('Average precision: ', average_precision_score(y_true_2, normalized_y_pred[0]))