In [1]:
import ast
import json
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.
  "found relative to the 'datapath' directory.".format(key))


In [3]:
def load_jsonl(path, key1="sent1_str", key2="sent2_str"):
    """Load a JSON Lines file into a DataFrame and add a ``key`` column.

    Every non-blank line of ``path`` is parsed as one JSON document and becomes
    one row. The ``key`` column is derived from the two sentence columns: each
    value is split on whitespace, re-joined with "-", lower-cased, and the two
    results are concatenated without a separator.

    Args:
        path: Path of the ``.jsonl`` file to read.
        key1: Column holding the first sentence as a whitespace-separated string.
        key2: Column holding the second sentence as a whitespace-separated string.

    Returns:
        pandas.DataFrame with one row per JSON record plus the ``key`` column.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If ``key1`` or ``key2`` is not a column of the loaded frame.
    """

    def normalize(text):
        # "Hello  World" -> "hello-world"
        return "-".join(text.split()).lower()

    # Iterate over the file lazily instead of calling readlines(), and skip
    # blank lines (e.g. a stray empty line at the end of the file), which
    # json.loads would otherwise reject.
    with open(path, "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]

    df = pd.DataFrame(records)
    df["key"] = df[key1].map(normalize) + df[key2].map(normalize)
    return df


def load_df(path, key1="sent1_str", key2="sent2_str"):
    """Load a tab-separated file into a DataFrame and add a ``key`` column.

    The ``key1`` / ``key2`` cells are expected to contain the text of a Python
    literal sequence of strings (e.g. ``"['a', 'b']"``). Each is parsed with
    ``ast.literal_eval``, joined with "-", lower-cased, and the two results are
    concatenated without a separator to form ``key``.

    Legacy positional column names "0", "1", "2" are renamed to "sent1",
    "sent2", "label" when present.

    Args:
        path: Path of the TSV file to read.
        key1: Column holding the first stringified token sequence.
        key2: Column holding the second stringified token sequence.

    Returns:
        pandas.DataFrame with the file's rows plus the ``key`` column.

    Raises:
        KeyError: If ``key1`` or ``key2`` is not a column of the file.
        ValueError, SyntaxError: If a cell is not a valid Python literal.
    """

    def normalize(cell):
        # literal_eval only accepts literals, so it does not execute code
        # embedded in the file.
        return "-".join(ast.literal_eval(cell)).lower()

    df = pd.read_csv(path, sep="\t")
    df["key"] = df[key1].map(normalize) + df[key2].map(normalize)

    # DataFrame.rename() maps *index* labels unless `columns=` is given, so the
    # mapping has to be passed as `columns=` to affect the header at all.
    # A rename is skipped when the target name already exists, so the result
    # never ends up with two columns of the same name.
    legacy_names = {"0": "sent1", "1": "sent2", "2": "label"}
    renames = {
        old: new
        for old, new in legacy_names.items()
        if old in df.columns and new not in df.columns
    }
    return df.rename(columns=renames)


In [5]:
# Directory holding the *_test.jsonl files read below.
PATH = "13-april-2020"

# Tokenized TSV datasets (plain literals: these paths have no placeholders).
df_tokenized = load_df("../data/NEP/all-tokenized.tsv")
df_random_tokenized = load_df("../data/NEP-random/all-tokenized.tsv")

# JSONL files for the same two datasets; they provide the `labels` / `preds`
# columns used by the analysis cells (presumably model outputs — confirm).
df = load_jsonl(f"{PATH}/nep_test.jsonl")
df_random = load_jsonl(f"{PATH}/nep-random_test.jsonl")

# Inner-join each TSV frame with its JSONL counterpart on the sentence-pair
# `key` that both loaders compute.
df_merged = df_tokenized.merge(df, on="key")
df_random_merged = df_random_tokenized.merge(df_random, on="key")

In [7]:
# Columns written to the NEP-random results file, in output order.
random_export_columns = [
    "sent_1",
    "sent_2",
    "label",
    "labels",
    "preds",
    "positive_1",
    "positive_2",
    "subsequence_1",
    "subsequence_2",
]
temp = df_random_merged[random_export_columns]
# Tab-separated, without the DataFrame index.
temp.to_csv(f"{PATH}/random-results.tsv", sep="\t", index=False)

In [8]:
# Columns written to the NEP results file, in output order.
export_columns = [
    "sent_1",
    "sent_2",
    "label",
    "labels",
    "preds",
    "case",
    "cue",
    "common_cue",
]
temp = df_merged[export_columns]
# Tab-separated, without the DataFrame index.
temp.to_csv(f"{PATH}/results.tsv", sep="\t", index=False)

### Cases

In [16]:
# original
# assign() returns a new frame, so df_merged itself is left untouched.
temp = df_merged.assign(correct=lambda d: d.labels == d.preds)

# Mean of the boolean `correct` column = accuracy per (label, case) group.
temp.groupby(["label", "case"])[["correct"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,correct
label,case,Unnamed: 2_level_1
contradiction,a: within scope.,0.00206
contradiction,b: cue-removed,0.012685
entailment,c: a S clause,0.981447


### Original Experiment

In [18]:
# Keep only the "a: within scope." case. Filtering first and copying the
# result yields an independent frame, so the column assignments below do not
# write into a slice of another frame (which raises SettingWithCopyWarning).
temp = df_merged[df_merged.case == "a: within scope."].copy()

# Multiplying by 1. stores the indicators as floats (0.0 / 1.0).
temp["correct"] = 1. * (temp.labels == temp.preds)
# NOTE(review): 2 and 1 are presumably the class ids behind "neg" / "pos" — confirm.
temp["pred_neg"] = 1. * (temp.preds == 2)
temp["pred_pos"] = 1. * (temp.preds == 1)

# Number of examples and mean of `correct` for every cue.
temp.groupby(["cue"])[
    ["correct"]].agg(['count','mean'])

Unnamed: 0_level_0,correct,correct
Unnamed: 0_level_1,count,mean
cue,Unnamed: 1_level_2,Unnamed: 2_level_2
-less,12,0.0
-lessly,10,0.1
Not-not,2,0.0
ab-,13,0.0
by-no-means,2,0.0
dis-,5,0.0
fail,9,0.0
far-from,1,0.0
im-,33,0.0
in-,23,0.0


### "but"

In [10]:
# Build the frame in this cell rather than reusing whatever `temp` an earlier
# cell left behind: the preceding cell narrows `temp` to a single case, while
# this breakdown groups by `case` over the full merged data.
temp = df_merged.copy()
temp["correct"] = temp.labels == temp.preds

# Plain substring test: it is case-sensitive and also matches longer words
# that contain "but".
temp["but_1"] = temp.sent_1.apply(lambda x: "but" in x)
temp["but_2"] = temp.sent_2.apply(lambda x: "but" in x)

# Number of examples and accuracy per (label, but_1, but_2, case) group.
temp.groupby(["label", "but_1", "but_2", "case"])[["correct"]].agg(['count','mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,correct,correct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,count,mean
label,but_1,but_2,case,Unnamed: 4_level_2,Unnamed: 5_level_2
contradiction,False,False,a: within scope.,1221,0.001638
contradiction,False,False,b: cue-removed,1543,0.009721
contradiction,True,False,a: within scope.,229,0.004367
contradiction,True,False,b: cue-removed,98,0.040816
contradiction,True,True,a: within scope.,6,0.0
contradiction,True,True,b: cue-removed,251,0.01992
entailment,False,False,c: a S clause,711,0.988748
entailment,True,False,c: a S clause,340,0.964706
entailment,True,True,c: a S clause,27,1.0


### Random

In [19]:
# assign() returns a new frame with three boolean indicator columns added;
# df_random_merged itself is left untouched.
temp = df_random_merged.assign(
    correct=lambda d: d.labels == d.preds,
    pred_neg=lambda d: d.preds == 2,
    pred_pos=lambda d: d.preds == 1,
)

# Mean of each indicator per combination of the four grouping flags.
group_cols = ["positive_1", "positive_2", "subsequence_1", "subsequence_2"]
temp.groupby(group_cols)[["correct", "pred_neg", "pred_pos"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,correct,pred_neg,pred_pos
positive_1,positive_2,subsequence_1,subsequence_2,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
False,False,False,False,0.055344,0.758588,0.186069
False,False,False,True,0.143145,0.725806,0.131048
False,False,True,False,0.0625,0.891129,0.046371
False,False,True,True,0.154605,0.723684,0.121711
False,True,False,True,0.149243,0.680193,0.170564
False,True,True,True,0.181818,0.676768,0.141414
True,False,True,False,0.053645,0.871389,0.074966
True,False,True,True,0.148428,0.719497,0.132075
True,True,True,True,0.189903,0.664995,0.145102


In [20]:
# Same indicator means, grouped by the two `positive_*` flags only.
indicator_cols = ["correct", "pred_neg", "pred_pos"]
temp.groupby(["positive_1", "positive_2"])[indicator_cols].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,correct,pred_neg,pred_pos
positive_1,positive_2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,0.088311,0.775171,0.136519
False,True,0.16073,0.678985,0.160285
True,False,0.08715,0.817697,0.095153
True,True,0.189903,0.664995,0.145102


### Length

In [13]:
# original
temp = df_merged.copy()
temp["correct"] = temp.labels == temp.preds
temp["len_premise"] = temp.sent_1.apply(len)
temp["len_hypothesis"] = temp.sent_2.apply(len)

temp.groupby(["correct", "label"])[
    ["len_premise", "len_hypothesis"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,len_premise,len_hypothesis
correct,label,Unnamed: 2_level_1,Unnamed: 3_level_1
False,contradiction,115.709425,74.40259
False,entailment,180.15,46.9
True,contradiction,136.407407,63.185185
True,entailment,154.085066,51.907372


In [14]:
# Mean lengths again, additionally split by `case`.
by_case = temp.groupby(["correct", "label", "case"])
by_case[["len_premise", "len_hypothesis"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,len_premise,len_hypothesis
correct,label,case,Unnamed: 3_level_1,Unnamed: 4_level_1
False,contradiction,a: within scope.,106.58362,39.569855
False,contradiction,b: cue-removed,122.807816,101.496788
False,entailment,c: a S clause,180.15,46.9
True,contradiction,a: within scope.,180.666667,32.666667
True,contradiction,b: cue-removed,130.875,67.0
True,entailment,c: a S clause,154.085066,51.907372


In [15]:
# random
temp = df_random_merged.copy()
temp["correct"] = temp.labels == temp.preds
temp["pred_neg"] = temp.preds == 2
temp["pred_pos"] = temp.preds == 1
temp["len_premise"] = temp.sent_1.apply(len)
temp["len_hypothesis"] = temp.sent_2.apply(len)

temp.groupby(["positive_1", "positive_2", "correct"])[
    ["len_premise", "len_hypothesis"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,len_premise,len_hypothesis
positive_1,positive_2,correct,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,False,85.318203,85.987365
False,False,True,84.231884,77.323671
False,True,False,84.897613,60.395756
False,True,True,78.869806,57.171745
True,False,False,60.993668,84.565514
True,False,True,54.239796,74.454082
True,True,False,60.264135,60.78993
True,True,True,55.927817,53.716549
