In [1]:
import ast
import json
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
def load_jsonl(path, key1="sent1_str", key2="sent2_str"):
    """Load a JSON-lines file into a DataFrame and add a ``key`` column.

    The key is built from the two string fields named by ``key1`` and
    ``key2``: each value is split on whitespace, re-joined with "-",
    lower-cased, and the two results are concatenated.

    Args:
        path: Path of the ``.jsonl`` file (one JSON object per line).
        key1: Name of the first string field used to build the key.
        key2: Name of the second string field used to build the key.

    Returns:
        A DataFrame with one row per non-blank line, plus a ``key`` column.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a non-empty file lacks ``key1`` or ``key2``.
    """

    def strip(x):
        return "-".join(x.split()).lower()

    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing empty line) would make json.loads raise.
        records = [json.loads(line) for line in f if line.strip()]

    df = pd.DataFrame(records)
    if not records:
        # No rows: still return a frame that has the "key" column.
        df["key"] = pd.Series(dtype="object")
        return df

    df["key"] = [strip(a) + strip(b) for a, b in zip(df[key1], df[key2])]
    return df


def load_df(path, key1="sent1_str", key2="sent2_str"):
    """Load a tab-separated file into a DataFrame and add a ``key`` column.

    The columns named by ``key1`` and ``key2`` must hold Python-literal
    sequences of strings (parsed with ``ast.literal_eval``). Each sequence is
    joined with "-", lower-cased, and the two results are concatenated to
    form the key.

    Columns literally named "0", "1" and "2" are renamed to ``sent1``,
    ``sent2`` and ``label`` when present; other columns are left untouched.

    Args:
        path: Path of the TSV file.
        key1: Column holding the first literal token sequence.
        key2: Column holding the second literal token sequence.

    Returns:
        The loaded DataFrame with the extra ``key`` column.
    """

    def strip(tokens):
        return "-".join(tokens).lower()

    def make_key(first, second):
        return strip(ast.literal_eval(first)) + strip(ast.literal_eval(second))

    df = pd.read_csv(path, sep="\t")
    df["key"] = [make_key(a, b) for a, b in zip(df[key1], df[key2])]

    # rename() with a bare mapping targets the *index*; columns= is required
    # for the column rename to take effect.
    return df.rename(columns={"0": "sent1", "1": "sent2", "2": "label"})


In [3]:
# Directory that holds the input files read below.
PATH = "6-april-2020"

# Original set: TSV loaded via load_df, JSONL loaded via load_jsonl.
df_tokenized = load_df(f"{PATH}/tokenized.tsv")
df = load_jsonl(f"{PATH}/nep_test.jsonl")

# Random set: same two loaders on the "random" files.
df_random_tokenized = load_df(f"{PATH}/random-tokenized.tsv")
df_random = load_jsonl(f"{PATH}/nep-random_test.jsonl")

# Join each TSV frame with its JSONL counterpart on the shared "key" column
# (default inner join).
df_merged = pd.merge(df_tokenized, df, on="key")
df_random_merged = pd.merge(df_random_tokenized, df_random, on="key")

In [14]:
# Select the columns to export from the merged random-set frame and write
# them out as a tab-separated file without the index.
temp = df_random_merged.loc[
    :,
    [
        "sent_1",
        "sent_2",
        "label",
        "labels",
        "preds",
        "positive_1",
        "positive_2",
        "subsequence_1",
        "subsequence_2",
    ],
]
temp.to_csv(f"{PATH}/random-results.tsv", sep="\t", index=False)

In [15]:
# Select the columns to export from the merged original-set frame and write
# them out as a tab-separated file without the index.
temp = df_merged.loc[
    :,
    [
        "sent_1",
        "sent_2",
        "label",
        "labels",
        "preds",
        "case",
        "cue",
        "common_cue",
    ],
]
temp.to_csv(f"{PATH}/results.tsv", sep="\t", index=False)

### Original Exp.

In [4]:
# Work on a copy of the merged frame with three float (0.0 / 1.0) flags:
# labels == preds, preds == 2, preds == 1.
temp = df_merged.assign(
    correct=lambda d: (d.labels == d.preds).astype(float),
    pred_neg=lambda d: (d.preds == 2).astype(float),
    pred_pos=lambda d: (d.preds == 1).astype(float),
)

# Row count and mean of "correct" for each cue.
(
    temp
    .groupby(["cue"])[["correct"]]
    .agg(["count", "mean"])
)

Unnamed: 0_level_0,correct,correct
Unnamed: 0_level_1,count,mean
cue,Unnamed: 1_level_2,Unnamed: 2_level_2
-less,60,0.566667
-lessly,29,0.413793
Not-not,3,0.333333
a-,61,0.459016
ab-,58,0.448276
and-fail,1,0.0
by-no-means,7,0.571429
dis-,42,0.642857
fail,27,0.814815
far-from,2,0.5


### "but"

In [10]:
# Flag rows whose sent_1 / sent_2 value contains "but".
# NOTE(review): if sent_* are plain strings this is a substring test, so it
# would also match inside longer words (e.g. "butter") — confirm intent.
temp["but_1"] = temp["sent_1"].map(lambda text: "but" in text)
temp["but_2"] = temp["sent_2"].map(lambda text: "but" in text)

# Row count and mean of "correct" per (label, but_1, but_2, case) group.
(
    temp
    .groupby(["label", "but_1", "but_2", "case"])[["correct"]]
    .agg(["count", "mean"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,correct,correct
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,count,mean
label,but_1,but_2,case,Unnamed: 4_level_2,Unnamed: 5_level_2
contradiction,False,False,a: within scope.,1221,0.771499
contradiction,False,False,b: cue-removed,1543,0.611795
contradiction,True,False,a: within scope.,229,0.812227
contradiction,True,False,b: cue-removed,98,0.632653
contradiction,True,True,a: within scope.,6,0.833333
contradiction,True,True,b: cue-removed,251,0.366534
entailment,False,False,c: a S clause,711,0.940928
entailment,True,False,c: a S clause,340,0.941176
entailment,True,True,c: a S clause,27,0.962963


### Random

In [8]:
# Copy of the merged random-set frame with three boolean flags:
# labels == preds, preds == 2, preds == 1.
temp = df_random_merged.assign(
    correct=lambda d: d.labels == d.preds,
    pred_neg=lambda d: d.preds == 2,
    pred_pos=lambda d: d.preds == 1,
)

# Mean of each flag per (positive_1, positive_2, subsequence_1, subsequence_2) group.
(
    temp
    .groupby(["positive_1", "positive_2", "subsequence_1", "subsequence_2"])
    [["correct", "pred_neg", "pred_pos"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,correct,pred_neg,pred_pos
positive_1,positive_2,subsequence_1,subsequence_2,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
False,False,False,False,0.677481,0.311069,0.01145
False,False,False,True,0.622984,0.360887,0.016129
False,False,True,False,0.554435,0.445565,0.0
False,False,True,True,0.611842,0.381579,0.006579
False,True,False,True,0.75447,0.237964,0.007565
False,True,True,True,0.763889,0.233586,0.002525
True,False,True,False,0.551582,0.44773,0.000688
True,False,True,True,0.644025,0.332075,0.023899
True,True,True,True,0.75794,0.227683,0.014376


In [9]:
# Mean of correct / pred_neg / pred_pos per (positive_1, positive_2) group.
(
    temp
    .groupby(["positive_1", "positive_2"])
    [["correct", "pred_neg", "pred_pos"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,correct,pred_neg,pred_pos
positive_1,positive_2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,0.631399,0.359215,0.009386
False,True,0.757792,0.23642,0.005788
True,False,0.58426,0.406847,0.008893
True,True,0.75794,0.227683,0.014376


### Length

In [10]:
# Original set: copy of the merged frame with a correctness flag and the
# len() of each sent_1 / sent_2 value.
temp = df_merged.assign(
    correct=lambda d: d.labels == d.preds,
    len_premise=lambda d: d.sent_1.map(len),
    len_hypothesis=lambda d: d.sent_2.map(len),
)

# Mean lengths per (correct, label) group.
(
    temp
    .groupby(["correct", "label"])
    [["len_premise", "len_hypothesis"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,len_premise,len_hypothesis
correct,label,Unnamed: 2_level_1,Unnamed: 3_level_1
False,contradiction,146.907789,106.38675
False,entailment,127.761905,39.619048
True,contradiction,100.339758,58.25325
True,entailment,156.232512,52.571429


In [11]:
# Mean len_premise / len_hypothesis per (correct, label, case) group.
(
    temp
    .groupby(["correct", "label", "case"])
    [["len_premise", "len_hypothesis"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,len_premise,len_hypothesis
correct,label,case,Unnamed: 3_level_1,Unnamed: 4_level_1
False,contradiction,a: within scope.,125.154799,38.325077
False,contradiction,b: cue-removed,155.756927,134.074307
False,entailment,c: a S clause,127.761905,39.619048
True,contradiction,a: within scope.,101.485437,39.906443
True,contradiction,b: cue-removed,99.157559,77.184882
True,entailment,c: a S clause,156.232512,52.571429


In [12]:
# Random set: copy of the merged frame with the three boolean flags
# (labels == preds, preds == 2, preds == 1) and the len() of each
# sent_1 / sent_2 value.
temp = df_random_merged.assign(
    correct=lambda d: d.labels == d.preds,
    pred_neg=lambda d: d.preds == 2,
    pred_pos=lambda d: d.preds == 1,
    len_premise=lambda d: d.sent_1.map(len),
    len_hypothesis=lambda d: d.sent_2.map(len),
)

# Mean lengths per (positive_1, positive_2, correct) group.
(
    temp
    .groupby(["positive_1", "positive_2", "correct"])
    [["len_premise", "len_hypothesis"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,len_premise,len_hypothesis
positive_1,positive_2,correct,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,False,91.565972,75.331019
False,False,True,81.518919,90.996622
False,True,False,95.014706,52.773897
False,True,True,80.385429,62.148061
True,False,False,65.254545,75.408556
True,False,True,56.954338,89.573059
True,True,False,66.18232,52.252762
True,True,True,57.287605,61.744155


### Cases

In [13]:
# Original set: copy of the merged frame with a boolean correctness flag
# (labels == preds).
temp = df_merged.assign(correct=lambda d: d.labels == d.preds)

# Mean of "correct" per (label, case) group.
(
    temp
    .groupby(["label", "case"])[["correct"]]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,correct
label,case,Unnamed: 2_level_1
contradiction,a: within scope.,0.778159
contradiction,b: cue-removed,0.580338
entailment,c: a S clause,0.941558
