In [1]:
import csv
from pathlib import Path
from statistics import mean

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score


In [None]:
# Create the data directory for running the SemBERT model on locutions.
# Per the original note, the SemBERT script looks for files named test.tsv and
# dev.tsv, so the same data is written under both names.
locution_df = pd.read_csv("SemBERT_data/SemBERT_locutions.tsv", sep="\t")

# to_csv does not create missing parent directories, so create the target first.
locution_dir = Path("SemBERT_data/locutions")
locution_dir.mkdir(parents=True, exist_ok=True)

for split_name in ("test.tsv", "dev.tsv"):
    locution_df.to_csv(locution_dir / split_name, sep="\t", index=False)

In [None]:
# Run run_snli_predict.py from SemBERT. You may want to change parts of the call below to accommodate your device.
# Make sure the packages required by SemBERT are installed.
# If you run into version issues, you might want to install pytorch at the specified version after installing allennlp.

# to_csv does not create missing directories, so make sure the target directory exists.
Path("SemBERT_eval/predictions").mkdir(parents=True, exist_ok=True)

# produce 3 prediction files on locutions with SemBERT
for i in range(1, 4):
    # the following call runs run_snli_predict.py as it was run for the reported results in the paper
    #! CUDA_VISIBLE_DEVICES=1 python SemBERT/run_snli_predict.py --data_dir SemBERT_data/locutions --task_name snli --eval_batch_size 32 --max_num_aspect 3 --do_predict --do_lower_case --bert_model SemBERT/snli_model_dir/ --output_dir SemBERT_eval/ --tagger_path SemBERT/srl_model_dir/
    # the call above will produce a _pred_results.tsv in the snli_model_dir
    # NOTE: while the call above stays commented out, no new predictions are made and
    # every iteration copies the same existing _pred_results.tsv.

    # the created _pred_results.tsv is copied to another directory before the predictions for propositions are created
    preds = pd.read_csv("SemBERT/snli_model_dir/_pred_results.tsv", sep="\t")
    preds.to_csv(f"SemBERT_eval/predictions/_pred_results_locutions_{i}.tsv", sep="\t", index=False)

In [None]:
# Create the data directory for running the SemBERT model on propositions.
# Per the original note, the SemBERT script looks for files named test.tsv and
# dev.tsv, so the same data is written under both names.
proposition_df = pd.read_csv("SemBERT_data/SemBERT_propositions.tsv", sep="\t")

# to_csv does not create missing parent directories, so create the target first.
proposition_dir = Path("SemBERT_data/propositions")
proposition_dir.mkdir(parents=True, exist_ok=True)

# Both files must be tab-separated; test.tsv was previously written with the
# separator "ţ" instead of "\t", which produced a file that is not a valid TSV.
for split_name in ("test.tsv", "dev.tsv"):
    proposition_df.to_csv(proposition_dir / split_name, sep="\t", index=False)

In [None]:
# to_csv does not create missing directories, so make sure the target directory exists.
Path("SemBERT_eval/predictions").mkdir(parents=True, exist_ok=True)

# produce 3 prediction files on propositions with SemBERT
for i in range(1, 4):
    # the following call runs run_snli_predict.py as it was run for the reported results in the paper
    #! CUDA_VISIBLE_DEVICES=1 python SemBERT/run_snli_predict.py --data_dir SemBERT_data/propositions --task_name snli --eval_batch_size 32 --max_num_aspect 3 --do_predict --do_lower_case --bert_model SemBERT/snli_model_dir/ --output_dir SemBERT_eval/ --tagger_path SemBERT/srl_model_dir/
    # the call above will produce a _pred_results.tsv in the snli_model_dir
    # NOTE: while the call above stays commented out, no new predictions are made and
    # every iteration copies the same existing _pred_results.tsv.

    # the created _pred_results.tsv is copied to the predictions directory;
    # the evaluation cell reads files named _pred_results_props_{i}.tsv, so that stem is used here
    preds = pd.read_csv("SemBERT/snli_model_dir/_pred_results.tsv", sep="\t")
    preds.to_csv(f"SemBERT_eval/predictions/_pred_results_props_{i}.tsv", sep="\t", index=False)

In [14]:
# Evaluate the SemBERT predictions on locutions and propositions.
# For each data type, accuracy and F1 of the three prediction files are computed
# against the gold labels and summarised as mean and standard deviation
# (np.std with its default ddof=0, i.e. the population standard deviation).

# The gold labels are the same for every run, so they are read once up front.
# assumes the "label" column is binary 0/1 with 1 = contradiction and that its
# rows are in the same order as the prediction files -- TODO confirm
gold_file = "../../data/CAPTURE_final_corpus/fullCorpus/CAPTURE_final.tsv"
gold_df = pd.read_csv(gold_file, sep="\t")
gold = list(gold_df["label"])

# per-data-type summaries
f1_means = []
f1_stds = []
acc_means = []
acc_stds = []

# per-run scores, flattened over all data types
all_f1 = []
all_acc = []
all_types = []

types = ["locutions", "props"]
for ty in types:
    accs = []
    f1s = []
    for i in range(1, 4):
        pred_file = f"SemBERT_eval/predictions/_pred_results_{ty}_{i}.tsv"
        pred_df = pd.read_csv(pred_file, sep="\t")

        # binarise the predictions: "contradiction" -> 1, any other label -> 0
        pred = [1 if lab == "contradiction" else 0 for lab in pred_df["prediction"]]

        acc = accuracy_score(gold, pred)
        f1 = f1_score(gold, pred)

        accs.append(acc)
        f1s.append(f1)
        all_types.append(ty)
        all_f1.append(f1)
        all_acc.append(acc)

    f1_mean = mean(f1s)
    f1_std = np.std(np.asarray(f1s))
    acc_mean = mean(accs)
    acc_std = np.std(np.asarray(accs))

    # A bare f-string inside a loop is never displayed, so print the header explicitly.
    print(f"Results from SemBERT prediction on {ty}")
    # mean and std are both reported in percent, rounded to one decimal
    print("f1:", round(100 * f1_mean, 1), "+-", round(float(100 * f1_std), 1))
    print("acc:", round(100 * acc_mean, 1), "+-", round(float(100 * acc_std), 1))

    f1_means.append(f1_mean)
    f1_stds.append(f1_std)

    acc_means.append(acc_mean)
    acc_stds.append(acc_std)

# One row per data type. The accuracy std goes into "accs_std"; it was previously
# assigned to "acc_std", which left "accs_std" empty and added a stray column.
result_df = pd.DataFrame(
    {
        "data_type": types,
        "F1": f1_means,
        "F1_std": f1_stds,
        "accs": acc_means,
        "accs_std": acc_stds,
    }
)

#result_df.to_csv("SemBERT_eval/mean_eval_results.tsv", sep="\t", index=False)

# One row per individual run.
all_results_df = pd.DataFrame(
    {
        "data_type": all_types,
        "F1s": all_f1,
        "accs": all_acc,
    }
)

#all_results_df.to_csv("SemBERT_eval/all_eval_results.tsv", sep="\t", index=False)

f1: 33.3 +- 0.0
acc: 52.6 +- 0.0
f1: 33.9 +- 2.188180498112527
acc: 52.1 +- 0.8894424920585502
