In [10]:
import collections
import glob
import json
import os

import pandas as pd

import config

# Original Evidence Inference Dataset Exploration

In [3]:
# configuration: load the local path settings before any cell calls config.get(...).
# Later cells read the "path.EVIDENCE-INFERENCE" key through config.get(...).
# NOTE(review): the relative path is presumably resolved against the notebook's
# working directory, and include_in_hash=False presumably keeps this file out of
# some configuration hash -- confirm both against the `config` module.
config.load_config_json_file("../path_config_local.json", include_in_hash=False)

In [17]:
# Resolve every dataset location used by this notebook.
# Everything lives under <path.EVIDENCE-INFERENCE>/evidence-inference/annotations,
# so that prefix is built once and reused.
annotations_dir = os.path.join(config.get("path.EVIDENCE-INFERENCE"), "evidence-inference", "annotations")

# XML (.nxml) and plain-text (.txt) files; sorted so the order is stable across runs
nxml_template = os.path.join(annotations_dir, "xml_files", "*.nxml")
nxml_file_paths = sorted(glob.glob(nxml_template))

txt_template = os.path.join(annotations_dir, "txt_files", "*.txt")
txt_file_paths = sorted(glob.glob(txt_template))

# merged prompt / annotation tables
prompts_path = os.path.join(annotations_dir, "prompts_merged.csv")
annotations_path = os.path.join(annotations_dir, "annotations_merged.csv")

# article-ID lists for the train / validation / test splits
splits_dir = os.path.join(annotations_dir, "splits")
train_ids_path = os.path.join(splits_dir, "train_article_ids.txt")
dev_ids_path = os.path.join(splits_dir, "validation_article_ids.txt")
test_ids_path = os.path.join(splits_dir, "test_article_ids.txt")

print(f"Gathered {len(nxml_file_paths)} XML file paths and {len(txt_file_paths)} TXT file paths.")

Gathered 4470 XML file paths and 4470 TXT file paths.


### Number of invalid annotations

In [8]:
df = pd.read_csv(annotations_path, sep=",")

# Count rows by the truthiness of each validity flag.
# `astype(bool)` applies the same truthiness test as a row-by-row `if row[...]`
# check, but over the whole column at once instead of looping with `iterrows()`.
# NOTE(review): a missing flag (NaN) is truthy and therefore counted as valid --
# confirm whether such rows exist and how they should be treated.
valid_label_mask = df["Valid Label"].astype(bool)
valid_reasoning_mask = df["Valid Reasoning"].astype(bool)

# int(...) keeps the counters plain Python ints
num_valid_label = int(valid_label_mask.sum())
num_invalid_label = len(df) - num_valid_label

num_valid_reasoning = int(valid_reasoning_mask.sum())
num_invalid_reasoning = len(df) - num_valid_reasoning

print("Valid label:", num_valid_label)
print("Invalid label:", num_invalid_label)

print("Valid reasoning:", num_valid_reasoning)
print("Invalid reasoning:", num_invalid_reasoning)

Valid label: 24321
Invalid label: 365
Valid reasoning: 24460
Invalid reasoning: 226


### Number of annotations with the same prompt ID and user ID

In [16]:
df = pd.read_csv(annotations_path, sep=",")

# Number of annotation rows for each (PromptID, UserID) pair.
# Zipping the two columns avoids building a full row Series per record, which is
# what `iterrows()` does; pairs are still counted in file order.
counter = collections.Counter(zip(df["PromptID"], df["UserID"]))

# Histogram of those multiplicities: rows-per-pair -> number of pairs.
# Counter preserves first-seen order, so ties in most_common() keep that order.
values = collections.Counter(counter.values())

# json.dumps turns the integer keys into strings in the printed output
print(json.dumps(dict(values.most_common()), indent=4))

{
    "1": 22894,
    "2": 591,
    "3": 125,
    "4": 30,
    "5": 11,
    "6": 4,
    "8": 1,
    "9": 1,
    "7": 1,
    "12": 1
}


### Assert that label codes and labels are consistent

In [18]:
df = pd.read_csv(annotations_path, sep=",")

LABEL_TO_CODE = {
    "significantly decreased": -1,
    "no significant difference": 0,
    "significantly increased": 1
}

# Inverse lookup, derived from LABEL_TO_CODE so the two mappings cannot drift apart.
CODE_TO_LABEL = {code: label for label, code in LABEL_TO_CODE.items()}

# Check both directions column-wise. A label or code that is missing from the
# mapping becomes NaN under `.map(...)`, and NaN compares unequal to everything,
# so unknown values are reported as inconsistencies rather than raising KeyError.
code_mismatch = df["Label"].map(LABEL_TO_CODE) != df["Label Code"]
label_mismatch = df["Label Code"].map(CODE_TO_LABEL) != df["Label"]
inconsistent_rows = df.loc[code_mismatch | label_mismatch, ["Label", "Label Code"]]

# One assertion with the offending rows in the message, so a failure is diagnosable.
assert inconsistent_rows.empty, (
    f"{len(inconsistent_rows)} annotation rows have an inconsistent label / label code:\n"
    f"{inconsistent_rows.head(10)}"
)