Experiment resources related to the QUITE corpus (EMNLP 2024).

Copyright (c) 2024 Robert Bosch GmbH

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

In [None]:
# Don't forget to add this repo to the PYTHONPATH env variable!
from src.constants import QUITE_Config
from src.experiments.src.numeric_evaluator import NumericEvaluator
from src.utils.quite_dataset_loaders import quite_dataset_mappings
from datasets import Split, Dataset
import numpy as np

# Always predict a probability of 50%

In [None]:
# Constant baseline: every test query is answered with a probability of 0.5.
ne: NumericEvaluator = NumericEvaluator()

quite_test_dataset: Dataset = quite_dataset_mappings[Split.TEST][QUITE_Config.EVIDENCE_QUERY_PAIRS.value]

# Only rows whose "type" is "query" are scored; their "answer" is the reference value.
ground_truths: list[float] = [
    row["answer"] for row in quite_test_dataset if row["type"] == "query"
]
# One constant prediction per collected reference value.
predictions: list[float] = [0.5] * len(ground_truths)

results: dict[str, float] = ne.get_metrics(
    predicted_probs=predictions,
    true_probs=ground_truths,
    # Every query is labelled with the single reasoning type "causal".
    reasoning_types=[["causal"] for _ in predictions],
)

print(results)

# Always predict the average probability of the training set

In [None]:
# Mean baseline: every test query is answered with the average "answer"
# of the query rows in the training split.
results = None  # drop any result left over from a previous cell run

quite_train_dataset: Dataset = quite_dataset_mappings[Split.TRAIN][QUITE_Config.EVIDENCE_QUERY_PAIRS.value]
quite_test_dataset: Dataset = quite_dataset_mappings[Split.TEST][QUITE_Config.EVIDENCE_QUERY_PAIRS.value]

# Collect the answers of all training rows whose "type" is "query".
train_probs: list[float] = [
    row["answer"] for row in quite_train_dataset if row["type"] == "query"
]

avg_prob: float = np.average(train_probs)

print(avg_prob)

# Reference values come from the test split's query rows.
ground_truths: list[float] = [
    row["answer"] for row in quite_test_dataset if row["type"] == "query"
]
# The same training average is predicted for each test query.
predictions: list[float] = [avg_prob] * len(ground_truths)

# `ne` is the NumericEvaluator instance created in the preceding cell.
results = ne.get_metrics(
    predicted_probs=predictions,
    true_probs=ground_truths,
    # Every query is labelled with the single reasoning type "causal".
    reasoning_types=[["causal"] for _ in predictions],
)

print(results)