In [8]:
import pandas as pd

In [9]:
# Load the evaluation set (the FEVER2 "fixers" dev split, going by the filename).
# lines=True makes pandas read the file as JSON Lines: one JSON object per line.
dataset = pd.read_json('datasets/fever2-fixers-dev.jsonl', lines=True)
# Quick sanity check: number of rows and the available columns.
print(len(dataset))
print(dataset.columns)

1174
Index(['id', 'label', 'claim', 'evidence', 'original_id', 'transformation',
       'attack', 'annotation'],
      dtype='object')


In [10]:
# Check that every row has a valid claim (str) and label ("SUPPORTS", "REFUTES", "NOT ENOUGH INFO")
def check_claim(row):
    """
    Check that a row carries a string claim and a recognised label.

    Every offending value is printed so that bad rows can be inspected.

    :param row: A dataset row (or any mapping) with 'claim' and 'label' entries.
    :return: True if both fields are valid, False otherwise.
    """
    valid_labels = ('SUPPORTS', 'REFUTES', 'NOT ENOUGH INFO')
    claim, label = row['claim'], row['label']
    ok = True
    if not isinstance(claim, str):
        print(claim)
        ok = False
    if not isinstance(label, str):
        # A non-string label (e.g. None or NaN for a missing value) has no
        # .upper(); report it here and skip the vocabulary check instead of
        # raising AttributeError.
        print(label)
        ok = False
    elif label.upper() not in valid_labels:
        # The label is compared case-insensitively against the allowed set.
        print(label)
        ok = False
    return ok
# axis=1 makes apply call check_claim once per row, yielding one boolean per row.
valid_rows = dataset.apply(check_claim, axis=1)
# Summing the booleans counts the rows that passed validation.
print(f"Number of valid rows: {valid_rows.sum()}")

Number of valid rows: 1174


In [None]:
# Test run
from core.processing import process_query

# Pick one row (by position) to try the pipeline on.
# NOTE(review): nothing in this notebook shows why position 42 was chosen — presumably arbitrary.
example = dataset.iloc[42]
# Show the claim text and its ground-truth label for reference.
print(example['claim'])
print(example['label'])

async def process_claim(claim: str):
    """
    Send a single claim through process_query.

    :param claim: The claim text to analyse.
    :return: Whatever process_query returns for that text.
    """
    # selected_sources is passed explicitly as None, exactly as before.
    analysis = await process_query(text=claim, selected_sources=None)
    return analysis


def translate_label(label: str) -> str:
    """
    Translate the newsagent label to the Fever2 label.

    Matching ignores letter case and surrounding whitespace, so 'true',
    'True' and ' TRUE ' all map to 'SUPPORTS'. Anything that is not a
    recognisable 'true'/'false' string (including non-string values such as
    None) maps to 'NOT ENOUGH INFO'.

    :param label: The newsagent label.
    :return: The Fever2 label.
    """
    fever_labels = {'true': 'SUPPORTS', 'false': 'REFUTES'}
    # Non-string values cannot match either key; normalising them to None
    # keeps the lookup safe and sends them to the default below.
    normalized = label.strip().lower() if isinstance(label, str) else None
    return fever_labels.get(normalized, 'NOT ENOUGH INFO')
    
async def evaluate_row(row: pd.Series):
    """
    Run one dataset row through process_query and compare the outcome with
    the row's ground-truth label.

    Progress is printed: the two labels, and whether they match.

    :param row: A dataset row providing 'claim' (text) and 'label'
        (ground-truth Fever2 label).
    :return: Tuple of (ground-truth label, predicted label), both upper-cased.
    """
    claim = row['claim']
    label = row['label']
    analysis = await process_query(text=claim, selected_sources=None)

    # Here we're assuming there's only one claim processed by newsagent
    # TODO: soften this assumption, OR hook up the verdict agent
    if analysis is None or len(analysis) == 0:
        # Nothing came back, so analysis[0] would raise. Score the row as
        # "no verdict" instead, so one empty result cannot abort a whole
        # evaluation run.
        print("No analysis returned for claim; treating as NOT ENOUGH INFO")
        proposed_label = None
    else:
        proposed_label = analysis[0]['label']
    # translate_label maps anything other than 'true'/'false' (None included)
    # to 'NOT ENOUGH INFO'.
    predicted_label = translate_label(proposed_label)
    print(f"Ground truth: {label}, predicted: {predicted_label}")
    # Check if the predicted label is correct (case-insensitive comparison)
    if predicted_label.upper() == label.upper():
        print("Correct prediction")
    else:
        print("Incorrect prediction")

    return label.upper(), predicted_label.upper()
    

Globalism is not a group of ideologies that oppose the concept of globalization.
SUPPORTS


In [12]:
# Evaluate the sample row; result is the (ground-truth, predicted) label pair.
result = await evaluate_row(example)

Ground truth: SUPPORTS, predicted: NOT ENOUGH INFO
Incorrect prediction


In [13]:
# Show the (ground-truth, predicted) pair returned by evaluate_row.
print(result)

('SUPPORTS', 'NOT ENOUGH INFO')
