# Pipeline 

In [63]:
%load_ext autoreload
%autoreload 2

from averitec import Datapoint
from evidence_generation import GptEvidenceGenerator, GptBatchedEvidenceGenerator, DynamicFewShotBatchedEvidenceGenerator
from classification import DefaultClassifier, HuggingfaceClassifier, AverageEnsembleClassifier, LogRegEnsembleClassifier
from retrieval import SimpleFaissRetriever, Retriever, MmrFaissRetriever, SubqueryRetriever
from pipeline import Pipeline, MockPipeline
import pickle
from labels import label2id, id2label
import numpy as np
from sklearn.metrics import classification_report
import random
from tqdm import tqdm
random.seed(111)

import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Pipeline test

In [64]:
# Load one split of the dataset and wrap every record as a Datapoint.
split = "test"
path = "/mnt/data/factcheck/averitec-data/"
# Pin the encoding so decoding the JSON file does not depend on the machine's
# locale default.
with open(path + f"data/{split}.json", encoding="utf-8") as f:
    dataset = json.load(f)

# The file handle is only needed for parsing; everything below works on the
# in-memory list, so it runs after the file has been closed.
# Each record receives its position in the file as `claim_id`.
for i, record in enumerate(dataset):
    record["claim_id"] = i
datapoints = [Datapoint.from_dict(d) for d in dataset]
    

In [65]:
# Take raw record 150 from the loaded split and wrap it as a Datapoint so it
# can be inspected (and reused by the retrieval smoke test).
sample_record = dataset[150]
datapoint = Datapoint.from_dict(sample_record)
datapoint

Datapoint(claim="A research organisation has confirmed Philippine Vice President Leni Robredo received zero votes in the country's Mindanao island.", claim_id=150, claim_date='11-3-2021', speaker='Showbiz Fanaticz', original_claim_url='https://www.youtube.com/watch?v=ErL5IzJkqh8&t=6s', reporting_source='Youtube', location_ISO_code='PH', label=None, metadata={})

In [66]:
# Alternative retriever, left commented out:
# retriever = SimpleFaissRetriever(path="/mnt/data/factcheck/averitec-data/data_store/vecstore/dev/6k")

# Vector store directory for the active split (the `2k` store).
vecstore_dir = f"/mnt/data/factcheck/averitec-data/data_store/vecstore/{split}/2k"
retriever = MmrFaissRetriever(path=vecstore_dir)

# Try the retriever on the currently selected `datapoint` and display the result.
retrieval_result = retriever(datapoint)
retrieval_result

RetrievalResult(documents=[Document(metadata={'url': 'https://newsinfo.inquirer.net/1396547/the-4-year-robredo-marcos-vice-presidential-case', 'context_before': '', 'context_after': 'READ: Marcos asks SC to set aside Robredo proclamation, files protest Despite Marcos’ appeal to postpone the proclamation, Robredo still takes her oath of office and is inaugurated as vice president. READ: FULL TEXT: Vice President Leni Robredo inaugural speech In a two-page resolution, the SC summons Robredo to respond in ten days on Marcos’ election protest. The high court also asks the Commission Elections (Comelec) to preserve election returns (ERs) that are subject to protest through a Precautionary Protection Order (PPO). The Marcos camp files a three-page letter-manifestation questioning the Comelec’s decision to issue a resolution approving the stripping of the vote-counting machines (VCMs) and laptops used in the May 9 polls, despite the PPO from PET. The stripping activity involves the backup of 

In [67]:
# Rebind `datapoint` to record 2 and display only its claim text, date and speaker.
sample_record = dataset[2]
datapoint = Datapoint.from_dict(sample_record)
(datapoint.claim, datapoint.claim_date, datapoint.speaker)

('South African hospital found that traces of the novel coronavirus had survived on the surface of fresh food items for 12 hours during lab tests.',
 '3-12-2021',
 None)

In [70]:
# Run the pipeline over every datapoint of the split and persist the results
# both as a JSON submission file and as a pickle of the raw result objects.
path = "/mnt/data/factcheck/averitec-data/"
# NOTE(review): `target` is not referenced anywhere in this cell and points at
# the dev vector store while `split` selects the data being processed —
# confirm whether it is still needed.
target = path + "data_store/vecstore/dev/6k"
PIPELINE_NAME = "mmr+gpt4o-dfewshot"
# Both output files share one stem derived from `path`, so relocating the data
# root only requires editing `path` above.
submission_stem = f"{path}data_store/submissions/{split}_{PIPELINE_NAME}"

pipeline = Pipeline(
    # dumps = "/mnt/data/factcheck/averitec-data/data_store/submissions/dev_mmr+gpt4o.pkl",
    # SubqueryRetriever(retriever),
    retriever,
    evidence_generator=DynamicFewShotBatchedEvidenceGenerator(),
    classifier=DefaultClassifier(),
)

submission = []  # `to_submission()` output per datapoint, written as JSON below
dump = []        # raw pipeline result objects, pickled below

for dp in tqdm(datapoints):
    pipeline_result = pipeline(dp)
    submission.append(pipeline_result.to_submission())
    dump.append(pipeline_result)

# Explicit encoding keeps the text output independent of the locale default.
with open(f"{submission_stem}.json", "w", encoding="utf-8") as f:
    json.dump(submission, f, indent=4)
with open(f"{submission_stem}.pkl", "wb") as f:
    pickle.dump(dump, f)

100%|██████████| 2215/2215 [03:58<00:00,  9.30it/s]


In [71]:
# Directory for this split/pipeline combination; the returned paths shown in
# the next cell's output are `batch_<n>.jsonl` files inside it.
batch_dir = f"/mnt/data/factcheck/averitec-data/data_store/batch_jobs/{split}_{PIPELINE_NAME}"
# batch_size=75 matches the `total=75` request count reported per batch in the
# recorded submission log.
files = pipeline.evidence_generator.get_batch_files(path=batch_dir, batch_size=75)

In [72]:
# Display the batch file paths returned by get_batch_files.
files

['/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_1.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_2.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_3.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_4.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_5.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_6.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_7.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_8.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_9.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/test_mmr+gpt4o-dfewshot/batch_10.jsonl',
 '/mnt/data/factche

In [73]:
# Submit every batch file and wait for the results; the second argument is the
# path of the combined `output.jsonl` for this split/pipeline combination.
# NOTE(review): the recorded output tags each batch with the description
# 'dev-set job, batch N' although `split` is "test". That text is not set in
# this notebook — check where the evidence generator builds the batch metadata.
batch_output_path = f"/mnt/data/factcheck/averitec-data/data_store/batch_jobs/{split}_{PIPELINE_NAME}/output.jsonl"
batch_results = pipeline.evidence_generator.submit_and_await_batches(files, batch_output_path)

  0%|          | 0/30 [00:00<?, ?it/s]

Batch(id='batch_tL2RYVAmsm3CmfbzkcxIMwvy', completion_window='24h', created_at=1721945124, endpoint='/v1/chat/completions', input_file_id='file-4a6JzXH9Nvbwx2Dw15jD5Zeq', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031524, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 1'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_tL2RYVAmsm3CmfbzkcxIMwvy
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_tL2RYVAmsm3CmfbzkcxIMwvy
waiting for batch to complete BatchRequestCounts(completed=67, failed=0, total=75) batch_tL2RYVAmsm3CmfbzkcxIMwvy
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_tL2RYVAmsm3CmfbzkcxIMwvy
waiting for b

  3%|▎         | 1/30 [01:25<41:22, 85.61s/it]

Batch(id='batch_gSvaVoB9y1UGMsRBGVOOqlPn', completion_window='24h', created_at=1721945210, endpoint='/v1/chat/completions', input_file_id='file-p44OVIzgSttdjTfLcEPCalzF', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031610, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 2'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_gSvaVoB9y1UGMsRBGVOOqlPn
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_gSvaVoB9y1UGMsRBGVOOqlPn
waiting for batch to complete BatchRequestCounts(completed=68, failed=0, total=75) batch_gSvaVoB9y1UGMsRBGVOOqlPn
waiting for batch to complete BatchRequestCounts(completed=75, failed=0, total=75) batch_gSvaVoB9y1UGMsRBGVOOqlPn
batch 2 compl

  7%|▋         | 2/30 [02:10<28:38, 61.37s/it]

Batch(id='batch_RmPn3dzQpjIUGfmvjCMJbXtk', completion_window='24h', created_at=1721945254, endpoint='/v1/chat/completions', input_file_id='file-1bDjonbQbKggiJaDO20a3Qlr', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031654, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 3'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_RmPn3dzQpjIUGfmvjCMJbXtk
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_RmPn3dzQpjIUGfmvjCMJbXtk
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_RmPn3dzQpjIUGfmvjCMJbXtk
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_RmPn3dzQpjIUGfmvjCMJbXtk
waiting for bat

 10%|█         | 3/30 [03:55<36:42, 81.57s/it]

Batch(id='batch_GNEpBbX7vKkwPodFtO4URZPo', completion_window='24h', created_at=1721945360, endpoint='/v1/chat/completions', input_file_id='file-DHrF7oFDa7baK98RNpw9Pe77', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031760, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 4'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_GNEpBbX7vKkwPodFtO4URZPo
waiting for batch to complete BatchRequestCounts(completed=3, failed=0, total=75) batch_GNEpBbX7vKkwPodFtO4URZPo
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_GNEpBbX7vKkwPodFtO4URZPo
batch 4 completed


 13%|█▎        | 4/30 [04:29<27:14, 62.86s/it]

Batch(id='batch_3jAjN9T8sezNY27hsaF7whZ1', completion_window='24h', created_at=1721945394, endpoint='/v1/chat/completions', input_file_id='file-kG2H7BBZDIv8LKwshY2323xa', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031794, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 5'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_3jAjN9T8sezNY27hsaF7whZ1
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_3jAjN9T8sezNY27hsaF7whZ1
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_3jAjN9T8sezNY27hsaF7whZ1
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_3jAjN9T8sezNY27hsaF7whZ1
waiting for b

 17%|█▋        | 5/30 [05:55<29:34, 70.98s/it]

Batch(id='batch_qjgTUBtPKYIMxyEZvdaT3NGM', completion_window='24h', created_at=1721945480, endpoint='/v1/chat/completions', input_file_id='file-7iqUejCkwhSCVKwEQN1QNrPL', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031880, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 6'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_qjgTUBtPKYIMxyEZvdaT3NGM
waiting for batch to complete BatchRequestCounts(completed=1, failed=0, total=75) batch_qjgTUBtPKYIMxyEZvdaT3NGM
waiting for batch to complete BatchRequestCounts(completed=66, failed=0, total=75) batch_qjgTUBtPKYIMxyEZvdaT3NGM
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_qjgTUBtPKYIMxyEZvdaT3NGM
waiting for b

 20%|██        | 6/30 [07:20<30:22, 75.95s/it]

Batch(id='batch_UclV9QBYV6qiGMKh8gUYhF7N', completion_window='24h', created_at=1721945565, endpoint='/v1/chat/completions', input_file_id='file-AoVGnvWPqRAXPsQ6njhComXX', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722031965, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 7'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_UclV9QBYV6qiGMKh8gUYhF7N
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_UclV9QBYV6qiGMKh8gUYhF7N
waiting for batch to complete BatchRequestCounts(completed=53, failed=0, total=75) batch_UclV9QBYV6qiGMKh8gUYhF7N
waiting for batch to complete BatchRequestCounts(completed=67, failed=0, total=75) batch_UclV9QBYV6qiGMKh8gUYhF7N
waiting for b

 23%|██▎       | 7/30 [09:47<37:56, 98.99s/it]

Batch(id='batch_OF2JGs3nmBwEAXmQYc8JHvv8', completion_window='24h', created_at=1721945711, endpoint='/v1/chat/completions', input_file_id='file-WZh1Gj80iOP8a5a3rY3fciVK', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032111, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 8'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_OF2JGs3nmBwEAXmQYc8JHvv8
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_OF2JGs3nmBwEAXmQYc8JHvv8
waiting for batch to complete BatchRequestCounts(completed=68, failed=0, total=75) batch_OF2JGs3nmBwEAXmQYc8JHvv8
waiting for batch to complete BatchRequestCounts(completed=75, failed=0, total=75) batch_OF2JGs3nmBwEAXmQYc8JHvv8
batch 8 compl

 27%|██▋       | 8/30 [10:31<29:55, 81.62s/it]

Batch(id='batch_t4JrNWy8ikf7s1ocV09VYNk8', completion_window='24h', created_at=1721945756, endpoint='/v1/chat/completions', input_file_id='file-x3HYMbkVbWGKNPLmd6NvCq2A', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032156, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 9'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_t4JrNWy8ikf7s1ocV09VYNk8
waiting for batch to complete BatchRequestCounts(completed=4, failed=0, total=75) batch_t4JrNWy8ikf7s1ocV09VYNk8
waiting for batch to complete BatchRequestCounts(completed=54, failed=0, total=75) batch_t4JrNWy8ikf7s1ocV09VYNk8
batch 9 completed


 30%|███       | 9/30 [11:05<23:22, 66.77s/it]

Batch(id='batch_Hn6w5INaiyFS52ZxHLMkr084', completion_window='24h', created_at=1721945791, endpoint='/v1/chat/completions', input_file_id='file-Y1gKhLY89vW37fn5GPcfSFGC', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032191, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 10'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_Hn6w5INaiyFS52ZxHLMkr084
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_Hn6w5INaiyFS52ZxHLMkr084
waiting for batch to complete BatchRequestCounts(completed=65, failed=0, total=75) batch_Hn6w5INaiyFS52ZxHLMkr084
batch 10 completed


 33%|███▎      | 10/30 [11:40<18:58, 56.90s/it]

Batch(id='batch_cuESOYYsLjBLeuBzCWruaGTG', completion_window='24h', created_at=1721945825, endpoint='/v1/chat/completions', input_file_id='file-ydCa3Gb2B4LTHXB9VvVr4fyD', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032225, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 11'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_cuESOYYsLjBLeuBzCWruaGTG
waiting for batch to complete BatchRequestCounts(completed=1, failed=0, total=75) batch_cuESOYYsLjBLeuBzCWruaGTG
waiting for batch to complete BatchRequestCounts(completed=70, failed=0, total=75) batch_cuESOYYsLjBLeuBzCWruaGTG
batch 11 completed


 37%|███▋      | 11/30 [12:14<15:49, 50.00s/it]

Batch(id='batch_Z8iHRwvXFeZbxCyBM7RwwO8u', completion_window='24h', created_at=1721945859, endpoint='/v1/chat/completions', input_file_id='file-w8K6EwsAXiLOwZ47ThgQK6OE', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032259, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 12'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_Z8iHRwvXFeZbxCyBM7RwwO8u
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_Z8iHRwvXFeZbxCyBM7RwwO8u
waiting for batch to complete BatchRequestCounts(completed=71, failed=0, total=75) batch_Z8iHRwvXFeZbxCyBM7RwwO8u
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_Z8iHRwvXFeZbxCyBM7RwwO8u
waiting for 

 40%|████      | 12/30 [13:40<18:14, 60.79s/it]

Batch(id='batch_nSYo1I3J4kPx8GuAAXzR0Fcw', completion_window='24h', created_at=1721945944, endpoint='/v1/chat/completions', input_file_id='file-dG5Aqx8ZrKxOCjivpIUSD8WZ', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032344, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 13'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_nSYo1I3J4kPx8GuAAXzR0Fcw
waiting for batch to complete BatchRequestCounts(completed=6, failed=0, total=75) batch_nSYo1I3J4kPx8GuAAXzR0Fcw
waiting for batch to complete BatchRequestCounts(completed=68, failed=0, total=75) batch_nSYo1I3J4kPx8GuAAXzR0Fcw
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_nSYo1I3J4kPx8GuAAXzR0Fcw
waiting for 

 43%|████▎     | 13/30 [15:04<19:15, 67.94s/it]

Batch(id='batch_RsRa65Pu54IqsckuveP2S3xN', completion_window='24h', created_at=1721946029, endpoint='/v1/chat/completions', input_file_id='file-SKKlfwxh4doFaH5U3sX30sWO', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032429, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 14'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_RsRa65Pu54IqsckuveP2S3xN
waiting for batch to complete BatchRequestCounts(completed=3, failed=0, total=75) batch_RsRa65Pu54IqsckuveP2S3xN
waiting for batch to complete BatchRequestCounts(completed=65, failed=0, total=75) batch_RsRa65Pu54IqsckuveP2S3xN
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_RsRa65Pu54IqsckuveP2S3xN
waiting for 

 47%|████▋     | 14/30 [15:59<17:01, 63.84s/it]

Batch(id='batch_kDTkxqG4RIH7AAVEOnBhU9Tx', completion_window='24h', created_at=1721946083, endpoint='/v1/chat/completions', input_file_id='file-oMAVEkg0MawhnnRcy13cDxka', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032483, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 15'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_kDTkxqG4RIH7AAVEOnBhU9Tx
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_kDTkxqG4RIH7AAVEOnBhU9Tx
waiting for batch to complete BatchRequestCounts(completed=59, failed=0, total=75) batch_kDTkxqG4RIH7AAVEOnBhU9Tx
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_kDTkxqG4RIH7AAVEOnBhU9Tx
batch 15 com

 50%|█████     | 15/30 [16:42<14:26, 57.76s/it]

Batch(id='batch_ckhDznEt1NK7PnaDtNgEmNgV', completion_window='24h', created_at=1721946127, endpoint='/v1/chat/completions', input_file_id='file-39wHcvTTyG5MXEeRKe1x57xf', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032527, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 16'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_ckhDznEt1NK7PnaDtNgEmNgV
waiting for batch to complete BatchRequestCounts(completed=3, failed=0, total=75) batch_ckhDznEt1NK7PnaDtNgEmNgV
waiting for batch to complete BatchRequestCounts(completed=59, failed=0, total=75) batch_ckhDznEt1NK7PnaDtNgEmNgV
waiting for batch to complete BatchRequestCounts(completed=69, failed=0, total=75) batch_ckhDznEt1NK7PnaDtNgEmNgV
waiting for 

 53%|█████▎    | 16/30 [17:37<13:15, 56.81s/it]

Batch(id='batch_oAtrTaK2G4621L2nK32QjHka', completion_window='24h', created_at=1721946182, endpoint='/v1/chat/completions', input_file_id='file-EOwqkOj1wWbe3CNqVHE6tiGQ', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032582, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 17'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_oAtrTaK2G4621L2nK32QjHka
waiting for batch to complete BatchRequestCounts(completed=4, failed=0, total=75) batch_oAtrTaK2G4621L2nK32QjHka
waiting for batch to complete BatchRequestCounts(completed=57, failed=0, total=75) batch_oAtrTaK2G4621L2nK32QjHka
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_oAtrTaK2G4621L2nK32QjHka
waiting for 

 57%|█████▋    | 17/30 [18:31<12:09, 56.10s/it]

Batch(id='batch_n6vMlY5W1FOlwcoPBkIgTUqK', completion_window='24h', created_at=1721946236, endpoint='/v1/chat/completions', input_file_id='file-awJwgg4Pi98AHDU0yRt7nRZU', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032636, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 18'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_n6vMlY5W1FOlwcoPBkIgTUqK
waiting for batch to complete BatchRequestCounts(completed=8, failed=0, total=75) batch_n6vMlY5W1FOlwcoPBkIgTUqK
waiting for batch to complete BatchRequestCounts(completed=69, failed=0, total=75) batch_n6vMlY5W1FOlwcoPBkIgTUqK
waiting for batch to complete BatchRequestCounts(completed=75, failed=0, total=75) batch_n6vMlY5W1FOlwcoPBkIgTUqK
batch 18 com

 60%|██████    | 18/30 [19:15<10:29, 52.50s/it]

Batch(id='batch_RvVf7gCP53O0oxedTQjDVoSj', completion_window='24h', created_at=1721946280, endpoint='/v1/chat/completions', input_file_id='file-nCDeeG3oWX5Oc0kVTT1Qrvog', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032680, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 19'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_RvVf7gCP53O0oxedTQjDVoSj
waiting for batch to complete BatchRequestCounts(completed=4, failed=0, total=75) batch_RvVf7gCP53O0oxedTQjDVoSj
waiting for batch to complete BatchRequestCounts(completed=70, failed=0, total=75) batch_RvVf7gCP53O0oxedTQjDVoSj
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_RvVf7gCP53O0oxedTQjDVoSj
waiting for 

 63%|██████▎   | 19/30 [20:30<10:51, 59.21s/it]

Batch(id='batch_hfVpDAG6uQ1Vw2iIaUTu3Q9T', completion_window='24h', created_at=1721946354, endpoint='/v1/chat/completions', input_file_id='file-uJEmcRQP985rDQKWf7BUn1S4', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032754, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 20'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_hfVpDAG6uQ1Vw2iIaUTu3Q9T
waiting for batch to complete BatchRequestCounts(completed=4, failed=0, total=75) batch_hfVpDAG6uQ1Vw2iIaUTu3Q9T
waiting for batch to complete BatchRequestCounts(completed=60, failed=0, total=75) batch_hfVpDAG6uQ1Vw2iIaUTu3Q9T
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_hfVpDAG6uQ1Vw2iIaUTu3Q9T
batch 20 com

 67%|██████▋   | 20/30 [21:14<09:04, 54.47s/it]

Batch(id='batch_5BcXEu6EuOFNiiLmYPcs6Hw0', completion_window='24h', created_at=1721946398, endpoint='/v1/chat/completions', input_file_id='file-nnTk6EV8kAg16DcLQ90wtx3K', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032798, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 21'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_5BcXEu6EuOFNiiLmYPcs6Hw0
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_5BcXEu6EuOFNiiLmYPcs6Hw0
waiting for batch to complete BatchRequestCounts(completed=63, failed=0, total=75) batch_5BcXEu6EuOFNiiLmYPcs6Hw0
batch 21 completed


 70%|███████   | 21/30 [21:48<07:14, 48.27s/it]

Batch(id='batch_mTae724ijaxV2e1Ab3XzZdwv', completion_window='24h', created_at=1721946432, endpoint='/v1/chat/completions', input_file_id='file-kVr4mo1OZkpSJpeobyPLelkK', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032832, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 22'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_mTae724ijaxV2e1Ab3XzZdwv
waiting for batch to complete BatchRequestCounts(completed=3, failed=0, total=75) batch_mTae724ijaxV2e1Ab3XzZdwv
waiting for batch to complete BatchRequestCounts(completed=70, failed=0, total=75) batch_mTae724ijaxV2e1Ab3XzZdwv
batch 22 completed


 73%|███████▎  | 22/30 [22:21<05:51, 43.93s/it]

Batch(id='batch_NPAVJLvSIjSEdW1jZLtJPyUi', completion_window='24h', created_at=1721946466, endpoint='/v1/chat/completions', input_file_id='file-xjxjMDgbBSqQkkb5uL4NEkVT', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032866, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 23'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_NPAVJLvSIjSEdW1jZLtJPyUi
waiting for batch to complete BatchRequestCounts(completed=1, failed=0, total=75) batch_NPAVJLvSIjSEdW1jZLtJPyUi
waiting for batch to complete BatchRequestCounts(completed=50, failed=0, total=75) batch_NPAVJLvSIjSEdW1jZLtJPyUi
waiting for batch to complete BatchRequestCounts(completed=72, failed=0, total=75) batch_NPAVJLvSIjSEdW1jZLtJPyUi
waiting for 

 77%|███████▋  | 23/30 [23:16<05:29, 47.04s/it]

Batch(id='batch_evxOGVFCHK5xXyLBd5hG0bui', completion_window='24h', created_at=1721946520, endpoint='/v1/chat/completions', input_file_id='file-PcCXmkXDL0dNU6UbUOMpBm4X', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032920, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 24'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_evxOGVFCHK5xXyLBd5hG0bui
waiting for batch to complete BatchRequestCounts(completed=1, failed=0, total=75) batch_evxOGVFCHK5xXyLBd5hG0bui
waiting for batch to complete BatchRequestCounts(completed=58, failed=0, total=75) batch_evxOGVFCHK5xXyLBd5hG0bui
waiting for batch to complete BatchRequestCounts(completed=72, failed=0, total=75) batch_evxOGVFCHK5xXyLBd5hG0bui
batch 24 com

 80%|████████  | 24/30 [24:00<04:36, 46.12s/it]

Batch(id='batch_ZXBzOWEwKycspmUs8GCH76Pt', completion_window='24h', created_at=1721946567, endpoint='/v1/chat/completions', input_file_id='file-SqQ5U8S9xdwnDKL11r70cMUy', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722032967, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 25'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_ZXBzOWEwKycspmUs8GCH76Pt
waiting for batch to complete BatchRequestCounts(completed=4, failed=0, total=75) batch_ZXBzOWEwKycspmUs8GCH76Pt
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_ZXBzOWEwKycspmUs8GCH76Pt
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_ZXBzOWEwKycspmUs8GCH76Pt
waiting for 

 83%|████████▎ | 25/30 [25:17<04:37, 55.47s/it]

Batch(id='batch_eyUQ78k68gZvyQbphbiMtFAv', completion_window='24h', created_at=1721946641, endpoint='/v1/chat/completions', input_file_id='file-mMHPy0vhSXSX9Bt4i6X719h0', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722033041, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 26'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_eyUQ78k68gZvyQbphbiMtFAv
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_eyUQ78k68gZvyQbphbiMtFAv
waiting for batch to complete BatchRequestCounts(completed=74, failed=0, total=75) batch_eyUQ78k68gZvyQbphbiMtFAv
batch 26 completed


 87%|████████▋ | 26/30 [25:51<03:15, 48.96s/it]

Batch(id='batch_HmYKQixleT2eyp7IW49zDKZ3', completion_window='24h', created_at=1721946675, endpoint='/v1/chat/completions', input_file_id='file-U44KYLikrG9wNfk6IeijnVa6', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722033075, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 27'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_HmYKQixleT2eyp7IW49zDKZ3
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_HmYKQixleT2eyp7IW49zDKZ3
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_HmYKQixleT2eyp7IW49zDKZ3
batch 27 completed


 90%|█████████ | 27/30 [26:24<02:12, 44.23s/it]

Batch(id='batch_IJUbL1EfbA27aAsJaW4c0PT9', completion_window='24h', created_at=1721946708, endpoint='/v1/chat/completions', input_file_id='file-kjDABPpUTU54qs0H0npPSjXc', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722033108, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 28'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_IJUbL1EfbA27aAsJaW4c0PT9
waiting for batch to complete BatchRequestCounts(completed=2, failed=0, total=75) batch_IJUbL1EfbA27aAsJaW4c0PT9
waiting for batch to complete BatchRequestCounts(completed=73, failed=0, total=75) batch_IJUbL1EfbA27aAsJaW4c0PT9
batch 28 completed


 93%|█████████▎| 28/30 [26:57<01:21, 40.88s/it]

Batch(id='batch_4nJCxS2R66kfR4N4QvyRK9xm', completion_window='24h', created_at=1721946741, endpoint='/v1/chat/completions', input_file_id='file-Q8aWBCuh3QdBQUCIz8WjAqlx', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722033141, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 29'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_4nJCxS2R66kfR4N4QvyRK9xm
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=75) batch_4nJCxS2R66kfR4N4QvyRK9xm
waiting for batch to complete BatchRequestCounts(completed=71, failed=0, total=75) batch_4nJCxS2R66kfR4N4QvyRK9xm
waiting for batch to complete BatchRequestCounts(completed=75, failed=0, total=75) batch_4nJCxS2R66kfR4N4QvyRK9xm
batch 29 com

 97%|█████████▋| 29/30 [27:41<00:41, 41.79s/it]

Batch(id='batch_T230gxwCgflpIT9LRcGmpwSM', completion_window='24h', created_at=1721946785, endpoint='/v1/chat/completions', input_file_id='file-hAgxywXRQF1DCGD1oUrvIlfK', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1722033185, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'dev-set job, batch 30'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))
waiting for batch to complete BatchRequestCounts(completed=0, failed=0, total=0) batch_T230gxwCgflpIT9LRcGmpwSM
waiting for batch to complete BatchRequestCounts(completed=6, failed=0, total=40) batch_T230gxwCgflpIT9LRcGmpwSM
waiting for batch to complete BatchRequestCounts(completed=39, failed=0, total=40) batch_T230gxwCgflpIT9LRcGmpwSM
batch 30 completed


100%|██████████| 30/30 [28:14<00:00, 56.49s/it]


In [74]:
# Fold every batch output back into the stored pipeline result it belongs to.
# Before merging, the fallback GPT generator's client temperature is set to 0.5.
pipeline.evidence_generator.fallback_gpt_generator.client.temperature = 0.5
new_dump = []
paired_results = zip(dump[:len(batch_results)], batch_results)
for pipeline_result, batch_result in paired_results:
    new_dump.append(
        pipeline.evidence_generator.update_pipeline_result(
            pipeline_result,
            batch_result,
            pipeline.classifier,
        )
    )


Error parsing JSON for EvidenceGenerator.
 
{
    "questions":
        [
            {"question": "What does Georgia's SB 202 law state about giving water to voters?", "answer": "The law states that no
failed, using fallback gpt


In [75]:
# Inspect the second merged result alongside its evidence-generation and
# classification parts; the trailing tuple is what the cell displays.
second_result = new_dump[1]
(second_result, second_result.evidence_generation_result, second_result.classification_result)

(PipelineResult(datapoint=Datapoint(claim='Meat packing workers have suffered more COVID-19 cases than health care workers.', claim_id=1, claim_date='3-12-2021', speaker='Bruce Blackburn', original_claim_url=None, reporting_source='Facebook', location_ISO_code='US', label=None, metadata={}), evidence_generation_result=EvidenceGenerationResult(evidences=[Evidence(question='How many COVID-19 cases have been reported among meatpacking workers?', answer='As of September 22, at least 42,708 people in 496 meat and poultry plants have been infected with COVID-19.', url='https://www.epi.org/blog/meat-and-poultry-worker-demographics/', scraped_text='\nIn September the U.S. Department of Labor issued its first citation against two meatpacking plants for failing to protect employees from exposure to the coronavirus. At those plants alone, almost 1,500 workers have been infected and at least 12 have died, but the fines totaled just $29,000—an amount criticized as far too lenient by experts, former

In [76]:
# Persist the merged results twice under the same stem: first as the submission
# JSON (one `to_submission()` record per result), then as a pickle of the
# result objects themselves.
with open(f"/mnt/data/factcheck/averitec-data/data_store/submissions/{split}_{PIPELINE_NAME}.json", "w") as submission_file:
    submission_records = [result.to_submission() for result in new_dump]
    json.dump(submission_records, submission_file, indent=4)
with open(f"/mnt/data/factcheck/averitec-data/data_store/submissions/{split}_{PIPELINE_NAME}.pkl", "wb") as pickle_file:
    pickle.dump(new_dump, pickle_file)

In [None]:
# Run the veracity evaluation script on data_store/submission_dev.json, using dev.json as the label file.
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submission_dev.json

In [26]:
# Run the veracity evaluation script on the stored dev_subquery+gpt4o submission, using dev.json as the label file.
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submissions/dev_subquery+gpt4o.json

Question-only score (HU-meteor):             0.45140886312940254
Question-answer score (HU-meteor):           0.27770055976133534
Veracity F1 scores:
 * Supported:                                0.6937269372693727
 * Refuted:                                  0.7979966611018364
 * Not Enough Evidence:                      0.20512820512820512
 * Conflicting Evidence/Cherrypicking:       0.038461538461538464
 * macro:                                    0.4338283354902382
 * acc:                                      0.684
--------------------
AVeriTeC scores:
 * Veracity scores (meteor @ 0.1):           0.67
 * Veracity scores (meteor @ 0.2):           0.532
 * Veracity scores (meteor @ 0.25):          0.4
 * Veracity scores (meteor @ 0.3):           0.266
 * Veracity scores (meteor @ 0.4):           0.092
 * Veracity scores (meteor @ 0.5):           0.042
--------------------
AVeriTeC scores by type @ 0.25:
 * Veracity scores (Event/Property Claim):   0.1925972444043578
 * Veracity scores

In [62]:
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submissions/{split}_{PIPELINE_NAME}.json

Question-only score (HU-meteor):             0.4504587577119231
Question-answer score (HU-meteor):           0.2854870005624111
Veracity F1 scores:
 * Supported:                                0.7357142857142858
 * Refuted:                                  0.8245033112582781
 * Not Enough Evidence:                      0.14925373134328357
 * Conflicting Evidence/Cherrypicking:       0.12244897959183673
 * macro:                                    0.45798007697692106
 * acc:                                      0.72
--------------------
AVeriTeC scores:
 * Veracity scores (meteor @ 0.1):           0.704
 * Veracity scores (meteor @ 0.2):           0.572
 * Veracity scores (meteor @ 0.25):          0.422
 * Veracity scores (meteor @ 0.3):           0.286
 * Veracity scores (meteor @ 0.4):           0.126
 * Veracity scores (meteor @ 0.5):           0.046
--------------------
AVeriTeC scores by type @ 0.25:
 * Veracity scores (Event/Property Claim):   0.20454380902113348
 * Veracity score

In [22]:
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submissions/{split}_{PIPELINE_NAME}.json

Question-only score (HU-meteor):             0.44559202247101487
Question-answer score (HU-meteor):           0.27486491800924373
Veracity F1 scores:
 * Supported:                                0.6566037735849056
 * Refuted:                                  0.8098360655737705
 * Not Enough Evidence:                      0.1891891891891892
 * Conflicting Evidence/Cherrypicking:       0.0784313725490196
 * macro:                                    0.43351510022422124
 * acc:                                      0.686
--------------------
AVeriTeC scores:
 * Veracity scores (meteor @ 0.1):           0.672
 * Veracity scores (meteor @ 0.2):           0.532
 * Veracity scores (meteor @ 0.25):          0.394
 * Veracity scores (meteor @ 0.3):           0.256
 * Veracity scores (meteor @ 0.4):           0.106
 * Veracity scores (meteor @ 0.5):           0.03
--------------------
AVeriTeC scores by type @ 0.25:
 * Veracity scores (Event/Property Claim):   0.18721511530819013
 * Veracity score

In [21]:
# Run the veracity evaluation script on the dev submission of the current PIPELINE_NAME;
# both the label file (dev.json) and the prediction file (dev_ prefix) are pinned to the dev split.
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submissions/dev_{PIPELINE_NAME}.json

Question-only score (HU-meteor):             0.4505443758501816
Question-answer score (HU-meteor):           0.2848630013774468
Veracity F1 scores:
 * Supported:                                0.7003610108303249
 * Refuted:                                  0.831973898858075
 * Not Enough Evidence:                      0.29508196721311475
 * Conflicting Evidence/Cherrypicking:       0.08163265306122448
 * macro:                                    0.47726238249068476
 * acc:                                      0.726
--------------------
AVeriTeC scores:
 * Veracity scores (meteor @ 0.1):           0.71
 * Veracity scores (meteor @ 0.2):           0.574
 * Veracity scores (meteor @ 0.25):          0.42
 * Veracity scores (meteor @ 0.3):           0.304
 * Veracity scores (meteor @ 0.4):           0.122
 * Veracity scores (meteor @ 0.5):           0.044
--------------------
AVeriTeC scores by type @ 0.25:
 * Veracity scores (Event/Property Claim):   0.19985145249552058
 * Veracity scores 

## Collapsible section begin

In [None]:
from IPython.display import display, Markdown, Latex

In [None]:
# Show the claim, then a short preview of every item yielded by the retrieval result.
# NOTE(review): `retriever` is whichever retriever an earlier cell constructed; the
# "Retrieved by knn" caption is only accurate if that object performs plain
# nearest-neighbour retrieval — confirm before comparing with the other cells.
knn_retrieval_result = retriever(datapoint)
display(Markdown("### 🗯️ " + datapoint.claim))
display(Markdown("*Retrieved by knn*\n\n"))
for r in knn_retrieval_result:
    # Preview only: source URL in bold, then the first 256 characters of the page content.
    display(Markdown(f"**{r.metadata['url']}**\n\n{r.page_content[:256]}"))

In [None]:
from retrieval import MmrFaissRetriever

# Build an MMR retriever over the same path as the existing `retriever`, then show the
# claim and a short preview of every item yielded by its retrieval result.
mmr_retriever = MmrFaissRetriever(retriever.path)
mmr_retrieval_result = mmr_retriever(datapoint)
display(Markdown("### 🗯️ " + datapoint.claim))
display(Markdown("*Retrieved by MMR*\n\n"))
for r in mmr_retrieval_result:
    # Preview only: source URL in bold, then the first 256 characters of the page content.
    display(Markdown(f"**{r.metadata['url']}**\n\n{r.page_content[:256]}"))

In [None]:
# Wrap the existing `retriever` in a SubqueryRetriever, then show the claim and a short
# preview of every item yielded by its retrieval result.
subquery_retriever = SubqueryRetriever(retriever)
subquery_retrieval_result = subquery_retriever(datapoint)
display(Markdown("### 🗯️ " + datapoint.claim))
display(Markdown("*Retrieved by subqueries*\n\n"))
for r in subquery_retrieval_result:
    # Preview: source URL in bold, the item's `queries` metadata joined with ';' in
    # italics, then the first 256 characters of the page content.
    display(Markdown(f"**{r.metadata['url']}**\n\n*{';'.join(r.metadata['queries'])}*\n\n{r.page_content[:256]}"))

In [None]:
# Display the metadata attached to the subquery retrieval result as a whole.
subquery_retrieval_result.metadata

## Collapsible section end

In [None]:
# Construct a GptBatchedEvidenceGenerator with "gpt-4o" and apply it to the datapoint
# together with the retrieval result from earlier in the notebook.
# The bare last line displays the resulting evidence generation result.
evidence_generator = GptBatchedEvidenceGenerator("gpt-4o")
evidence_generation_result = evidence_generator(datapoint, retrieval_result)
evidence_generation_result

In [None]:
# Display the "suggested_label" entry stored in the evidence generation result's metadata.
evidence_generation_result.metadata["suggested_label"]

In [None]:
# Display the datapoint's `label` field.
datapoint.label

In [None]:
# Construct a DefaultClassifier and apply it to the datapoint, the generated evidence
# and the retrieval result.
# The final tuple displays both the str() form and the repr of the classification result.
classifier = DefaultClassifier()
classification_result = classifier(datapoint, evidence_generation_result, retrieval_result)
str(classification_result), classification_result

In [None]:
# Assemble a Pipeline from the retriever, evidence generator and classifier, and run it
# on the datapoint built from dataset entry 16; the bare last line displays the result.
# NOTE(review): this rebinds the notebook-global `pipeline`, which an earlier cell also
# reads — confirm the rebinding is intended when cells are run top to bottom.
datapoint2 = Datapoint.from_dict(dataset[16])
pipeline = Pipeline(retriever, evidence_generator, classifier)
pipeline_result = pipeline(datapoint2)
pipeline_result

In [None]:
# Display the str() form of the pipeline's classification result next to datapoint2's `label` field.
str(pipeline_result.classification_result), datapoint2.label

In [None]:
# Display what `to_submission()` returns for this single pipeline result.
pipeline_result.to_submission()

In [None]:
# Save the single pipeline result as a pickle file under the relative data/ directory.
import pickle
with open('data/pipeline_result.pkl', 'wb') as pickle_file:
    pickle.dump(pipeline_result, pickle_file)

In [None]:
# Run the veracity evaluation script on data_store/submission_dev_avg_clf.json, using dev.json as the label file.
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submission_dev_avg_clf.json