# Pipeline 

In [18]:
%load_ext autoreload
%autoreload 2

from averitec import Datapoint
from evidence_generation import GptEvidenceGenerator, GptBatchedEvidenceGenerator
from classification import DefaultClassifier, HuggingfaceClassifier, AverageEnsembleClassifier, LogRegEnsembleClassifier
from retrieval import SimpleFaissRetriever, Retriever, MmrFaissRetriever, SubqueryRetriever
from pipeline import Pipeline, MockPipeline
import pickle
from labels import label2id, id2label
import numpy as np
from sklearn.metrics import classification_report
import random
from tqdm import tqdm
random.seed(111)

import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Pipeline test

In [19]:
# Load the AVeriTeC split named by `split` from under the data root `path`.
split = "dev"
path = "/mnt/data/factcheck/averitec-data/"
# Read the JSON explicitly as UTF-8 instead of relying on the locale's default encoding.
with open(path + f"data/{split}.json", encoding="utf-8") as f:
    dataset = json.load(f)

# Tag every record with its position in the file as `claim_id`
# (this overwrites any `claim_id` already present in the record).
for claim_id, record in enumerate(dataset):
    record["claim_id"] = claim_id

# Build the Datapoint objects only after the file handle has been closed.
datapoints = [Datapoint.from_dict(d) for d in dataset]
    

In [20]:
# Build a single Datapoint from record 150 of the loaded dataset and display it.
datapoint = Datapoint.from_dict(dataset[150])
datapoint

Datapoint(claim='Zimbabwe gets more than half of the African Export Import Bank loans.', claim_id=150, claim_date='7-10-2020', speaker='Tendai Biti', original_claim_url='https://web.archive.org/web/20210129100929im_/https://lh4.googleusercontent.com/WwmjzhYNC1g84X0JmQzruGRgW4tjrB8rAPT9Zx6x-W6xBhQpVIwGW1zw7wImZ6R6JAyQywo-CiCE3_AZtJcxB74aev7jVG1PFobnm3PKanSX0bwWP3Z8E6z-6tDwuPgrSa6mhuz7', reporting_source='Twitter', location_ISO_code='ZW', label='Refuted', metadata={'required_reannotation': False, 'justification': 'Seems the numbers is no where near half. Therefore the claim is refuted', 'fact_checking_article': 'https://web.archive.org/web/20210129100929/https://zimfact.org/does-zimbabwe-get-more-than-half-afreximbank-loans/', 'claim_types': ['Numerical Claim'], 'fact_checking_strategies': ['Numerical Comparison'], 'questions': [{'question': 'How much has Zimbabwe gotten from the Africa Export Import Bank', 'answers': [{'answer': 'Zimbabwe got US$2,78 billion.', 'answer_type': 'Abstracti

In [22]:
# Point the retriever at the vector store of the configured split. The location is
# derived from `path` and `split` so that changing `split` in the loading cell cannot
# silently keep querying the dev index.
# NOTE(review): assumes every split has a store at <path>data_store/vecstore/<split>/6k — confirm.
retriever = SimpleFaissRetriever(path=f"{path}data_store/vecstore/{split}/6k")
# Retrieve for the datapoint currently bound to `datapoint` and display the result.
retrieval_result = retriever(datapoint)
retrieval_result

RetrievalResult(documents=[Document(metadata={'url': 'https://www.voanews.com/a/afreximbank-pledges-to-post-mugabe-zimbabwe/4161541.html', 'context_before': '', 'context_after': 'The southern African nation, which dumped its hyperinflation-hit currency in 2009, is struggling with a severe dollar crunch that has seen banks fail to avail cash to customers while importers struggle to pay for imports. Finance Minister Patrick Chinamasa promised in a budget speech last week to re-engage with international lenders, curb spending and attract investors to revive the economy. '}, page_content='The African Export and Import Bank has pledged up to $1.5 billion in new loans and financial guarantees to Zimbabwe in a major boost for new President Emmerson Mnangagwa\'s government, the bank\'s president and chairman said Tuesday. Mnangagwa, who took over last month after veteran autocrat Robert Mugabe quit following a de facto military coup, has vowed to focus on reviving the struggling economy and pr

In [23]:
# Re-bind `datapoint` to record 2 and show its claim text, claim date and speaker.
# NOTE(review): `retrieval_result` computed earlier is still the one for dataset[150] —
# confirm that later cells combining `datapoint` with `retrieval_result` intend this.
datapoint = Datapoint.from_dict(dataset[2])
datapoint.claim, datapoint.claim_date, datapoint.speaker

("Due to Imran Khan's criticism of Macron's comments on Islam, French authorities cancelled the visas of 183 Pakistani citizens and deported 118 from the country.",
 '31-10-2020',
 'Consulate General Of Pakistan France')

In [25]:
path = "/mnt/data/factcheck/averitec-data/"
# NOTE(review): `target` is not used anywhere in this cell; kept in case other cells rely on it.
target = path + "data_store/vecstore/dev/6k"
PIPELINE_NAME = "simple+gpt4o"
# Retrieval -> batched GPT evidence generation -> default classifier.
pipeline = Pipeline(
    retriever,
    GptBatchedEvidenceGenerator(),
    DefaultClassifier(),
)

submission = []  # submission-format view of every result (written as JSON below)
dump = []        # full pipeline result objects (pickled below)

for dp in tqdm(datapoints):
    pipeline_result = pipeline(dp)
    submission.append(pipeline_result.to_submission())
    dump.append(pipeline_result)

# Both output files live under the configured data root `path` and are named after
# the split and the pipeline, so they follow `path`/`split`/`PIPELINE_NAME`.
with open(f"{path}data_store/submissions/{split}_{PIPELINE_NAME}.json", "w") as f:
    json.dump(submission, f, indent=4)
with open(f"{path}data_store/submissions/{split}_{PIPELINE_NAME}.pkl", "wb") as f:
    pickle.dump(dump, f)

100%|██████████| 500/500 [00:58<00:00,  8.59it/s]


In [27]:
# Ask the evidence generator for its batch files, using a folder under the configured
# data root `path` that is named after the split and the pipeline.
files = pipeline.evidence_generator.get_batch_files(path=f"{path}data_store/batch_jobs/{split}_{PIPELINE_NAME}")

In [28]:
# Show the batch file paths returned by get_batch_files.
files

['/mnt/data/factcheck/averitec-data/data_store/batch_jobs/dev_simple+gpt4o/batch_1.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/dev_simple+gpt4o/batch_2.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/dev_simple+gpt4o/batch_3.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/dev_simple+gpt4o/batch_4.jsonl',
 '/mnt/data/factcheck/averitec-data/data_store/batch_jobs/dev_simple+gpt4o/batch_5.jsonl']

In [36]:
# Submit the batch files and wait for their results. The output file sits next to the
# batch files, under the configured data root `path`.
# NOTE(review): the recorded cell output suggests an existing output file is reused
# instead of recomputing — confirm against submit_and_await_batches.
batch_results = pipeline.evidence_generator.submit_and_await_batches(
    files,
    f"{path}data_store/batch_jobs/{split}_{PIPELINE_NAME}/output.jsonl",
)

!!!!! existing outfile found, skipping computation


In [31]:
# Set the temperature on the fallback generator's client before merging the batch output.
pipeline.evidence_generator.fallback_gpt_generator.client.temperature = .5

# Pair each stored pipeline result with its batch result, in order. zip() stops at the
# shorter sequence, so only the first len(batch_results) entries of `dump` are updated.
new_dump = [
    pipeline.evidence_generator.update_pipeline_result(stored_result, batch_output, pipeline.classifier)
    for stored_result, batch_output in zip(dump, batch_results)
]


In [34]:
# Inspect the updated result at index 1 together with its evidence-generation
# and classification parts.
(
    new_dump[1],
    new_dump[1].evidence_generation_result,
    new_dump[1].classification_result
)

(PipelineResult(datapoint=Datapoint(claim='Trump Administration claimed songwriter Billie Eilish Is Destroying Our Country In Leaked Documents', claim_id=1, claim_date='31-10-2020', speaker=None, original_claim_url=None, reporting_source='Instagram', location_ISO_code='US', label='Refuted', metadata={'required_reannotation': False, 'justification': 'Seems that the Wzshington post accused the singer wrong lyrics. Therefore the claim is refuted', 'fact_checking_article': 'https://web.archive.org/web/20201103001419/https://leadstories.com/hoax-alert/2020/11/fact-check-trump-administration-did-not-claim-songwriter-billie-eilish-was-destroying-country-in-leaked-documents.html', 'claim_types': ['Position Statement', 'Event/Property Claim'], 'fact_checking_strategies': ['Written Evidence'], 'questions': [{'question': 'Has the Trump administration voiced that Billie Eilish was destroying the country', 'answers': [{'answer': 'A Washington Post story wrongly claimed the Trump administration accu

In [106]:
# Write the batch-updated results to the same submission files the pipeline cell wrote,
# replacing their contents. Paths are built from the configured data root `path`.
with open(f"{path}data_store/submissions/{split}_{PIPELINE_NAME}.json", "w") as f:
    json.dump([d.to_submission() for d in new_dump], f, indent=4)
with open(f"{path}data_store/submissions/{split}_{PIPELINE_NAME}.pkl", "wb") as f:
    pickle.dump(new_dump, f)

In [17]:
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submissions/dev_mmr+gpt4o.json

Question-only score (HU-meteor):             0.4514012996684183
Question-answer score (HU-meteor):           0.2754133041714308
Veracity F1 scores:
 * Supported:                                0.673992673992674
 * Refuted:                                  0.7979966611018364
 * Not Enough Evidence:                      0.25316455696202533
 * Conflicting Evidence/Cherrypicking:       0.04081632653061224
 * macro:                                    0.44149255464678705
 * acc:                                      0.684
--------------------
AVeriTeC scores:
 * Veracity scores (meteor @ 0.1):           0.67
 * Veracity scores (meteor @ 0.2):           0.54
 * Veracity scores (meteor @ 0.25):          0.378
 * Veracity scores (meteor @ 0.3):           0.248
 * Veracity scores (meteor @ 0.4):           0.104
 * Veracity scores (meteor @ 0.5):           0.03
--------------------
AVeriTeC scores by type @ 0.25:
 * Veracity scores (Event/Property Claim):   0.1804752953562197
 * Veracity scores (P

## Collapsible section begin

In [None]:
from IPython.display import display, Markdown, Latex

In [None]:
# Retrieve with `retriever` for the current datapoint and render the hits as Markdown.
knn_retrieval_result = retriever(datapoint)
display(Markdown("### 🗯️ " + datapoint.claim))
display(Markdown("*Retrieved by knn*\n\n"))
# One entry per retrieved document: source URL in bold, then the first 256 characters of its text.
for r in knn_retrieval_result:
    display(Markdown(f"**{r.metadata['url']}**\n\n{r.page_content[:256]}"))

In [None]:
# NOTE(review): MmrFaissRetriever is already imported in the notebook's first cell;
# this repeated import is kept only so the cell stays self-contained.
from retrieval import MmrFaissRetriever

# Build an MMR retriever from the path stored on `retriever`, then render its hits
# for the current datapoint as Markdown.
mmr_retriever = MmrFaissRetriever(retriever.path)
mmr_retrieval_result = mmr_retriever(datapoint)
display(Markdown("### 🗯️ " + datapoint.claim))
display(Markdown("*Retrieved by MMR*\n\n"))
# One entry per retrieved document: source URL in bold, then the first 256 characters of its text.
for r in mmr_retrieval_result:
    display(Markdown(f"**{r.metadata['url']}**\n\n{r.page_content[:256]}"))

In [None]:
# Wrap `retriever` in a SubqueryRetriever and render its hits for the current datapoint.
subquery_retriever = SubqueryRetriever(retriever)
subquery_retrieval_result = subquery_retriever(datapoint)
display(Markdown("### 🗯️ " + datapoint.claim))
display(Markdown("*Retrieved by subqueries*\n\n"))
# One entry per retrieved document: source URL in bold, the ';'-joined entries of its
# 'queries' metadata in italics, then the first 256 characters of its text.
for r in subquery_retrieval_result:
    display(Markdown(f"**{r.metadata['url']}**\n\n*{';'.join(r.metadata['queries'])}*\n\n{r.page_content[:256]}"))

In [None]:
# Show the metadata attached to the sub-query retrieval result.
subquery_retrieval_result.metadata

## Collapsible section end

In [None]:
evidence_generator = GptBatchedEvidenceGenerator("gpt-4o")
# `datapoint` was re-bound (to dataset[2]) after `retrieval_result` was first computed
# (for dataset[150]). Retrieve again so the evidence is generated from documents that
# belong to the claim being processed.
retrieval_result = retriever(datapoint)
evidence_generation_result = evidence_generator(datapoint, retrieval_result)
evidence_generation_result

In [None]:
# Value stored under the "suggested_label" key of the evidence generation metadata.
evidence_generation_result.metadata["suggested_label"]

In [None]:
# Label stored on the current datapoint.
datapoint.label

In [None]:
# Run the default classifier on the current datapoint, its generated evidence and the
# retrieval result, then show both the string form and the raw result object.
classifier = DefaultClassifier()
classification_result = classifier(datapoint, evidence_generation_result, retrieval_result)
str(classification_result), classification_result

In [None]:
# Run the three stages end to end on record 16.
# Note: this re-binds `pipeline`, replacing the Pipeline object built earlier in the notebook.
datapoint2 = Datapoint.from_dict(dataset[16])
pipeline = Pipeline(retriever, evidence_generator, classifier)
pipeline_result = pipeline(datapoint2)
pipeline_result

In [None]:
# String form of the pipeline's classification result next to the label stored on datapoint2.
str(pipeline_result.classification_result), datapoint2.label

In [None]:
# Submission-format view of the pipeline result.
pipeline_result.to_submission()

In [None]:
# Persist the current `pipeline_result` as a pickle file (path is relative to the
# working directory).
import pickle
with open("data/pipeline_result.pkl", mode="wb") as f:
    pickle.dump(pipeline_result, file=f)

In [None]:
# Run the veracity evaluation script on the dev labels against submission_dev_avg_clf.json.
# NOTE(review): no visible cell in this notebook writes that prediction file — confirm it exists.
%run src/prediction/evaluate_veracity.py --label_file /mnt/data/factcheck/averitec-data/data/dev.json --prediction_file /mnt/data/factcheck/averitec-data/data_store/submission_dev_avg_clf.json