# AVeriTeC pipeline example

In [50]:
%load_ext autoreload
%autoreload 2

from retrieval import SimpleFaissRetriever
from evidence_generation import GptEvidenceGenerator
from classification import DefaultClassifier
from averitec import Pipeline, Datapoint

import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:
# Load the dev split and stamp every record with a sequential `claim_id`
# equal to its position in the file.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's default locale encoding.
with open('data/dev.json', encoding='utf-8') as f:
    dataset = json.load(f)

# The file is no longer needed once parsed, so the ids are assigned outside
# the `with` block.
for claim_id, el in enumerate(dataset):
    el['claim_id'] = claim_id

In [52]:
# Build a Datapoint from the dataset record at index 150 and display it.
datapoint = Datapoint.from_dict(dataset[150])
datapoint

Datapoint(claim='Zimbabwe gets more than half of the African Export Import Bank loans.', claim_id=150, claim_date='7-10-2020', speaker='Tendai Biti', original_claim_url='https://web.archive.org/web/20210129100929im_/https://lh4.googleusercontent.com/WwmjzhYNC1g84X0JmQzruGRgW4tjrB8rAPT9Zx6x-W6xBhQpVIwGW1zw7wImZ6R6JAyQywo-CiCE3_AZtJcxB74aev7jVG1PFobnm3PKanSX0bwWP3Z8E6z-6tDwuPgrSa6mhuz7', reporting_source='Twitter', location_ISO_code='ZW', label='Refuted', split='dev', metadata={'required_reannotation': False, 'justification': 'Seems the numbers is no where near half. Therefore the claim is refuted', 'fact_checking_article': 'https://web.archive.org/web/20210129100929/https://zimfact.org/does-zimbabwe-get-more-than-half-afreximbank-loans/', 'claim_types': ['Numerical Claim'], 'fact_checking_strategies': ['Numerical Comparison'], 'questions': [{'question': 'How much has Zimbabwe gotten from the Africa Export Import Bank', 'answers': [{'answer': 'Zimbabwe got US$2,78 billion.', 'answer_type

In [53]:
# Create the retriever over the stored dev vector store and call it on the
# datapoint; the result is displayed below.
# NOTE(review): k=5 is presumably the number of documents returned -- confirm
# against SimpleFaissRetriever.
retriever = SimpleFaissRetriever(k=5, path="data_store/vecstore/dev/6k")
retrieval_result = retriever(datapoint)
retrieval_result

RetrievalResult(documents=[Document(metadata={'url': 'https://www.voanews.com/a/afreximbank-pledges-to-post-mugabe-zimbabwe/4161541.html', 'context_before': '', 'context_after': 'The southern African nation, which dumped its hyperinflation-hit currency in 2009, is struggling with a severe dollar crunch that has seen banks fail to avail cash to customers while importers struggle to pay for imports. Finance Minister Patrick Chinamasa promised in a budget speech last week to re-engage with international lenders, curb spending and attract investors to revive the economy. '}, page_content='The African Export and Import Bank has pledged up to $1.5 billion in new loans and financial guarantees to Zimbabwe in a major boost for new President Emmerson Mnangagwa\'s government, the bank\'s president and chairman said Tuesday. Mnangagwa, who took over last month after veteran autocrat Robert Mugabe quit following a de facto military coup, has vowed to focus on reviving the struggling economy and pr

In [54]:
# Create the evidence generator and call it with the datapoint and the
# retrieval result from the previous cell; the result is displayed below.
# NOTE(review): "gpt-4o" is presumably the model identifier -- confirm against
# GptEvidenceGenerator.
evidence_generator = GptEvidenceGenerator("gpt-4o")
evidence_generation_result = evidence_generator(datapoint, retrieval_result)
evidence_generation_result

EvidenceGenerationResult(evidences=[Evidence(question='What is the total amount of loans provided by the African Export Import Bank (Afreximbank) to African countries?', answer='Since 1994, Afreximbank has approved more than $51 billion in credit facilities for African businesses.', url='https://www.afreximbank.com/afreximbank-tackles-zimbabwes-financial-sector-liquidity-challenges-new-facility-trade-finance-banks/', scraped_text='Cairo, 11 Feb. 2014– To boost access to trade finance, the African Export-Import Bank (Afreximbank) has entered into an agreement to introduce a facility and associated instruments to alleviate the liquidity challenges confronting the financial sector in Zimbabwe. The Afreximbank Trade Debt-backed Securities (AFTRADES) will be provided to participating banks as debt securities that could be used as collateral for interbank funds placements in order to promote interbank dealings among Zimbabwean Banks active in trade finance, according to the terms of a memora

In [55]:
# Label stored on the datapoint, shown for comparison with the classifier output.
datapoint.label

'Refuted'

In [56]:
# Classify the claim from the datapoint, the generated evidence and the
# retrieval result, then show the result both as a string and as the raw object.
classifier = DefaultClassifier()
classification_result = classifier(datapoint, evidence_generation_result, retrieval_result)
str(classification_result), classification_result

('Refuted',
 ClassificationResult(probs=array([0.92045574, 0.01685873, 0.04582679, 0.01685873]), metadata=None))

In [57]:
# Combine the retriever, evidence generator and classifier built above into a
# Pipeline and call it on a second datapoint (dataset record at index 16).
datapoint2 = Datapoint.from_dict(dataset[16])
pipeline = Pipeline(retriever, evidence_generator, classifier)
pipeline_result = pipeline(datapoint2)
pipeline_result

PipelineResult(datapoint=Datapoint(claim='The plentiful evidence we have indicates that masks would not meaningfully help with aerosol transmission of COVID 19.', claim_id=16, claim_date='29-10-2020', speaker='Yinon Weiss', original_claim_url='https://thefederalist.com/2020/10/29/these-12-graphs-show-mask-mandates-do-nothing-to-stop-covid/', reporting_source='The Federalist', location_ISO_code='US', label='Refuted', split='dev', metadata={'required_reannotation': True, 'justification': 'Evidence finds that surgical face masks could prevent transmission of human coronaviruses and influenza viruses from symptomatic individuals. The CDC and the World Health Organization (WHO), at the time of the claim, also recommended cloth masks for the general public.', 'fact_checking_article': 'https://web.archive.org/web/20210711073635/https://healthfeedback.org/claimreview/wearing-face-masks-helps-reduce-the-spread-of-covid-19-but-it-only-confers-partial-protection-and-should-be-combined-with-additi

In [43]:
# String form of the pipeline's classification result next to the datapoint's label.
str(pipeline_result.classification_result), datapoint2.label

('Refuted', 'Refuted')

In [58]:
# Display the pipeline result in its submission form.
pipeline_result.to_submission()

{'claim_id': 16,
 'claim': 'The plentiful evidence we have indicates that masks would not meaningfully help with aerosol transmission of COVID 19.',
 'evidence': [{'question': 'What is the effectiveness of masks in blocking respiratory droplets that can carry the novel coronavirus?',
   'answer': 'Face masks can dramatically reduce the person-to-person transmission of SARS-CoV-2 by blocking respiratory droplets.',
   'url': 'https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0237691',
   'scraped_text': "A log transformed analysis is conducted to evaluate the growth rate of COVID-19 infections. ANOVA analysis is used to analyze the group effect. The p value is less than 2e-16 for the log 2 transformed case comparison, suggesting that the two groups are statistically significantly different. The Asian mask-wearing group clearly has lower growth rates of COVID-19 cases compared to the non-mask-wearing group. Thailand has the highest growth rate among the mask-wearing group

In [59]:
# Pickle the single pipeline result to data/pipeline_result.pkl.
import pickle
with open('data/pipeline_result.pkl', 'wb') as f:
    pickle.dump(pipeline_result, f)

## Whole dev set example
Not really the whole dev set: we run the pipeline on a small random sample of it :)

In [60]:
import json, random, pickle
from tqdm.notebook import tqdm

# Seed Python's `random` module so the `random.sample` draw later in this cell
# is repeatable.
random.seed(111)

# Re-create the three components and combine them into a Pipeline.
retriever = SimpleFaissRetriever(k=5, path="data_store/vecstore/dev/6k")
evidence_generator = GptEvidenceGenerator("gpt-4o")
classifier = DefaultClassifier()
pipeline = Pipeline(retriever, evidence_generator, classifier)

# Reload the dev split and stamp every record with a sequential `claim_id`
# equal to its position in the file.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's default locale encoding.
with open("data/dev.json", encoding="utf-8") as f:
    dataset = json.load(f)

# The file is no longer needed once parsed, so the ids are assigned outside
# the `with` block.
for seq, d in enumerate(dataset):
    d["claim_id"] = seq

# Number of dev records to run the pipeline on; also embedded in the names of
# the files written by this cell.
sample_size = 5
# Draw exactly `sample_size` records (not a repeated literal) so the sample
# always matches the size advertised in the file names.
sample = random.sample(dataset, sample_size)

# Save the sampled records as a JSON file of their own.
with open(f"data/dev{sample_size}.json", "w") as f:
    json.dump(sample, f)
    
# Convert the sampled records to Datapoint objects.
datapoints = [Datapoint.from_dict(d) for d in sample]
# `submission` collects the `to_submission()` form of each result (dumped as JSON);
# `dump` collects the pipeline result objects themselves (pickled).
submission = []
dump = []

for dp in tqdm(datapoints):
    pipeline_result = pipeline(dp)
    submission.append(pipeline_result.to_submission())
    dump.append(pipeline_result)
    # Both output files are rewritten in full on every iteration, so they always
    # hold the results gathered so far -- presumably a checkpoint in case the
    # run is interrupted (TODO confirm intent).
    with open(f"data_store/submission_dev{sample_size}.json", "w") as f:
        json.dump(submission, f, indent=4)
    with open(f"data_store/pipeline_result_dev{sample_size}.pkl", "wb") as f:
        pickle.dump(dump, f)

  0%|          | 0/5 [00:00<?, ?it/s]

### Now let's see if our submission fits the eval script

In [62]:
# Run the evaluation script with the sampled records as the label file and the
# submission JSON as the prediction file; both paths are built from sample_size.
%run src/prediction/evaluate_veracity.py --label_file data/dev{sample_size}.json --prediction_file data_store/submission_dev{sample_size}.json

Question-only score (HU-meteor):             0.5523442417622183
Question-answer score (HU-meteor):           0.3133973943943565
Veracity F1 scores:
 * Supported:                                0.0
 * Refuted:                                  0.8
 * Not Enough Evidence:                      0.0
 * Conflicting Evidence/Cherrypicking:       0.0
 * macro:                                    0.2
 * acc:                                      0.4
--------------------
AVeriTeC scores:
 * Veracity scores (meteor @ 0.1):           0.4
 * Veracity scores (meteor @ 0.2):           0.4
 * Veracity scores (meteor @ 0.25):          0.4
 * Veracity scores (meteor @ 0.3):           0.2
 * Veracity scores (meteor @ 0.4):           0.2
 * Veracity scores (meteor @ 0.5):           0.0
--------------------
AVeriTeC scores by type @ 0.25:
 * Veracity scores (Position Statement):     0.36029405298961437
 * Veracity scores (Quote Verification):     0.1943704510551867
 * Veracity scores (Event/Property Claim):  

### And check that we pickled all the dumps correctly

In [64]:
# Load the pickled pipeline results back to check the round trip.
# A `with` block closes the file handle deterministically, and the path is
# built from `sample_size` so it matches the file written by the run loop.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by
# this notebook, never untrusted ones.
with open(f"data_store/pipeline_result_dev{sample_size}.pkl", "rb") as f:
    dump = pickle.load(f)
dump

[PipelineResult(datapoint=Datapoint(claim='Sen. Martha McSally “supported a plan to turn Medicare into a voucher program and ‘shift costs’ onto seniors” and “even supported raising the retirement age.', claim_id=423, claim_date='2-9-2020', speaker='Mark Kelly', original_claim_url=None, reporting_source='Democratic presidential candidate Hillary Clinton campaign website.', location_ISO_code='US', label='Conflicting Evidence/Cherrypicking', split='dev', metadata={'required_reannotation': False, 'justification': 'Although Sen. Martha McSally endorsed Medicare into vouchers - she then supported a motion to prevent this from happening. She did support raising retirement age.', 'fact_checking_article': 'https://web.archive.org/web/20210522171250/https://www.politifact.com/factchecks/2020/sep/14/mark-kelly/fact-checking-mark-kelly-martha-mcsallys-stance-me/', 'claim_types': ['Position Statement', 'Quote Verification', 'Event/Property Claim'], 'fact_checking_strategies': ['Written Evidence', '