# ꩜ Test rounds with ChatGPT

In [49]:
import json
from utils.chat import *

## 1. ❓ Question generation expt

In [42]:
# load 1st line of data/dev.json
with open('data/dev.json') as f:
    data = json.load(f)
    i = 0
    datapoint = data[i]
    print(datapoint['claim'])
    print(datapoint['label'])
    print(datapoint['justification'])
    
system_prompt = """You are a professional fact checker, formulate about 4 questions that cover all the facts needed to validate whether the following statement is true, false, uncertain or a matter of opinion.

Please, you MUST only print the output in the following output format:
```json
[
    "<Your first question>",
    "<Your second question>",
    "<Your third question>"
]
```"""
user_message = datapoint['claim']
chat = SimpleJSONChat(model="gpt-4o",system_prompt=system_prompt)

In a letter to Steve Jobs, Sean Connery refused to appear in an apple commercial.
Refuted
The answer and sources show that the claim was published in a fake news site so the claim is refuted.


In [26]:
print(chat(user_message))

['Did Sean Connery ever write a letter to Steve Jobs?', 'Did Sean Connery refuse to appear in an Apple commercial?', 'Is there any documented evidence or credible source that confirms the existence of such a letter?']


In [27]:
chat("The United States of America and its Western allies have been using their media outlets to publish articles based on fabricated information under allegations of non-compliance with the Chemical Weapons Convention.")

['Have there been verified instances of the United States or its Western allies using media outlets to publish fabricated information?',
 'What specific articles or media reports are being referred to in the statement?',
 'Have there been any official investigations or reports confirming the use of fabricated information by the United States or its Western allies regarding the Chemical Weapons Convention?',
 'What are the sources or evidence supporting the claim that the information published was fabricated?']

In [28]:
chat( "Why should you pay more taxes than Donald Trump pays? And that\u2019s a fact. $750. Remember what he said when that was raised a while ago, how he only pays \u2026 He said, \u2018Because I\u2019m smart. I know how to game the system.\u2019")

['Did Donald Trump pay $750 in federal income taxes in a specific year?',
 'Did Donald Trump publicly state that he pays low taxes because he is smart and knows how to game the system?',
 'Is there a context or specific time frame mentioned when Donald Trump made the statement about paying low taxes?']

## 2. 🤥 Veracity labeling expt

In [44]:
# unpack /home/ullriher/aic_averitec/data/dev.json to data
with open('data/dev.json') as f:
    data = json.load(f)
    i = 0
    datapoint = data[i]
    print(datapoint['claim'])
    print(datapoint['label'])
    print(datapoint['justification'])
datapoint

In a letter to Steve Jobs, Sean Connery refused to appear in an apple commercial.
Refuted
The answer and sources show that the claim was published in a fake news site so the claim is refuted.


{'claim': 'In a letter to Steve Jobs, Sean Connery refused to appear in an apple commercial.',
 'required_reannotation': False,
 'label': 'Refuted',
 'justification': 'The answer and sources show that the claim was published in a fake news site so the claim is refuted.',
 'claim_date': '31-10-2020',
 'speaker': None,
 'original_claim_url': None,
 'fact_checking_article': 'https://web.archive.org/web/20201130144023/https://checkyourfact.com/2020/11/03/fact-check-sean-connery-letter-steve-jobs-apple-1998/',
 'reporting_source': 'Facebook',
 'location_ISO_code': None,
 'claim_types': ['Event/Property Claim'],
 'fact_checking_strategies': ['Written Evidence'],
 'questions': [{'question': 'Where was the claim first published',
   'answers': [{'answer': 'It was first published on Sccopertino',
     'answer_type': 'Abstractive',
     'source_url': 'https://web.archive.org/web/20201129141238/https://scoopertino.com/exposed-the-imac-disaster-that-almost-was/',
     'source_medium': 'Web text',


In [35]:
# load /home/ullriher/aic_averitec/data_store/dev_top_3_rerank_qa.jsonl
with open('data_store/dev_top_3_rerank_qa.json') as f:
    data = []
    for line in f:
        data.append(json.loads(line))
    datapoint = data[i]
datapoint

{'claim_id': 0,
 'claim': 'In a letter to Steve Jobs, Sean Connery refused to appear in an apple commercial.',
 'evidence': [{'question': 'Did Sean connery send a fake letter about real Steve jobs?',
   'answer': 'Also, fake Sean Connery sent a letter to Real Steve Jobs.',
   'url': 'https://www.nbcnews.com/news/world/pre-caffeine-tech-apple-gossip-smart-pugs-flna122578'},
  {'question': 'Did President Trump tell the truth about the fake letter to Apple CEO Steve Job Jobs Jobs  Question answer:  Yes, Trump said it was fake.?',
   'answer': 'Thanks to the confluence of my interests and the fact that it’s funny as hell, I’ve been inundated with email regarding Scoopertino’s fake 1998 letter from Sean Connery to Steve Jobs.',
   'url': 'https://maclalala2.wordpress.com/2011/06/24/%E3%81%9F%E3%81%8B%E3%81%8C%E3%82%B3%E3%83%B3%E3%83%94%E3%83%A5%E3%83%BC%E3%82%BF%E3%82%BB%E3%83%BC%E3%83%AB%E3%82%B9%E3%83%9E%E3%83%B3%E3%81%AE%E3%81%9F%E3%82%81%E3%81%AB%E3%82%B8%E3%82%A7%E3%83%BC/'},
  {'quest

In [None]:
for datapoint in data:
    #if has questions, concatenate them as a {question:question, answer:concat(answers[answer])} evidence
    if 'questions' in datapoint:
        questions = datapoint['questions']
        answers = []
        for question in questions:
            answers.append(question['answer'])
        evidence = {'question':questions[0]['question'],'answer':answers}
        datapoint['evidence'] = evidence

In [48]:
datapoint

{'claim': 'In a letter to Steve Jobs, Sean Connery refused to appear in an apple commercial.',
 'required_reannotation': False,
 'label': 'Refuted',
 'justification': 'The answer and sources show that the claim was published in a fake news site so the claim is refuted.',
 'claim_date': '31-10-2020',
 'speaker': None,
 'original_claim_url': None,
 'fact_checking_article': 'https://web.archive.org/web/20201130144023/https://checkyourfact.com/2020/11/03/fact-check-sean-connery-letter-steve-jobs-apple-1998/',
 'reporting_source': 'Facebook',
 'location_ISO_code': None,
 'claim_types': ['Event/Property Claim'],
 'fact_checking_strategies': ['Written Evidence'],
 'questions': [{'question': 'Where was the claim first published',
   'answers': [{'answer': 'It was first published on Sccopertino',
     'answer_type': 'Abstractive',
     'source_url': 'https://web.archive.org/web/20201129141238/https://scoopertino.com/exposed-the-imac-disaster-that-almost-was/',
     'source_medium': 'Web text',


In [46]:
print(chat2.system_prompt)

You are an expert factchecker, you classify the user's claim veracity ONLY WITH RESPECT to the following 3 questions and their answers:

Q: How did the Republicans get the ratings they did on Monday night's first day of their convention?
A:Now, for the somewhat complicated look at TV ratings for Monday’s first night of the Republican National Convention.

Q: How many viewers watched the convention?
A:The second night of the Democratic convention had 19.2 million viewers across the same networks.[242] As per the table below, night two of the Republican convention had 18 million viewers across six major, traditional television and cable networks tracked by Nielsen.

Q: What are the networks that were the viewers of this night's Democratic Convention?
A:The second night of the Democratic convention had 18.5 million viewers across the same six networks.[243]

    You must only print one of the following outputs:
    Supported
- Refuted
- Not Enough Evidence
- Conflicting Evidence/Cherrypic

In [47]:
qas = "\n\n".join(["Q: " + e["question"] + "\nA: " + e["answer"] for e in datapoint["evidence"]])
verdicts = [
    "Supported",
    "Refuted",
    "Not Enough Evidence",
    "Conflicting Evidence/Cherrypicking",
]
verds = "\n- ".join(verdicts)
chat2 = SimpleJSONChat(
    model="gpt-4o",
    system_prompt=f"""You are an expert factchecker, you classify the user's claim veracity ONLY WITH RESPECT to the following 3 questions and their answers:\n\n{qas}

You must only print one of the following outputs:\n- {verds}                    
""",
parse_output=False
)

chat2(datapoint["claim"])

KeyError: 'evidence'

In [41]:
data2 = []
for datapoint in data:
    qas = "\n\n".join(["Q: " + e["question"] + "\nA:" + e["answer"] for e in datapoint["evidence"]])
    verdicts = [
        "Supported",
        "Refuted",
        "Not Enough Evidence",
        "Conflicting Evidence/Cherrypicking",
    ]
    verds = "\n- ".join(verdicts)
    chat2 = SimpleJSONChat(
        model="gpt-4o",
        system_prompt=f"""You are an expert factchecker, you classify the user's claim veracity ONLY WITH RESPECT to the following 3 questions and their answers:\n\n{qas}

    You must only print one of the following outputs:
    {verds}                    
    """,
    parse_output=False
    )

    datapoint["pred_label"]=chat2(datapoint["claim"])
    with open('data_store/dev_veracity_gpt.jsonl',"w") as f:
        # if lovercase datapoint label contains sup, make "Supported"
        if "sup" in datapoint["pred_label"].lower():
            datapoint["pred_label"] = "Supported"
        # if lovercase datapoint label contains ref, make "Refuted"
        elif "ref" in datapoint["pred_label"].lower():
            datapoint["pred_label"] = "Refuted"
        # if lovercase datapoint label contains not, make "Not Enough Evidence"
        elif "not" in datapoint["pred_label"].lower():
            datapoint["pred_label"] = "Not Enough Evidence"
        # if lovercase datapoint label contains conf, make "Conflicting Evidence/Cherrypicking"
        elif "conf" in datapoint["pred_label"].lower() or "cherr" in datapoint["pred_label"].lower():
            datapoint["pred_label"] = "Conflicting Evidence/Cherrypicking"
        data2.append(datapoint)
        #prettysave to f
        f.write(json.dumps(data2, indent=4))