# Evaluate GPT-4o mini Agreement with Human Annotations on Article Labels
This script evaluates the agreement of GPT-4o mini with human annotations on a dataset of articles labeled as 'Fake' or 'Real'.

In [None]:
!pip install openai==0.28

In [None]:
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# pasted into (and saved with) the notebook. When the variable is unset or
# empty, fall back to the original 'sk-' placeholder, which must then be
# replaced by hand.
import os

key = os.environ.get('OPENAI_API_KEY') or 'sk-'

In [None]:
import openai
import pandas as pd

# Authenticate the openai module with the `key` value assigned in the earlier cell
openai.api_key = key

# Load the dataset to be judged into a DataFrame.
# The evaluation loop below reads its 'text_content_summary' and 'label' columns.
file_path = 'Summary_datasets/JUDGE_500_mistral.csv' # Replace with your actual file path for the dataset
data = pd.read_csv(file_path)

# Function to query GPT-4o mini
def query_model(article, label):
    """Ask GPT-4o mini whether ``label`` accurately describes ``article``.

    Builds the fact-checking prompt below and sends it as a single
    chat-completion request (temperature 0, at most 3 completion tokens).

    Args:
        article: Article text interpolated into the prompt.
        label: Proposed label interpolated into the prompt; the prompt
            describes the 'Fake' and 'Real' cases.

    Returns:
        The model's reply with surrounding whitespace stripped (the prompt
        asks for 'YES' or 'NO'), or None if anything raised while building
        the request or reading the reply. The error is printed in that case.
    """
    prompt = f"""
                You are a fact-checking assistant.
                Review the article below and determine if the label '{label}' accurately describes the article.
                Respond with 'YES' if you agree with the label and 'NO' if you disagree with the label.

                For 'Fake' labels:
                - The article contains any misleading or false information, or is satirical.

                For 'Real' labels:
                - The article contains accurate and verifiable information.

                Respond only with 'YES' or 'NO'.

                Article:
                {article}

                Label:
                {label}
            """

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    request_options = dict(
        model="gpt-4o-mini",
        messages=chat_messages,
        max_tokens=3,  # the answer is a single YES/NO, so keep the reply short
        temperature=0.0,  # zero temperature for deterministic output
        n=1,
        stop=None,
    )

    try:
        completion = openai.ChatCompletion.create(**request_options)
        reply = completion.choices[0].message['content']
        return reply.strip()
    except Exception as exc:
        # Best-effort: report the failure and let the caller record None for this row.
        print(f"Error querying model: {exc}")
        return None

# Evaluating annotations
import os

# Resolve the destination up front and make sure its folder exists, so a
# missing directory is discovered before any API calls are made instead of
# failing at save time after every row has already been processed.
output_path = 'Summary_datasets/judge-results/4-mistral_with_agreement.csv' # Replace with your desired output path
os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

results = []
total_rows = len(data)
# Count rows with enumerate rather than the DataFrame index label, so the
# progress message stays correct even when the index is not 0..n-1.
for position, (_, row) in enumerate(data.iterrows(), start=1):
    # query_model returns the model's stripped reply, or None when the call failed
    agreement = query_model(row['text_content_summary'], row['label'])
    results.append(agreement)
    print(f"Processed {position}/{total_rows}")

# Add results to the dataframe and save it
data['AGREE'] = results
data.to_csv(output_path, index=False)
print("File saved with agreement results.")

In [None]:
import openai
import pandas as pd

# Authenticate the openai module with the `key` value assigned in the earlier cell
openai.api_key = key
# Load the dataset to be classified into a DataFrame.
# The classification loop below reads its 'text_content_summary' column.
file_path = 'Summary_datasets/LLM Judge-majority_500.csv' # Replace with your actual file path for the dataset
data = pd.read_csv(file_path)

# Function to query the model
def query_model(article, model_name="gpt-4o-mini"):
    """Classify ``article`` as 'Real' or 'Fake' using a chat model.

    Sends the fact-checking prompt below as a single chat-completion request
    (temperature 0, at most 10 completion tokens) and maps the reply onto a
    label by substring match.

    Args:
        article: Article text interpolated into the prompt.
        model_name: Name of the chat model to query.

    Returns:
        "Real" if the reply contains "Factually Correct", "Fake" if it
        contains "Factually Incorrect", "Unknown" if it contains neither,
        or None if anything raised while querying or reading the reply
        (the error is printed in that case).
    """
    prompt = f"""
                You are a helpful news fact-checking bot trained to assess the accuracy of information. Your task is to analyze the given article and determine whether it is 'Factually Correct' or 'Factually Incorrect'.

                Fact-checking is the methodical process of verifying claims in public discourse or media reports. It is vital for countering misinformation and disinformation, thereby enhancing public knowledge and trust. Consider the following in your evaluation:

                Misinformation: Incorrect or misleading information shared without intent to harm.
                Disinformation: Information that is knowingly false, often prejudiced, and disseminated with the intent to mislead.

                Your analysis should include:

                Verification of key claims against multiple reliable sources.
                Identification of logical fallacies or statements that may mislead readers.
                Assessment of the context in which the information was presented, including the source’s history and potential motivations.
                Evaluation for any presence of hate speech, linguistic harm, or intent to spread prejudice.

                Provide your assessment in the following format:

                Classification: [Factually Correct/Factually Incorrect]
                Explanation: Provide a concise, evidence-based explanation for your classification. Reference specific examples from the article and contradicting evidence from trusted sources, if applicable.

                Ensure to remain objective, basing your assessment strictly on facts and evidence rather than personal opinions or biases.

                Article to analyze:
                {article}
            """

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    # Checked in this order; the first marker found in the reply decides the label.
    verdict_markers = (
        ("Factually Correct", "Real"),
        ("Factually Incorrect", "Fake"),
    )

    try:
        completion = openai.ChatCompletion.create(
            model=model_name,
            messages=chat_messages,
            max_tokens=10,  # only the leading classification phrase is needed
            temperature=0.0,
        )
        reply = completion.choices[0].message['content'].strip()
        for marker, verdict in verdict_markers:
            if marker in reply:
                return verdict
        return "Unknown"
    except Exception as exc:
        # Best-effort: report the failure and let the caller record None for this row.
        print(f"Error querying model: {exc}")
        return None

# Evaluating annotations
import os

# Resolve the destination up front and make sure its folder exists, so a
# missing directory is discovered before any API calls are made instead of
# failing at save time after every row has already been processed.
output_path = 'Summary_datasets/judge-results/4-gpt4o-labels.csv' # Replace with your desired output path
os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

results = []
total_rows = len(data)
# Count rows with enumerate rather than the DataFrame index label, so the
# progress message stays correct even when the index is not 0..n-1.
for position, (_, row) in enumerate(data.iterrows(), start=1):
    # query_model returns "Real", "Fake" or "Unknown", or None when the call failed
    classification = query_model(row['text_content_summary'])
    results.append(classification)
    print(f"Processed {position}/{total_rows}")

# Add results to the dataframe and save it
data['Classification'] = results
data.to_csv(output_path, index=False)
print("File saved with agreement results.")