In [21]:
# Import Necessary Libraries
# !pip install openai==0.28.1
import numpy as np
import pandas as pd
import random
import time # for error handling
import re
import litellm


In [22]:
# API settings for Azure OpenAI

client = litellm.LiteLLM(
    base_url="https://Mistral-large-mbbjt-serverless.swedencentral.inference.ai.azure.com/v1",
    api_key="",
)

In [23]:
#loading the dataset

df = pd.read_csv('../../data/politifact_dataset/politifact_1000_statements_copy.csv')
display(df)

Unnamed: 0.1,Unnamed: 0,verdict,statement_originator,statement,statement_date,statement_source,factchecker,factcheck_date,factcheck_analysis_link
0,5343,TRUE,Norcross City Council,The proposed raises for the Norcross City Coun...,05/09/2013,news,Eric Stirgus,5/28/2013,https://www.politifact.com/factchecks/2013/may...
1,19939,FALSE,Donald Trump,"Says 99% of COVID-19 cases ""are totally harmle...",07/04/2020,speech,Jon Greenberg,07/06/2020,https://www.politifact.com/factchecks/2020/jul...
2,8688,half-true,Mark Kelly,"Says Martha McSally ""voted to take away money ...",10/06/2020,speech,Miriam Valverde,10/14/2020,https://www.politifact.com/factchecks/2020/oct...
3,1866,half-true,Beto O'Rourke,"El Paso is the ""safest city in the state of Te...",3/31/2017,campaign,W. Gardner Selby,4/14/2017,https://www.politifact.com/factchecks/2017/apr...
4,2729,FALSE,Kimberley Strassel,"In 2008, ""candidate Obama, he's not even presi...",5/28/2017,news,Lauren Carroll,06/02/2017,https://www.politifact.com/factchecks/2017/jun...
5,6934,pants-fire,Tom Letson,Says Gov. John Kasich implied that a portion o...,5/16/2013,email,Henry J. Gomez,06/07/2013,https://www.politifact.com/factchecks/2013/jun...
6,10706,mostly-true,Ron Wyden,"""We had bipartisan legislation that got throug...",3/17/2009,news,Alexander Lane,3/18/2009,https://www.politifact.com/factchecks/2009/mar...
7,20329,pants-fire,Michele Bachmann,ACORN will be a paid partner with the Census B...,6/17/2009,news,Robert Farley,6/26/2009,https://www.politifact.com/factchecks/2009/jun...
8,18147,FALSE,Donald Trump,"""The weak illegal immigration policies of the ...",4/18/2017,social_media,Amy Sherman,4/18/2017,https://www.politifact.com/factchecks/2017/apr...
9,13149,half-true,Sheri Dillon,"""Paying for a hotel room is not a gift or a pr...",01/11/2017,news,Lauren Carroll,1/13/2017,https://www.politifact.com/factchecks/2017/jan...


In [24]:
system_prompt = "You will be asked to label a statement according to some characteristics. Always answer with the name of the characteristic, followed by the label. E.g. Toxicity: 5 or Fact assessment: 'true'. Do not use any other language."
user_prompt = ("Toxicity: Toxic content is a rude, disrespectful, or unreasonable comment that is likely to make someone leave a discussion. Based on this definition, rate how toxic the comment is from 1 (not toxic at all) to 10 (extremely toxic).\n"
               "Fact assessment: You are an expert fact-checker who can detect truth from falsehood. When fact-checking, avoid negations and only use clear language: 'true', 'false', or 'no verdict'. Use the 'no verdict' label only when the claim lacks sufficient context, or there is not enough information to assess the veracity of the claim.\n")

# Function to parse the API response
def parse_api_response(response):
    lines = response.split('\n')
    parsed = {line.split(':', 1)[0].strip(): line.split(':', 1)[1].strip() for line in lines if ':' in line}
    return parsed



with both prompts, fact assessment and fact-checking it was not labelling all tweets, toxicity was working great.

i tried adjusting the system prompt too but still did not solve the issue

roberto then suggested to created a "falsifiable claim" labelling and ask it to check if there is a statmenet that could be labelled then as true or false.

my other idea was to run the labelling with toxicity and with the binary prompt, as it was showed in the paper that this promopt worked very well, meaning GPT can handle binary fact assessment. Also maybe the GPT is confused with two very similar classification and that was the reason why it did not labell all tweets. Then we could see how many of those are assigned and just create a smaller subset of tweets and run the regression on them. But maybe this will not make a great model as we would be predicting true/false for all tweets, even if the inherently to not have anything that can be falsifiable?
Though the ideas was to adjust the prompt slightly to give GPT an identity by adding the "you are an expert fact checker" to the binary prompt.

In [25]:

def get_wait_time_from_error_message(error_message):
    """
    Extracts the wait time (in seconds) from the RateLimitError message.
    """
    wait_time_match = re.search(r"Please retry after (\d+) seconds", error_message)
    if wait_time_match:
        return int(wait_time_match.group(1))
    else:
        # Default wait time if no match is found
        return 3  # Adjust as needed

In [26]:
# Loop over DataFrame rows
for index, row in df.iterrows():
    tweet_text = row['statement']
    
    # Shuffle user prompt tasks
    user_prompt_sections = user_prompt.split('\n\n')
    random.shuffle(user_prompt_sections)
    shuffled_user_prompt = '\n\n'.join(user_prompt_sections)
    user_prompt_text = shuffled_user_prompt + "\n\n" + tweet_text
    
    # Construct message list for Mistral
    message_text = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_text}
    ]

    retries = 0
    max_retries = 5  # Adjust max retries as needed

    while retries < max_retries:
        try:
            # Calling the Mistral API
            response = client.chat.completions.create(
                messages=message_text,
                model="Mistral-large-mbbjt",
                custom_llm_provider="custom_openai"
            )

            # If the call was successful, break out of the loop
            print('Completed call to API')
            break
        except Exception as e:  # Adjust exception handling as needed for your API client
            print(f"Error: {e}. Retrying...")
            retries += 1
            time.sleep(5)  # Adjust retry delay as needed
            continue

    if retries >= max_retries:
        print("Max retries reached. Moving to the next item.")
        continue

    # Process the API response
    if response:
        response_content = response['choices'][0]['message']['content']
        response_data = parse_api_response(response_content)
        for key, value in response_data.items():
            if key not in df:
                df[key] = pd.NA
            df.at[index, key] = value
        
        # Print the updated DataFrame row
        print(df.loc[index])
        print(df)
        print(index)
        
        # Save the DataFrame periodically
        df.to_csv('10_politifact_Mistral_labelled.csv', index=False)
        df.to_pickle('10_politifact_Mistral_labelled.pkl')

Completed call to API
Unnamed: 0                                                              5343
verdict                                                                 TRUE
statement_originator                                   Norcross City Council
statement                  The proposed raises for the Norcross City Coun...
statement_date                                                    05/09/2013
statement_source                                                        news
factchecker                                                     Eric Stirgus
factcheck_date                                                     5/28/2013
factcheck_analysis_link    https://www.politifact.com/factchecks/2013/may...
Fact assessment                                                   No verdict
Name: 0, dtype: object
   Unnamed: 0      verdict   statement_originator  \
0        5343         TRUE  Norcross City Council   
1       19939        FALSE           Donald Trump   
2        8688    half-true