# Using a Large Language Model for text annotation
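This notebook uses GPT-4 (the `gpt-4` chat model, called through the OpenAI API) to carry out zero-shot annotation of X/Twitter messages: for each message, the model guesses the political affiliation of the poster. The analysis covers 11 countries.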

The results are stored persistently and are compared with the annotations produced by supervised models, human experts, and crowd workers.

In [2]:
# Imports 
import pandas as pd
import time
import openai
# Placeholder only: substitute your own OpenAI API key before running the cells below,
# and avoid saving a real key in a copy of this notebook that you share.
openai.api_key = "XXXXXX" # Your API key here

## Analyzing the messages using the OpenAI API

In [128]:
# This function takes a message and uses a call to the OpenAI API to annotate the message
def guess_tweet(tweet, model, temperature, instruction, max_retries=50, wait_seconds=10):
    """
    Annotate a tweet with a chat model called through the OpenAI API.

    Parameters:
    tweet (str): The tweet to be analyzed.
    model (str): The language model to use (e.g., 'gpt-4').
    temperature (float): The temperature setting for the model.
    instruction (str): The annotation prompt, sent as the system message.
    max_retries (int): How many times a failed call is retried after the
        first attempt before giving up (default 50).
    wait_seconds (float): Seconds to wait between attempts (default 10).

    Returns:
    str: The message content of every choice in the response, concatenated.

    Raises:
    Exception: Whatever the API call raised on the last attempt, re-raised
        unchanged once max_retries retries have also failed.

    The tweet is sent as the user message, wrapped in single quotes. Every
    failure is printed. No wait is performed after the final failure, since
    nothing is retried afterwards.
    """
    failures = 0

    # The API is at times unstable, so we catch exceptions and try repeatedly
    while True:
        try:
            response = openai.ChatCompletion.create(
                model=model,
                temperature=temperature,
                messages=[
                    {"role": "system", "content": f"{instruction}"},  # The annotation prompt
                    {"role": "user", "content": f"'{tweet}'"}  # The message to annotate
                ]
            )
            break  # Successfully obtained a response

        except Exception as e:
            print("Caught exception. Waiting...")
            print(e)
            failures += 1

            if failures > max_retries:
                print("Too many failures. Giving up.")
                raise  # Bare raise keeps the original traceback intact

            time.sleep(wait_seconds)  # Wait before retrying

    # Concatenate the content of all choices in the response
    return ''.join(choice.message.content for choice in response.choices)


def process_country(instruction, file, temperature, column, nr_runs, model='gpt-4'):
    """
    Annotate a pickled dataset until every row holds nr_runs predictions.

    Parameters:
    instruction (str): Annotation prompt passed on to guess_tweet.
    file (str): Path of the pickle file holding the dataset; it is read once
        at the start and rewritten after every successful prediction.
    temperature (float): Temperature setting for the language model.
    column (str): Column holding, for each row, the list of predictions so far.
    nr_runs (int): Number of predictions wanted for each row.
    model (str): The model used for the predictions (default is 'gpt-4').

    On each pass one row is drawn at random among those whose list in
    `column` is shorter than nr_runs; the text in its 'text' column is
    annotated and the result is appended to that row's list. The function
    returns once no such row remains.

    Up to 10 failed passes are tolerated over the whole run (the counter is
    never reset); the next failure is re-raised to the caller.
    """
    sample = pd.read_pickle(file)
    errorcount = 0

    while True:
        # Rows whose prediction list is still shorter than nr_runs
        needs_more = sample[column].map(len) < nr_runs
        pending = sample.loc[needs_more]
        remaining = len(pending)
        print(f"There are {remaining} left to process.")

        # Nothing left to annotate: we are finished
        if not remaining:
            print("All done!")
            return

        # Draw one pending row at random and remember its index label
        picked = pending.sample()
        index = picked.index.values[0]
        text = picked['text'].values[0]

        # Pause for a second before making the next API call
        time.sleep(1)
        try:
            guess = guess_tweet(
                text,
                model=model,
                temperature=temperature,
                instruction=instruction,
            )

            # The cell holds a list, so the prediction is appended in place
            sample[column][index].append(guess)
            print(f"Guess is: {guess}")

            # Persist after every prediction so progress is saved as we go
            sample.to_pickle(file)

        except Exception as err:
            print(f"Error. Unexpected {err=}, {type(err)=}")
            if errorcount >= 10:
                # Too many errors in total: stop and hand the exception to the caller
                print("Too many errors. Giving up.")
                raise

            # Still within the error budget: count the failure and go around again
            errorcount += 1
            print("Error running. Will just keep trying though.")





## Example run: Germany
This provides code to show how the LLM is run on a particular country. The same process was used for each country.

The results are stored persistently, for later analysis. 

In [145]:
# Example: the run for Germany

# Model settings
MODEL = 'gpt-4'
TEMP = 0.2
NR_RUNS = 1  # Number of predictions requested per message

# Where the predictions are kept
COLUMN = 'gpt4_temp02'
FILE = 'tweet_process_germany.pkl'

# The annotation prompt. The text is passed to the model exactly as written here
# (including the spelling "Germn"), so it is deliberately left untouched.
INSTRUCTION = (
    "You will be given a Twitter post from a parliamentarian in Germany, "
    "sent during the two months preceding the 2021 German general election, "
    "that is, between July 26th, 2021, and September 26th, 2021. "
    "Your task is to use your knowledge of Germn politics to make an educated "
    "guess on whether the poster belongs to the CDU (Christlich Demokratische "
    "Union Deutschlands) or SPD (Sozialdemokratische Partei Deutschlands). "
    "Your response MUST BE either 'CDU' or 'SPD'. "
    "If you cannot make an educated guess on the basis of the message, "
    "just guess either 'CDU' or 'SPD'. "
    "Do NOT motivate your answer."
)

# Build the working file: one empty prediction list per message.
# NOTE: this writes FILE afresh with an empty COLUMN, so running this cell again
# replaces whatever predictions were previously saved under that path.
gdf = pd.read_csv('GERMANY_sample_tweets.csv')
gdf[COLUMN] = [[] for _ in gdf.index]
gdf.to_pickle(FILE)

process_country(
    instruction=INSTRUCTION,
    file=FILE,
    temperature=TEMP,
    column=COLUMN,
    nr_runs=NR_RUNS,
    model=MODEL,
)

# Have LLM motivate its annotation for a particular message

This asks ChatGPT to motivate its response for a particular message. It is used to explore the knowledge associated with particular annotation decisions.

In [None]:
# The annotation to be explained, and the message it was given for
answer = 'Republican'
message = '''"Down Syndrome Awareness Month helps raise awareness for what it means to have Down syndrome and how individuals with Down syndrome play a vital role in our lives and communities."'''

# Replay the annotation exchange (prompt, message, the model's answer) and then
# ask for a motivation as a follow-up user turn. The system prompt is sent
# exactly as written here, so its wording is left untouched.
response = openai.ChatCompletion.create(
    model="gpt-4",
    temperature=0.2,
    messages=[
        {
            "role": "system",
            "content": (
                "You will be given a Twitter post from a US politician, "
                "sent during the two months preceding the 2020 US presidential election, "
                "that is, between September 3rd, 2020, and November 3rd, 2020. "
                "Your task is to use your knowledge of US politics to make an educated "
                "guess on whether the poster is a Democrat or Republican. "
                "Your response MUST BE either 'Democrat' or 'Republican'. "
                "You are not allowed to respond anything else. "
                "If you cannot make an educated guess on the basis of the message, "
                "just guess either 'Democrat' or 'Republican'. "
                "Do NOT motivate your answer in your first respponse."
            ),
        },
        {"role": "user", "content": f"'{message}'"},
        {"role": "assistant", "content": f"{answer}"},
        {"role": "user", "content": "Motivate your response."},
    ],
)

# Join the content of every returned choice into one string and show it
result = ''.join(choice.message.content for choice in response.choices)

print(result)
