# 1.2. Sentiment Analysis of LLM-Generated Texts
After generating the texts using our prompts, we will perform sentiment analysis on each LLM-generated text.

We will also check for cases where an attribute listed by the LLM does not actually appear in the corresponding text, in order to verify the accuracy of the LLM-generated attribute lists.

Let's load the libraries we will need.

In [None]:
import pandas as pd
import numpy as np
import json
import math
from textblob import TextBlob

We will start with the texts generated using implicit bias prompts.

In [None]:
# Load information for accessing the JSON files.
implicit_prompt_types_df = pd.read_csv("../1_prompt_engineering/implicit_prompt_types.csv")
implicit_jsons = implicit_prompt_types_df["json_name"]

# Define the folders where the generated texts are stored.
folders = ["gpt_4o_mini/implicit/", "claude_3.5_sonnet/implicit/", "command_r_plus/implicit/", "llama_3.1_70b/implicit/"]

# Attributes from the LLM-provided attribute list that are checked against each text.
attributes = ["occupation", "socioeconomic_status", "religion", "politics", "sexual_orientation", "total_height"]

# Synonymous key phrases that are commonly used by LLMs, keyed by (attribute, lowercased value).
# These are only consulted when the value itself is not found in the text. All variants are
# lowercase because they are matched against the lowercased text.
# NOTE(review): "lower-class" and "upper-class" have no plain "lower class" / "upper class"
# variant, only the "-middle-" forms; confirm whether that is intentional.
attribute_variants = {
    ("sexual_orientation", "homosexual"): ["gay"],
    ("socioeconomic_status", "lower-class"): ["lower-middle-class", "lower middle class"],
    ("socioeconomic_status", "middle-class"): ["middle class"],
    ("socioeconomic_status", "upper-class"): ["upper-middle-class", "upper middle class"],
    ("occupation", "student"): ["school"],
    ("religion", "christian"): ["christianity", "catholic"],
    ("religion", "muslim"): ["islam"],
    ("religion", "jewish"): ["judaism", "jew"],
    ("religion", "hindu"): ["hinduism"],
    ("religion", "buddhist"): ["buddhism"],
    ("religion", "unaffiliated"): ["atheist", "secular", "atheism", "secularism", "agnostic", "agnosticism"],
}

# Iterate through the folders containing the generated texts (one folder per model).
for implicit_texts_folder in folders:
    # Keep track of the number of attributes that were not found.
    num_attribute_not_found = 0
    num_texts_attribute_not_found = 0
    num_texts = 0
    # (attribute, key) pairs where the attribute that the LLM outputted is not found in the text.
    attributes_not_found = []

    print("Current model:", implicit_texts_folder)

    # Iterate through the JSON files (one per prompt type).
    for json_name in implicit_jsons:
        json_path = implicit_texts_folder + json_name
        print("Processing:", json_path)

        # Load the JSON file as a dictionary. The read handle is closed before the file
        # is rewritten below, so the same path is never open for reading and writing at once.
        with open(json_path) as json_file:
            generated_texts = json.load(json_file)

        print("Analyzing sentiment.")

        # Modify the dictionary in place.
        for key, entry in generated_texts.items():
            text = entry["generated_text"]
            num_texts += 1

            # For Llama 3.1, remove the "Here is a 200-word description of..." line from the text.
            if "Here is" in text and "200-word description" in text:
                # Drop the first line, recombine the rest, and strip whitespace from the ends.
                # Rebinding `text` matters: the attribute check and the sentiment analysis
                # below must see the cleaned text, so that the first run and any re-run
                # (where the preamble is already gone) give the same results.
                text = "\n".join(text.split("\n")[1:]).strip()
                entry["generated_text"] = text

            # Entries without an attribute list are counted but not analyzed,
            # because the sentiment scores are stored inside that list.
            if "attributes" not in entry:
                continue

            llm_attributes = entry["attributes"]
            lowered_text = text.lower()
            all_attributes_found = True

            # Check that all of the attributes in the dictionary are found in the text.
            for attribute in attributes:
                # Get the value of the attribute from the list of attributes given by the LLM.
                value = llm_attributes[attribute]

                if attribute == "total_height":
                    # Convert the height in inches to string format i.e. [#'#"].
                    value = str(math.floor(value / 12)) + "'" + str(value % 12) + "\""
                else:
                    # Normalize the stored value to lowercase.
                    value = value.lower()
                    llm_attributes[attribute] = value

                # Check if the value is found in the generated text (not case-sensitive),
                # falling back to the known variants of that value.
                found = value in lowered_text
                if not found:
                    variants = attribute_variants.get((attribute, value), ())
                    found = any(variant in lowered_text for variant in variants)

                # Record the miss if the value is still not found after checking for variants.
                if not found:
                    num_attribute_not_found += 1
                    all_attributes_found = False
                    attributes_not_found.append((attribute, key))

            # If not all attributes were found, increment the count of texts with attributes not found.
            if not all_attributes_found:
                num_texts_attribute_not_found += 1

            # Analyze the sentiment and add it to the attributes dictionary.
            analysis = TextBlob(text)
            llm_attributes["polarity"] = analysis.polarity
            llm_attributes["subjectivity"] = analysis.subjectivity

        print("Done analyzing sentiment.")

        # Write the dictionary back to the same file as JSON data.
        with open(json_path, "w") as f:
            json.dump(generated_texts, f)

        print("New contents saved.")

    # Print the summary of the analysis.
    print("Total number of attributes not found in text:", num_attribute_not_found)
    print("Total number of texts processed:", num_texts)
    print("Total number of texts with attributes not found:", num_texts_attribute_not_found)
    # The value printed is a fraction in [0, 1]; guard against a model with no texts at all.
    fraction_not_found = num_texts_attribute_not_found / num_texts if num_texts else 0.0
    print("Percentage of texts with attributes not found:", fraction_not_found)
    # Notify the user of texts where the attribute was not found.
    print("Attributes not found in texts:")
    print(attributes_not_found)

Now, we will analyze the texts generated using explicit bias prompts.

In [None]:
# Load information for accessing the JSON files.
explicit_prompt_types_df = pd.read_csv("../1_prompt_engineering/explicit_prompt_types.csv")
explicit_jsons = explicit_prompt_types_df["json_name"]

# Define the folders where the generated texts are stored.
folders = ["gpt_4o_mini/explicit/", "claude_3.5_sonnet/explicit/", "command_r_plus/explicit/", "llama_3.1_70b/explicit/"]

# Attributes from the LLM-provided attribute list that are checked against each text.
attributes = ["occupation", "socioeconomic_status", "religion", "politics", "sexual_orientation", "total_height"]

# Synonymous key phrases that are commonly used by LLMs, keyed by (attribute, lowercased value).
# These are only consulted when the value itself is not found in the text. All variants are
# lowercase because they are matched against the lowercased text.
# NOTE(review): "lower-class" and "upper-class" have no plain "lower class" / "upper class"
# variant, only the "-middle-" forms; confirm whether that is intentional.
attribute_variants = {
    ("sexual_orientation", "homosexual"): ["gay"],
    ("socioeconomic_status", "lower-class"): ["lower-middle-class", "lower middle class"],
    ("socioeconomic_status", "middle-class"): ["middle class"],
    ("socioeconomic_status", "upper-class"): ["upper-middle-class", "upper middle class"],
    ("occupation", "student"): ["school"],
    ("religion", "christian"): ["christianity", "catholic"],
    ("religion", "muslim"): ["islam"],
    ("religion", "jewish"): ["judaism", "jew"],
    ("religion", "hindu"): ["hinduism"],
    ("religion", "buddhist"): ["buddhism"],
    ("religion", "unaffiliated"): ["atheist", "secular", "atheism", "secularism", "agnostic", "agnosticism"],
}

# Iterate through the folders containing the generated texts (one folder per model).
for explicit_texts_folder in folders:
    # Keep track of the number of attributes that were not found.
    num_attribute_not_found = 0
    num_texts_attribute_not_found = 0
    num_texts = 0
    # (attribute, key) pairs where the attribute that the LLM outputted is not found in the text.
    attributes_not_found = []

    print("Current model:", explicit_texts_folder)

    # Iterate through the JSON files (one per prompt type).
    for json_name in explicit_jsons:
        json_path = explicit_texts_folder + json_name
        print("Processing:", json_path)

        # Load the JSON file as a dictionary. The read handle is closed before the file
        # is rewritten below, so the same path is never open for reading and writing at once.
        with open(json_path) as json_file:
            generated_texts = json.load(json_file)

        print("Analyzing sentiment.")

        # Modify the dictionary in place.
        for key, entry in generated_texts.items():
            text = entry["generated_text"]
            num_texts += 1

            # Entries without an attribute list are counted but not analyzed,
            # because the sentiment scores are stored inside that list.
            if "attributes" not in entry:
                continue

            llm_attributes = entry["attributes"]
            lowered_text = text.lower()
            all_attributes_found = True

            # Check that all of the attributes in the dictionary are found in the text.
            for attribute in attributes:
                # Get the value of the attribute from the list of attributes given by the LLM.
                value = llm_attributes[attribute]

                if attribute == "total_height":
                    # Convert the height in inches to string format i.e. [#'#"].
                    value = str(math.floor(value / 12)) + "'" + str(value % 12) + "\""
                else:
                    # Normalize the stored value to lowercase.
                    value = value.lower()
                    llm_attributes[attribute] = value

                # Check if the value is found in the generated text (not case-sensitive),
                # falling back to the known variants of that value.
                found = value in lowered_text
                if not found:
                    variants = attribute_variants.get((attribute, value), ())
                    found = any(variant in lowered_text for variant in variants)

                # Record the miss if the value is still not found after checking for variants.
                if not found:
                    num_attribute_not_found += 1
                    all_attributes_found = False
                    attributes_not_found.append((attribute, key))

            # If not all attributes were found, increment the count of texts with attributes not found.
            if not all_attributes_found:
                num_texts_attribute_not_found += 1

            # Analyze the sentiment and add it to the attributes dictionary.
            analysis = TextBlob(text)
            llm_attributes["polarity"] = analysis.polarity
            llm_attributes["subjectivity"] = analysis.subjectivity

        # Spelling corrected so the message matches the one printed for the implicit texts.
        print("Done analyzing sentiment.")

        # Write the dictionary back to the same file as JSON data.
        with open(json_path, "w") as f:
            json.dump(generated_texts, f)

        print("New contents saved.")

    # Print the summary of the analysis.
    print("Total number of attributes not found in text:", num_attribute_not_found)
    print("Total number of texts processed:", num_texts)
    print("Total number of texts with attributes not found:", num_texts_attribute_not_found)
    # The value printed is a fraction in [0, 1]; guard against a model with no texts at all.
    fraction_not_found = num_texts_attribute_not_found / num_texts if num_texts else 0.0
    print("Percentage of texts with attributes not found:", fraction_not_found)
    # Notify the user of texts where the attribute was not found.
    print("Attributes not found in texts:")
    print(attributes_not_found)