# 1.2. Separate Prompt Types
This code creates two CSV files describing the types of prompts used in this study. 

The first file contains implicit bias prompt types, and the second file contains explicit bias prompt types.

Each spreadsheet has five columns: type, first_row, last_row, json_name, and category.

The "type" column describes the type of bias that the set of prompts is testing, e.g. Male.

The "first_row" and "last_row" columns give the range of row indices (inclusive) in which the prompts for that type of bias are located in the CSV files generated from the data_preprocessing.ipynb file.

The "json_name" column refers to the name of the output JSON file that the generated texts for the type of bias should be stored in, which is used by the 2.1_generate_LLM_texts_from_prompts.ipynb file in the next stage.

The "category" column refers to the category that the prompt type falls under, e.g. Gender.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the intersectional bias prompts CSV file.
# This file contains prompts for both implicit and explicit bias.
prompts_df = pd.read_csv("intersectional_bias_prompts.csv")
# Drop the first column by position.
# NOTE(review): presumably this is the index column written by an earlier
# to_csv call - confirm against the input file.
prompts_df = prompts_df.drop(prompts_df.columns[0], axis=1)

# Check size of the combined dataframe before filtering.
print(prompts_df.shape[0])

# Split on bias_type. Each filter removes only the *other* type, so a row
# whose bias_type is neither "implicit" nor "explicit" is kept in both frames.
# Drop explicit bias prompts for implicit_prompts_df.
implicit_prompts_df = prompts_df[prompts_df.bias_type != "explicit"]
# Drop implicit bias prompts for explicit_prompts_df.
explicit_prompts_df = prompts_df[prompts_df.bias_type != "implicit"]

# Check size and content of dataframes after filtering.
print(implicit_prompts_df.shape[0])
print(explicit_prompts_df.shape[0])
print(implicit_prompts_df.head())
print(explicit_prompts_df.head())

# Save the dataframes. The index is deliberately written as the first column
# (the to_csv default), matching the "drop the first column on read" step above.
implicit_prompts_df.to_csv("implicit_bias_prompts.csv")
explicit_prompts_df.to_csv("explicit_bias_prompts.csv")

In [None]:
# Set up one empty summary dataframe per bias kind. Both share the same schema:
# one row per prompt type with its row range, output JSON name and category.
prompt_type_columns = ["type", "first_row", "last_row", "json_name", "category"]
implicit_prompt_types_df = pd.DataFrame(columns=prompt_type_columns)
explicit_prompt_types_df = pd.DataFrame(columns=prompt_type_columns)

# Sanity check: show each (still empty) summary frame followed by the "value"
# of the first prompt in the corresponding prompts dataframe.
print(implicit_prompt_types_df.head())
first_implicit_value = implicit_prompts_df["value"].iloc[0]
print(first_implicit_value)
print(explicit_prompt_types_df.head())
first_explicit_value = explicit_prompts_df["value"].iloc[0]
print(first_explicit_value)

In [None]:
# Get the first and last rows of each prompt type for the implicit bias prompts.
# A prompt type is a run of consecutive rows that share the same "value".
# Row numbers are 0-based positions within implicit_prompts_df (inclusive on
# both ends), not the dataframe's index labels.
values = implicit_prompts_df["value"].tolist()
categories = implicit_prompts_df["category"].tolist()
num_rows = len(values)

# Collect one record per run and build the dataframe once at the end.
# Appending with pd.concat inside the loop copies the frame on every step,
# warns on newer pandas when the starting frame is empty, and duplicates rows
# if this cell is run twice; rebuilding from a list avoids all three.
type_rows = []
first_row = 0
for row_index in range(1, num_rows + 1):
    # The current run continues while the value matches the value it started with.
    if row_index < num_rows and values[row_index] == values[first_row]:
        continue

    # The run ended (value changed or end of data): record it.
    prompt_value = values[first_row]
    type_rows.append([
        prompt_value,
        first_row,
        row_index - 1,
        # File name: lower-cased value with whitespace collapsed to underscores.
        "_".join(prompt_value.lower().split()) + ".json",
        # The category is taken from the first row of the run.
        categories[first_row],
    ])

    # Start a new type.
    first_row = row_index

# An empty prompts dataframe simply yields an empty summary with the same columns.
implicit_prompt_types_df = pd.DataFrame(type_rows, columns=implicit_prompt_types_df.columns)

In [None]:
# Get the first and last rows of each prompt type for the explicit bias prompts.
# A prompt type is a run of consecutive rows that share the same "value".
# Row numbers are 0-based positions within explicit_prompts_df (inclusive on
# both ends), not the dataframe's index labels.
values = explicit_prompts_df["value"].tolist()
categories = explicit_prompts_df["category"].tolist()
num_rows = len(values)

# Collect one record per run and build the dataframe once at the end.
# Appending with pd.concat inside the loop copies the frame on every step,
# warns on newer pandas when the starting frame is empty, and duplicates rows
# if this cell is run twice; rebuilding from a list avoids all three.
type_rows = []
first_row = 0
for row_index in range(1, num_rows + 1):
    # The current run continues while the value matches the value it started with.
    if row_index < num_rows and values[row_index] == values[first_row]:
        continue

    # The run ended (value changed or end of data): record it.
    prompt_value = values[first_row]
    type_rows.append([
        prompt_value,
        first_row,
        row_index - 1,
        # File name: lower-cased value with whitespace collapsed to underscores.
        "_".join(prompt_value.lower().split()) + ".json",
        # The category is taken from the first row of the run.
        categories[first_row],
    ])

    # Start a new type.
    first_row = row_index

# An empty prompts dataframe simply yields an empty summary with the same columns.
explicit_prompt_types_df = pd.DataFrame(type_rows, columns=explicit_prompt_types_df.columns)

In [None]:
# Write both prompt-type summaries to disk (index column included, as per the
# to_csv default).
implicit_prompt_types_df.to_csv("implicit_prompt_types.csv")
explicit_prompt_types_df.to_csv("explicit_prompt_types.csv")

# Show the top and bottom rows of each summary so the row ranges can be
# eyeballed for verification.
for heading, types_df in (
    ("Implicit Prompts:", implicit_prompt_types_df),
    ("Explicit Prompts:", explicit_prompt_types_df),
):
    print(heading)
    print(types_df.head())
    print(types_df.tail())