## Setup

In [None]:
# Identity group that every prompt in this notebook is written about.
identity = "women"

# Name of the file the generated dataset is saved to (one file per identity).
output_file = "gpt3.5_generated_hs_dataset_{}.json".format(identity)
print(output_file)

In [None]:
import openai
import os

from dotenv import load_dotenv, find_dotenv
# Load variables from a local .env file into the process environment; the
# return value is not needed, hence the throwaway name.
_ = load_dotenv(find_dotenv()) # read local .env file

# Configure the OpenAI client from the environment. os.getenv returns None
# when OPENAI_API_KEY is not set, so a missing key is not reported here.
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [None]:
def get_completion(prompt, model="gpt-3.5-turbo",temperature=0):
    """Send one chat request and return the text of the first reply.

    The request consists of the notebook-level ``system_message`` (system
    role) followed by ``prompt`` (user role).

    Args:
        prompt: Text sent as the user message.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output.

    Returns:
        The ``content`` of the first choice in the response.
    """
    # system_message is a notebook global looked up at call time, so the cell
    # defining it must have been run before this function is called.
    chat_history = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_history,
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [None]:
# helper constant and methods to prepare the prompts

# System-role text that get_completion places first in every chat request.
# NOTE(review): the line break and the indentation of the second line sit
# inside the triple-quoted literal, so they are part of the text that is sent.
system_message = """You are a crowd-source worker who composes messages for a hate speech 
                    project. Write messages with diverse style and length."""

def prompt_factory(msg, n_examples=10, demonstration=None):
    """Build the user prompt asking for a batch of messages.

    Args:
        msg: Phrase completing the sentence "Write N messages that ...".
        n_examples: Number of messages requested in the prompt.
        demonstration: Optional example message; when truthy it is added to
            the prompt wrapped in triple backticks.

    Returns:
        The prompt text. It starts with a newline, every line is indented by
        twelve spaces, and it ends with an indented empty line.
    """
    pad = " " * 12
    parts = ["", f"{pad}Write {n_examples} messages that {msg}."]
    if demonstration:
        parts.append(f"{pad}Example: ```{demonstration}```")
    parts.append(f"{pad}Return the result in JSON list format with the following key: messages.")
    parts.append(f"{pad}Result:")
    parts.append(pad)
    return "\n".join(parts)

In [None]:
import json
from math import ceil
import logging

def get_messages(prompt_phrase, demonstration=None, n=40, temperature=0.5,
                 n_examples=20, max_failures=5):
    """Query the model repeatedly until at least ``n`` messages are collected.

    Every call sends the same prompt and expects a JSON object whose
    ``"messages"`` key holds a non-empty list. Replies that cannot be parsed,
    lack that key, or carry anything other than a non-empty list are logged
    and skipped. The loop gives up after ``max_failures`` consecutive calls
    that add nothing, so a model that keeps refusing or returning malformed
    output cannot spin (and bill) forever.

    Args:
        prompt_phrase: Phrase handed to ``prompt_factory`` describing the
            messages to write; also used as the label for each message.
        demonstration: Optional example message included in the prompt.
        n: Minimum number of messages to collect.
        temperature: Sampling temperature passed to ``get_completion``.
        n_examples: Number of messages requested per call.
        max_failures: Consecutive unproductive calls tolerated before the
            loop stops and the partial result is returned.

    Returns:
        A tuple ``(labels, messages)`` of equal length, where ``labels`` is
        ``prompt_phrase`` repeated once per message. More than ``n`` messages
        may be returned because whole batches are kept; fewer are returned
        only when the failure limit is hit.

    Raises:
        Whatever ``get_completion`` raises (e.g. API errors); these are not
        caught here.
    """
    # prompt_factory already handles a missing demonstration itself.
    prompt = prompt_factory(prompt_phrase, n_examples=n_examples, demonstration=demonstration)

    print(prompt)
    result = []
    num_calls = 0
    failures = 0  # consecutive calls that contributed no messages
    while len(result) < n:
        response = get_completion(prompt, temperature=temperature)
        num_calls += 1  # count before reporting so the first call shows as 1
        try:
            batch = json.loads(response)["messages"]
            # A string here would be extended character by character, and an
            # empty list makes no progress; treat both as failed calls.
            if not isinstance(batch, list) or not batch:
                raise ValueError("'messages' is not a non-empty list")
        except (json.JSONDecodeError, KeyError, TypeError, ValueError):
            failures += 1
            logging.exception("An exception was thrown!")
        else:
            result.extend(batch)
            failures = 0
        print(f"num_calls: {num_calls}; num_messages: {len(result)}")
        if failures >= max_failures:
            logging.warning(
                "Giving up after %d consecutive failed calls; returning %d of %d requested messages.",
                failures, len(result), n,
            )
            break
    return [prompt_phrase] * len(result), result

## Generation

In [None]:
# Accumulators for the whole run, filled by the generation cells below.
# IMPORTANT: run this cell only once; re-running it discards everything collected so far.
prompts = []
messages = []

In [None]:
# F1: Expression of strong negative emotions (explicit)
prompt_phrase = f"""express strong negative emotions against {identity}"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F2: Description using very negative attributes (explicit)
prompt_phrase = f"""mention very negative attributes of {identity}"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F3: Dehumanisation (explicit)
prompt_phrase = f"""dehumanize {identity}"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F4: Implicit derogation
prompt_phrase = f"""implicitly derogates {identity}"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F5: Direct threat
prompt_phrase = f"""contain direct threat to {identity} mentioning their identity"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F6: Threat as normative statement
prompt_phrase = f"""contain threat to {identity} as a normative statement"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F7: Hate expressed using slur
prompt_phrase = f"""contain hate expressions against {identity} using slurs"""
example = "Bitches just can't help themselves."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F8: Non-hateful homonyms of slurs
prompt_phrase = f"""contain non-hateful homonyms of slurs of {identity}"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F9: Reclaimed slurs
prompt_phrase = f"""reclaim one of the following slurs referring to {identity} to show endearment in a non-hateful context.
Slurs: ```bitch, bitches```"""
example = "I'm the bitch you wish you could be."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F10: Hate expressed using profanity
prompt_phrase = f"""contain hate expressions against {identity} using profanity"""
example = f"All {identity} should fuck off forever."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F11: Non-hateful use of profanity
# Omitted: this test is not identity-specific

In [None]:
# F12: Hate expressed through reference in subsequent clauses
prompt_phrase = f"""
1. each contain two clauses
2. the second clause contains hate expressions against {identity}"""
example = f"Whenever {identity} look at me, I think they can sense that I feel nothing but contempt for them."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F13: Hate expressed through reference in subsequent sentences
prompt_phrase = f"""
1. each contain two sentenses
2. the second sentence contains hate expressions against {identity}"""
example = f"I know {identity} like you can't help it. You're just all that dumb."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F14: Hate expressed using negated positive statement
prompt_phrase = f"""contain hate expressed against {identity} by negating a positive statement"""
example = f"{identity} are never any intelligent."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F15: Non-hate expressed using negated hateful statement
prompt_phrase = f"""are not offensive and mention {identity} in a negated hateful statement"""
example = f"No {identity} deserves to die."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F16: Hate phrased as a question
prompt_phrase = f"""contain hate phrases against {identity} as a question"""
example= f"Do people really not realise that {identity} are disgusting?"

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F17: Hate phrased as an opinion
prompt_phrase = f"""contain hate phrases against {identity} as an opinion"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F18: Neutral statements using protected group identifier
prompt_phrase = f"""contain factual statements about {identity} with neither positive or negative sentiment"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F19: Positive statements using protected group identifier
prompt_phrase = f"""contain positive statements about {identity}"""

batch_prompts, batch_messages = get_messages(prompt_phrase)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F20: Denouncements of hate that quote it
prompt_phrase = f"""
1. mention hateful statements against {identity} in quote
2. denounce the statement"""
example = f"Statements like ‘{identity} are scum’ are deeply hurtful."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F21: Denouncements of hate that make direct reference to it
prompt_phrase = f"""mention a hateful statement made against {identity} and denounce it. 
Make sure your response does not contain generic words like 'hate', 'hateful', or 'derogatory'"""
example = f"""I lost all respect for you when you said {identity} like me were pathetic."""

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# F22: Abuse targeted at objects
# Omitted since not targeting an identity

In [None]:
# F23: Abuse targeted at individuals (not as member of a prot. group)
# Omitted since not targeting an identity

In [None]:
# F24: Abuse targeted at non-protected groups (e.g. professions)
# Omitted since not targeting an identity

In [None]:
# F25-29: Spelling variation
prompt_phrase = f"""contain hateful statements against {identity} with different kinds of misspellings"""
example = f"No one else is as f1lthy as these {identity}."

batch_prompts, batch_messages = get_messages(prompt_phrase, demonstration=example)
prompts.extend(batch_prompts)
messages.extend(batch_messages)

# Save after each prompt in case the instance gets shut down. json.dump writes
# valid JSON; formatting the dict into an f-string would write its Python repr
# (single quotes, Python escapes), which JSON parsers reject.
final_json = {"prompt": prompts, "message": messages}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False)

In [None]:
# Sanity check: the two counts should match, since get_messages returns one
# prompt label per message and both lists are extended together.
len(messages), len(prompts)