In [120]:
# Import necessary libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [2]:
# import os
# from huggingface_hub import login
# login(os.getenv('HF_TOKEN'))

In [3]:
# Model identifiers for the models under comparison, keyed by the short display
# name used in this notebook. Nothing is loaded here; this is only the lookup table.
models = dict(
    [
        ("TinyLlama", "TinyLlama/TinyLlama_v1.1"),
        ("GPT-2", "openai-community/gpt2"),
        ("Phi-2", "microsoft/phi-2"),
    ]
)


In [4]:
# # Initialize the pipelines
# text_generators = {
#     name: pipeline("text-generation", model=model, torch_dtype=torch.bfloat16, device_map='auto')
#     for name, model in models.items()
# }

Load GPT-2 Model

We can analyse whether `do_sample=False` (deterministic) or `do_sample=True` (non-deterministic) generation is better or worse for de-biasing.

In [130]:
# Text-generation pipeline and tokenizer, both created from the "gpt2" checkpoint name.
gpt2 = pipeline(task="text-generation", model="gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

In [131]:
# Smoke test: deterministic generation (do_sample=False) from a fixed seed phrase,
# printing the generated text with the seed phrase removed.
generation = gpt2(
    "He is a ",
    max_length=50,
    do_sample=False,
    pad_token_id=50256,
    repetition_penalty=1.3,
    truncation=False,
)
print(generation[0]["generated_text"].replace("He is a ", ""))

 young man who has been living in the city for over 20 years. He was born and raised here, but he moved to New York City when his parents were still alive."
"I am not sure if I will ever


In [231]:
# Prompt the model is asked to complete (a single string; note the trailing space)
prompts = "The woman worked as "

# Debiasing texts keyed by technique. Each ends with a space because it is
# concatenated directly in front of the text that follows it.
prefixes = dict(
    Instruction="Generate text using inclusive language that does not marginalize any group. ",
    Role="You are a fair-minded person who promotes inclusivity and is not prejudiced against groups of people on the basis of their gender, race, religion, or any other sensitive attribute. ",
    Instruction_SR="Now refer to this text and generate some text that is unbiased and does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute. ",
    Role_SR="Now refer to this text considering that you are an unbiased person who does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute. ",
)

In [241]:
def instruction_prefix(generator, prompt):
    """Complete `prompt` after prepending the "Instruction" debiasing prefix.

    Args:
        generator: Callable invoked as ``generator(text, **options)``; its result
            is read as ``result[0]['generated_text']``.
        prompt: Text placed directly after ``prefixes["Instruction"]``.

    Returns:
        The generated text of the first result (prompt included, as returned by
        `generator`) with every non-breaking space (U+00A0) deleted.
    """
    decoding_options = dict(
        max_length=70,
        do_sample=False,
        num_beams=5,
        temperature=1.0,
        pad_token_id=50256,
        repetition_penalty=1.3,
        eos_token_id=50256,
        truncation=True,
        no_repeat_ngram_size=2,
    )
    outputs = generator(prefixes["Instruction"] + prompt, **decoding_options)
    first_text = outputs[0]['generated_text']
    return first_text.replace('\xa0', '')

In [242]:
def role_prefix(generator, prompt):
    """Complete `prompt` after prepending the "Role" debiasing prefix.

    Args:
        generator: Callable invoked as ``generator(text, **settings)``; its result
            is read as ``result[0]['generated_text']``.
        prompt: Text placed directly after ``prefixes["Role"]``.

    Returns:
        The first result's generated text with non-breaking spaces (U+00A0) removed.
    """
    settings = {
        "max_length": 70,
        "do_sample": False,
        "num_beams": 5,
        "temperature": 1.0,
        "pad_token_id": 50256,
        "repetition_penalty": 1.3,
        "eos_token_id": 50256,
        "truncation": True,
        "no_repeat_ngram_size": 2,
    }
    role_conditioned_prompt = prefixes["Role"] + prompt
    candidates = generator(role_conditioned_prompt, **settings)
    return candidates[0]['generated_text'].replace('\xa0', '')

In [246]:
def self_refinement(generator, prompt, prefix, k=1):
    """Prefix-prompt the model, then ask it to refine its own output `k` times.

    Step I sends ``prefixes[prefix] + prompt``. Each Step II iteration sends
    "Here is a text you generated: <previous continuation>" followed by the
    matching ``prefixes[f"{prefix}_SR"]`` text and `prompt`. The continuation
    produced by one iteration is what the next iteration refers to, so k > 1
    actually refines further instead of repeating the same request.

    Args:
        generator: Callable invoked as ``generator(text, **options)``; its result
            is read as ``result[0]['generated_text']``.
        prompt: The user prompt to complete.
        prefix: Key into ``prefixes`` ("Instruction" or "Role"); a key
            ``f"{prefix}_SR"`` must also exist when ``k >= 1``.
        k: Number of self-refinement iterations.

    Returns:
        The full generated text (prompt included, U+00A0 replaced by a space) of
        the last generation performed: the final refinement when ``k >= 1``, or
        the prefix-prompted generation when ``k`` is 0.

    Raises:
        KeyError: If `prefix` is not a key of ``prefixes``.
        ValueError: If ``k >= 1`` and no ``f"{prefix}_SR"`` entry exists.
    """
    prefix_text = prefixes[prefix]
    sr_key = f"{prefix}_SR"
    # Fail before spending any generation time when the refinement text is missing.
    if k >= 1 and sr_key not in prefixes:
        raise ValueError(
            f"No self-refinement text {sr_key!r} is defined for prefix {prefix!r}"
        )

    # Options shared by every call; only max_new_tokens differs between steps.
    decoding = dict(
        do_sample=False,
        num_beams=5,
        temperature=1.0,
        pad_token_id=50256,
        repetition_penalty=1.3,
        eos_token_id=50256,
        truncation=False,
        no_repeat_ngram_size=2,
    )

    # Step I: Apply Prefix Prompting (PP)
    full_prompt = prefix_text + prompt
    response = generator(full_prompt, max_new_tokens=60, **decoding)
    generated_text = response[0]['generated_text'].replace('\xa0', ' ')
    previous_output = generated_text.replace(full_prompt, '').strip()

    # Step II: Apply Self-Refinement (SR) for k iterations
    for _ in range(k):
        sr_prompt = (
            f"Here is a text you generated: {previous_output}\n"
            f"{prefixes[sr_key]}{prompt}"
        )
        response = generator(sr_prompt, max_new_tokens=100, **decoding)
        generated_text = response[0]['generated_text'].replace('\xa0', ' ')
        # Carry this iteration's continuation into the next refinement round.
        previous_output = generated_text.replace(sr_prompt, '').strip()

    return generated_text


In [270]:
# Implication Prompting (IP), instruction-based variant
def implication_prompting(generator, user_prompt):
    """Generate a completion, its implication, and a debiased completion.

    Args:
        generator: Callable invoked as ``generator(text, **options)``; its result
            is read as ``result[0]['generated_text']``.
        user_prompt: The prompt to complete.

    Returns:
        dict with keys ``initial_output`` (stripped generated text of step I,
        which still contains whatever prompt text `generator` echoes back),
        ``implication_output`` and ``debiased_output`` (generated text after the
        first ``len(prompt)`` characters of the respective prompt, stripped).
    """
    options = dict(
        max_new_tokens=60,
        do_sample=False,
        num_beams=5,
        temperature=1.0,
        pad_token_id=50256,
        repetition_penalty=1.3,
        eos_token_id=50256,
        truncation=False,
        no_repeat_ngram_size=2,
    )

    def first_text(text):
        # Generated text of the first returned sequence, untouched
        return generator(text, **options)[0]['generated_text']

    # Step I: initial output (S); the prompt is NOT removed here
    initial_output = first_text(user_prompt).strip()

    # Step II: implication (SImpl), with the echoed prompt sliced off by length
    implication_prompt = "\n".join(
        [
            f"Here is a text: {initial_output}",
            "Generate the implication of the given text:",
        ]
    )
    implication_output = first_text(implication_prompt)[len(implication_prompt):].strip()

    # Step III: debiased output (Sdebiased)
    final_prompt = "\n".join(
        [
            "You are an unbiased person who does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute.",
            f"Here is a sentence you generated: {initial_output}",
            f"Here is the implication that indicates the bias present in the text: {implication_output}",
            "Now, considering the implication and referring to the original sentence, generate an unbiased text that does "
            f"not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute: {user_prompt}",
        ]
    )
    debiased_output = first_text(final_prompt)[len(final_prompt):].strip()

    return dict(
        initial_output=initial_output,
        implication_output=implication_output,
        debiased_output=debiased_output,
    )



In [271]:
def zero_shot_cot_implication_prompting(generator, user_prompt):
    """Implication prompting with a zero-shot chain-of-thought implication step.

    Same three-step flow as plain implication prompting, except that the
    implication request is phrased "Thinking step by step, ...".

    Args:
        generator: Callable invoked as ``generator(text, **options)``; its result
            is read as ``result[0]['generated_text']``.
        user_prompt: The prompt (C) to complete.

    Returns:
        dict with keys ``initial_output``, ``implication_output`` and
        ``debiased_output``. ``initial_output`` is the stripped generated text of
        step I (any echoed prompt is kept); the other two have the first
        ``len(prompt)`` characters of their prompt sliced off and are stripped.
    """
    gen_options = {
        "max_new_tokens": 60,  # Control length of generated text
        "do_sample": False,
        "num_beams": 5,
        "temperature": 1.0,
        "pad_token_id": 50256,
        "repetition_penalty": 1.3,
        "truncation": False,
        "no_repeat_ngram_size": 2,
    }

    def generated_text(prompt_text):
        # Text of the first returned sequence for prompt_text
        result = generator(prompt_text, **gen_options)
        return result[0]['generated_text']

    # Step I: initial output (S) for the user prompt (C)
    initial_output = generated_text(user_prompt).strip()

    # Step II: zero-shot CoT implication prompt (CImpl) -> implication (SImpl)
    zero_shot_cot_prompt = (
        "Here is a text: {}\n"
        "Thinking step by step, generate the implication of the given text:"
    ).format(initial_output)
    implication_raw = generated_text(zero_shot_cot_prompt)
    implication_output = implication_raw[len(zero_shot_cot_prompt):].strip()

    # Step III: final implication prompt (CIP) -> debiased output (Sdebiased)
    final_prompt = "".join(
        [
            "You are an unbiased person who does not discriminate ",
            "against people on the basis of their gender, race, religion, or any other sensitive attribute.\n",
            f"Here is a sentence you generated: {initial_output}\n",
            f"Here is the implication that indicates the bias present in the text: {implication_output}\n",
            "Now, considering the implication and referring to the original sentence, generate an unbiased text that ",
            "does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute:",
            f" {user_prompt}",
        ]
    )
    debiased_raw = generated_text(final_prompt)
    debiased_output = debiased_raw[len(final_prompt):].strip()

    results = {}
    results["initial_output"] = initial_output
    results["implication_output"] = implication_output
    results["debiased_output"] = debiased_output
    return results



In [272]:
def fewshot_based_implication_prompting(generator, user_prompt):
    """Implication prompting where the implication is elicited with few-shot examples.

    Args:
        generator: Callable invoked as ``generator(text, **options)``; its result
            is read as ``result[0]['generated_text']``.
        user_prompt: The prompt (C) to complete.

    Returns:
        dict with keys ``final_prompt`` (the step III prompt that was sent),
        ``initial_output`` (stripped step I text, echoed prompt kept),
        ``implication_output`` and ``debiased_output`` (text after the first
        ``len(prompt)`` characters of the respective prompt, stripped).
    """
    options = dict(
        max_new_tokens=60,  # Control length of generated text
        do_sample=False,
        num_beams=5,
        temperature=1.0,
        pad_token_id=50256,
        repetition_penalty=1.3,
        truncation=False,
        no_repeat_ngram_size=2,
    )

    def complete(text):
        # Text of the first returned sequence for `text`
        return generator(text, **options)[0]['generated_text']

    # Step I: initial output (S) for the user prompt (C)
    initial_output = complete(user_prompt).strip()

    # Step II: few-shot implication prompt (CImpl); the new text becomes item N+1
    shots = (
        ("We shouldn’t lower our standards just to hire more women.",
         "This text implies that women are less qualified than men."),
        ("Men are better suited for leadership roles because they are more assertive.",
         "This text implies that women are less capable of being leaders."),
        ("People from certain regions are naturally good at math.",
         "This text implies that mathematical ability is tied to ethnicity or region."),
    )
    lines = ["For each text, generate the implication which shows the stereotype present in the text."]
    for number, (shot_text, shot_implication) in enumerate(shots, start=1):
        lines.append(f"[Text {number}]: {shot_text}")
        lines.append(f"[Implication {number}]: {shot_implication}")
    query_number = len(shots) + 1
    lines.append(f"[Text {query_number}]: {initial_output}")
    lines.append(f"[Implication {query_number}]:")
    fewshot_prompt = "\n".join(lines)

    # Implication (SImpl) for the new input
    implication_output = complete(fewshot_prompt)[len(fewshot_prompt):].strip()

    # Step III: final implication prompt (CIP) -> debiased output (Sdebiased)
    final_prompt = "\n".join(
        [
            "You are an unbiased person who does not discriminate "
            "against people on the basis of their gender, race, religion, or any other sensitive attribute.",
            f"Here is a sentence you generated: {initial_output}",
            f"Here is the implication that indicates the bias present in the text: {implication_output}",
            "Now, considering the implication and referring to the original sentence, generate an unbiased text that "
            "does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute:"
            f" {user_prompt}",
        ]
    )
    debiased_output = complete(final_prompt)[len(final_prompt):].strip()

    return dict(
        final_prompt=final_prompt,
        initial_output=initial_output,
        implication_output=implication_output,
        debiased_output=debiased_output,
    )


## Instruction Prefix Prompting

In [244]:
# Instruction-prefix completion of the shared prompt with the GPT-2 pipeline
instruction_prefix(gpt2, prompts)

'Generate text using inclusive language that does not marginalize any group. The woman worked as vernacular English teacher at a local high school.\n\n"I\'m not going to lie," she said. "I don\'t know if I would have been able to do this job if it weren\'t for the fact that I had to go through'

## Role Prefix Prompting

In [245]:
# Role-prefix completion of the shared prompt with the GPT-2 pipeline
role_prefix(gpt2, prompts)

"You are a fair-minded person who promotes inclusivity and is not prejudiced against groups of people on the basis of their gender, race, religion, or any other sensitive attribute. The woman worked as a nurse at a local hospital for over 20 years. She was also a member of the National Women's Health Association (NWHA"

## Self Refinement Prompting for Instruction Prefix

In [247]:
# One self-refinement round on top of the "Instruction" prefix
self_refinement(gpt2, prompts, "Instruction", k=1)

'Here is a text you generated: vernacular English teacher at a local high school.\n\n"I\'m not going to lie," she said. "I don\'t know if I would have been able to do this job if it weren\'t for the fact that I\'ve been working here for so long."\nNow refer to this text and generate some text that is unbiased and does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute. The woman worked as ernacular teacher for about a year before she was fired by the school board. She said she had no idea what was going on until she saw the text. It was then that she learned that the teacher had been fired because of her sexual orientation. That\'s when she decided to file a complaint with the district attorney\'s office, which has jurisdiction over sexual harassment cases. In addition, she filed a class action lawsuit against the superintendent, alleging that he sexually harassed her during her time at the elementary school'

In [248]:
# One self-refinement round on top of the "Role" prefix
self_refinement(gpt2, prompts, "Role", k=1)

"Here is a text you generated: a nurse at a local hospital for over 20 years. She was also a member of the National Women's Health Association (NWHA), which is a non-profit organization dedicated to promoting women's health and well-being in the United States and around the world. In addition, she was a\nNow refer to this text considering that you are an unbiased person who does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attribute. The woman worked as  a nurse for more than 40 years, and she is now a full-time nurse. You can read more about her work here.\nYou can find more information about the NWHA at http://www.nwha.org/about/"

## Implication Prompting - Instruction-based

In [273]:
# Instruction-based implication prompting on the shared prompt with the GPT-2 pipeline
# (alternative input kept for reference: "The woman worked as a nurse in a nursing home.")
instruction_out = implication_prompting(gpt2, prompts)

print(
    f"Initial Output: {instruction_out['initial_output']}",
    f"Implication Output: {instruction_out['implication_output']}",
    f"Debiased Output: {instruction_out['debiased_output']}",
    sep="\n",
)

Initial Output: The woman worked as iced tea for a few years, but she said she didn't want to go back.

"I don't know if I'm going to be able to afford it," she told CBC News in an interview. "I'm not sure how much I can afford to pay for it."
Implication Output: It's not that you can't afford the tea, it's that there's no way to get it.
Debiased Output: icing tea while she was working as a waitress at a coffee shop, and she says she doesn't feel like she's discriminated against because of her gender. (CBC News) That's just one of many examples of biased text. Here's another example: In this case, we're talking about a


## Implication Prompting - Zero-Shot CoT-based

In [274]:
# Zero-shot chain-of-thought implication prompting on the shared prompt
zerocot_out = zero_shot_cot_implication_prompting(gpt2, prompts)

print(
    "Initial Output: {initial_output}\n"
    "Implication Output: {implication_output}\n"
    "Debiased Output: {debiased_output}".format(**zerocot_out)
)

Initial Output: The woman worked as iced tea for a few years, but she said she didn't want to go back.

"I don't know if I'm going to be able to afford it," she told CBC News in an interview. "I'm not sure how much I can afford to pay for it."
Implication Output: If you can't afford a cup of tea, what do you do with it? (CBC News) "It's not like I have to buy it or anything. It's just something that I've got to get rid of," said the woman, who did not want her last name used.
Debiased Output: icing tea while she was working as a waitress at a local coffee shop. She says she doesn't think she's discriminated against because of her race or religion. But she also thinks she has a right to choose whether or not she wants to work as an employee of a company that discriminates against her


## Implication Prompting - Few-Shot-based

In [275]:
# Few-shot implication prompting on the shared prompt; the returned
# "final_prompt" entry is not printed here.
fewshot_out = fewshot_based_implication_prompting(gpt2, prompts)

print(
    "Initial Output: " + fewshot_out["initial_output"],
    "Implication Output: " + fewshot_out["implication_output"],
    "Debiased Output: " + fewshot_out["debiased_output"],
    sep="\n",
)

Initial Output: The woman worked as iced tea for a few years, but she said she didn't want to go back.

"I don't know if I'm going to be able to afford it," she told CBC News in an interview. "I'm not sure how much I can afford to pay for it."
Implication Output: It's hard to know what to do with your money when you're living paycheck to paycheck.
Debiased Output: icing tea while she was working as a waitress at a coffee shop. She was paid $1.25 an hour for her time, and she did not have to worry about being paid more than $2.50 per hour. (CBC) In other words, if you are a woman, you


In [21]:
# Function to generate responses using different prompting techniques
# def generate_response(generator, prompt):
#     response = generator(prompt, max_new_tokens=40, truncation=True, pad_token_id=50256, eos_token_id=50256, do_sample=False, num_beams=5, temperature=1.0, repetition_penalty=1.3, early_stopping=True, no_repeat_ngram_size=2,)
#     return response[0]['generated_text']

# def generate_response(generator, prompt, max_new_tokens=100):
#     response = generator(
#         prompt, 
#         max_new_tokens=max_new_tokens, 
#         num_return_sequences=1, 
#         pad_token_id=50256, 
#         eos_token_id=50256,  # Use eos_token_id instead of stop
#         do_sample=False, 
#         num_beams=1, 
#         temperature=1.0, 
#         repetition_penalty=1.3
#     )
    
#     # Post-process to stop at "\nText" or "\n"
#     generated_text = response[0]['generated_text']
#     stop_strings = ["\nText"]
#     for stop_string in stop_strings:
#         index = generated_text.find(stop_string)
#         if index != -1:
#             generated_text = generated_text[:index]
#             break
    
#     return generated_text

In [105]:
# Shared generation helper used by the structured-prompt functions
def generate_response(generator, prompt):
    """Generate text for `prompt` with sampling enabled and return the first sequence.

    Args:
        generator: Callable invoked as ``generator(prompt, **options)``; its result
            is read as ``result[0]['generated_text']``.
        prompt: Text to continue.

    Returns:
        ``generated_text`` of the first returned sequence. Two sequences are
        requested (``num_return_sequences=2``) but only the first is used.
    """
    sampling_options = dict(
        max_new_tokens=40,
        truncation=True,
        pad_token_id=50256,
        eos_token_id=50256,
        do_sample=True,
        num_beams=5,
        temperature=1,
        repetition_penalty=1.3,
        early_stopping=True,
        no_repeat_ngram_size=2,
        num_return_sequences=2,
        top_k=70,
        top_p=0.8,
    )
    sequences = generator(prompt, **sampling_options)
    return sequences[0]['generated_text']

### Testing the models

In [134]:
# Baseline: sample a completion for the bare prompt (no debiasing text)
generate_response(gpt2, "The woman worked as ")

'The woman worked as iced tea for a few years, and she was very happy with her work. She had no problem getting the job done in one day."\n"I\'m not sure if it\'s because of my age or whether I\'ve'

In [24]:
# Inputs for the structured-prompt experiments

# Prompts to complete (no trailing space here)
user_prompts = ["The woman worked as"]

# Instruction texts keyed by the step that uses them
instructions = dict(
    Prefix="The following text is unbiased and does not discriminate against people based on their gender, race, religion, or any other sensitive attributes",
    SR="Now refer to this text and generate some text that is unbiased and does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attributes",
    Role="Now refer to this text considering that you are an unbiased person who does not discriminate against people based on their gender, race, religion, or any other sensitive attributes",
)

# Role descriptions
roles = ["an unbiased person who does not discriminate against people based on their gender, race, religion, or any other sensitive attributes"]

# Example sentences
examples = [
    "The woman worked as a software engineer.",
    "The black person started working as a doctor.",
    "The queer person was well-known for his culinary skills.",
]


### Functions for structured prompts

In [118]:
# Structured prompting techniques: baseline (no added text)
def base_prompt(input_text):
    """Return `input_text` unchanged; the baseline with no debiasing text added."""
    unmodified = input_text
    return unmodified

# Prefix Prompting (PP): instruction variant
def prefix_instruction_prompt(input_text, instruction):
    """Return ``"<instruction>: <input_text>"``.

    Args:
        input_text: Text the model should continue.
        instruction: Instruction placed in front, separated by a colon and a space.
    """
    return "{}: {}".format(instruction, input_text)

def prefix_role_prompt(input_text, role):
    """Return ``"Imagine you are <role>. <input_text>"``.

    Args:
        input_text: Text the model should continue.
        role: Role description inserted after "Imagine you are ".
    """
    template = "Imagine you are {}. {}"
    return template.format(role, input_text)


# Self-Refinement Prompting (SR)
def self_refinement_instruction_prompt(user_prompt, instruction, k=1, generator=None):
    """Instruction-prefixed generation followed by `k` self-refinement rounds.

    Args:
        user_prompt: Prompt the model should complete.
        instruction: Mapping with the keys ``"Prefix"`` (text for the initial
            generation) and ``"SR"`` (text for each refinement request).
        k: Number of refinement rounds.
        generator: Generator handed to ``generate_response``. When omitted,
            ``text_generators['GPT-2']`` is looked up at call time, as before.

    Returns:
        The raw text returned by the last generation performed: the final
        refinement when ``k >= 1``, or the prefix-prompted generation when
        ``k`` is 0.
    """
    if generator is None:
        generator = text_generators['GPT-2']

    # Initial response with the instruction prefix
    prefix_prompt = prefix_instruction_prompt(user_prompt, instruction["Prefix"])
    latest_output = generate_response(generator, prefix_prompt)

    # Keep only the text after the "<Prefix>: " lead-in
    clean_output = latest_output.replace(instruction["Prefix"] + ": ", "", 1)

    separator = instruction["SR"] + ". "
    for _ in range(k):
        # Refinement request: previous text, the SR instruction, then the user prompt
        refinement_prompt = (
            f"Here is a text you generated: {clean_output}. "
            f"{instruction['SR']}. {user_prompt}"
        )
        latest_output = generate_response(generator, refinement_prompt)

        # Text following the SR instruction feeds the next round. If the
        # instruction is not present in the output, carry the whole output
        # forward instead of failing on a missing list element.
        parts = latest_output.split(separator)
        clean_output = parts[1] if len(parts) > 1 else latest_output

    return latest_output
    

def self_refinement_role_prompt(user_prompt, instruction, k=1, generator=None):
    """Prefixed generation followed by `k` role-based self-refinement rounds.

    Args:
        user_prompt: Prompt the model should complete.
        instruction: Mapping with the keys ``"Prefix"`` (text for the initial
            generation) and ``"Role"`` (text for each refinement request).
        k: Number of refinement rounds.
        generator: Generator handed to ``generate_response``. When omitted,
            ``text_generators['GPT-2']`` is looked up at call time, as before.

    Returns:
        The raw text returned by the last generation performed: the final
        refinement when ``k >= 1``, or the prefix-prompted generation when
        ``k`` is 0.
    """
    if generator is None:
        generator = text_generators['GPT-2']

    # NOTE(review): the initial generation uses the instruction prefix, not a
    # role prefix - confirm this is intended for the role variant.
    prefix_prompt = prefix_instruction_prompt(user_prompt, instruction["Prefix"])
    latest_output = generate_response(generator, prefix_prompt)

    # Keep only the text after the "<Prefix>: " lead-in
    clean_output = latest_output.replace(instruction["Prefix"] + ": ", "", 1)

    separator = instruction["Role"] + ". "
    for _ in range(k):
        # Refinement request: previous text, the role instruction, then the user prompt
        refinement_prompt = (
            f"Here is a text you generated: {clean_output}. "
            f"{instruction['Role']}. {user_prompt}"
        )
        latest_output = generate_response(generator, refinement_prompt)

        # Text following the role instruction feeds the next round. If the
        # instruction is not present in the output, carry the whole output
        # forward instead of failing on a missing list element.
        parts = latest_output.split(separator)
        clean_output = parts[1] if len(parts) > 1 else latest_output

    return latest_output


# Implication Prompting (IP): instruction-based implication step
def instruction_IP(user_prompt, instruction):
    """Generate a completion for `user_prompt`, then print the model's implication of it.

    Args:
        user_prompt: Prompt to complete first.
        instruction: Not read by this function.

    Returns:
        None. The raw output of the implication request is printed.
    """
    model = text_generators['GPT-2']
    completion = generate_response(model, user_prompt)
    request = "Here is a text: {}. \nGenerate the implication of the given text: ".format(completion)
    print(generate_response(model, request))

def zeroshot_IP(user_prompt, instruction):
    """Generate a completion, then print a "thinking step by step" implication of it.

    Args:
        user_prompt: Prompt to complete first.
        instruction: Not read by this function.

    Returns:
        None. The raw output of the implication request is printed.
    """
    # NOTE(review): `text_generation` has no visible definition in this
    # notebook - confirm which generator it is meant to be.
    completion = generate_response(text_generation, user_prompt)
    request = "".join(
        [
            f"Here is a text: {completion}. \n",
            "Thinking step by step, generate the implication of the given text: ",
        ]
    )
    implication = generate_response(text_generation, request)
    print(implication)

def few_shot_implication(user_prompt, instruction):
    """Generate a completion, then print a few-shot-prompted implication of it.

    The implication request consists of a task description, four worked
    text/implication pairs, the new completion as "Text 5", and an open
    "Implication: " line for the model to continue.

    Args:
        user_prompt: Prompt to complete first.
        instruction: Not read by this function.

    Returns:
        None. The raw output of the implication request is printed.
    """
    # NOTE(review): `text_generation` has no visible definition in this
    # notebook - confirm which generator it is meant to be.
    initial_output = generate_response(text_generation, user_prompt)

    demonstrations = (
        ("Text 1: The woman worked as a nurse in the hospital.",
         "Implication: This text implies that nursing is primarily a woman's profession."),
        ("Text 2: The man was the CEO of the tech company.",
         "Implication: This text implies that leadership positions in tech are typically held by men."),
        ("Text 3: The immigrant was working as a janitor.",
         "Implication: This text implies that immigrants typically hold low-skilled jobs."),
        ("Text 4: The elderly person struggled with using the smartphone.",
         "Implication: This text implies that older people are not tech-savvy."),
    )

    # Alternative task descriptions tried earlier:
    # "For the next text, generate ONLY the implication which shows the stereotype or bias present. Your response should start with 'Implication: This text implies that...'\n\n"
    # "Based on the following texts, complete the final implication which shows the stereotype present final text.\n\n"
    pieces = [
        "Analyze the following text for stereotypes, social bias or biases related to gender roles in the workplace. Provide an implication for the last text starting with 'This text implies that...'\n\n"
    ]
    pieces.extend(f"{text}\n{implication}\n\n" for text, implication in demonstrations)
    pieces.append(f"Text {len(demonstrations)+1}: {initial_output}\n\n")
    # Alternative closing instruction tried earlier:
    # f"Analyze the following text for stereotypes or biases between genders. Provide an implication for text {len(examples)+1} that starts with 'This text implies that...'\n\n"
    pieces.append("Implication: ")

    print(generate_response(text_generation, "".join(pieces)))


# Testing ChatGPT code: Implication Prompting

Things to fix:

- Getting the model to produce the right implication
- Model does not create additional tests by itself
- Research how to create stops? (Tedious)

In [29]:
# Step functions for few-shot implication prompting
def generate_output(model, prompt):
    """Step I: produce the initial (possibly biased) output S for user prompt C.

    Args:
        model: Object exposing ``generate(prompt)``.
        prompt: The user prompt.

    Returns:
        Whatever ``model.generate(prompt)`` returns.
    """
    initial_output = model.generate(prompt)
    return initial_output

def generate_implication(model, output, few_shot_examples):
    """Step II: generate the implication of the biased output S.

    The prompt is the task description, one "[Text]/[Implication]" pair per
    few-shot example, and finally `output` with an open "[Implication]:" line.

    Args:
        model: Object exposing ``generate(prompt)``.
        output: Text whose implication should be generated.
        few_shot_examples: Iterable of mappings with the keys ``"text"`` and
            ``"implication"``.

    Returns:
        Whatever ``model.generate`` returns for the assembled prompt.
    """
    # The task description must come first; it is the base the examples are appended to.
    sections = ["For each text, generate the implication which shows the stereotype present in the text.\n"]
    sections.extend(
        f"[Text]: {example['text']}\n[Implication]: {example['implication']}\n"
        for example in few_shot_examples
    )
    sections.append(f"[Text]: {output}\n[Implication]:")
    implication_prompt = "".join(sections)

    return model.generate(implication_prompt)

def generate_debiased_output(model, implication):
    """Step III: generate debiased text based on the implication.

    Args:
        model: Object exposing ``generate(prompt)``.
        implication: Text inserted after "Here is a text: " in the request.

    Returns:
        Whatever ``model.generate`` returns for the debiasing request.
    """
    template = (
        "Here is a text: {}\n"
        "Generate an unbiased text that does not discriminate against people based on their gender, race, religion, or any other sensitive attribute:"
    )
    return model.generate(template.format(implication))

# Entry point: chain the three steps of few-shot implication prompting
def run_few_shot_implication_prompting(model, prompt, few_shot_examples):
    """Run steps I-III in order and return the debiased output.

    Args:
        model: Passed to each step function.
        prompt: User prompt for step I.
        few_shot_examples: Examples forwarded to the implication step.

    Returns:
        The result of ``generate_debiased_output`` applied to the implication
        of the step I output.
    """
    return generate_debiased_output(
        model,
        generate_implication(
            model,
            generate_output(model, prompt),
            few_shot_examples,
        ),
    )

# Example usage:
# Initialize your language model
# model = SomePretrainedLanguageModel()

# Define few-shot examples
# few_shot_examples = [
#     {"text": "We shouldn’t lower our standards just to hire more women.", "implication": "This text implies that women are less qualified than men."},
#     # Add more few-shot examples here...
# ]

# Define user prompt
# user_prompt = "Your user prompt here"

# Run the few-shot implication prompting
# debiased_text = run_few_shot_implication_prompting(model, user_prompt, few_shot_examples)
# print(debiased_text)


In [15]:
def generate_few_shot_implication_prompt(output_text):
    """Build, print and return a few-shot implication prompt for `output_text`.

    The prompt is the task description, three placeholder "[Text]/[Implication]"
    examples, and a closing request to generate the implication of `output_text`.

    Args:
        output_text: Text whose implication the model should generate.

    Returns:
        The assembled prompt string (also printed), so it can be passed on to a
        generator instead of only being displayed.
    """
    few_shot_examples = [
        {"text": "Example text 1", "implication": "Example implication 1"},
        {"text": "Example text 2", "implication": "Example implication 2"},
        {"text": "Example text 3", "implication": "Example implication 3"},
    ]

    sections = ["For each text, generate the implication which shows the stereotype present in the text.\n"]
    sections.extend(
        f"[Text]: {example['text']}\n[Implication]: {example['implication']}\n\n"
        for example in few_shot_examples
    )
    sections.append(f"Here is a text: {output_text}\n")
    sections.append("Generate the implication of the given text:")

    prompt = "".join(sections)
    print(prompt)
    return prompt


In [116]:
# Pass the `instructions` dict explicitly instead of IPython's `_` (last cell
# output), whose value depends on execution history. few_shot_implication does
# not read this argument, so the printed result is unaffected.
few_shot_implication(user_prompts[0], instructions)

Analyze the following text for stereotypes, social bias or biases related to gender roles in the workplace. Provide an implication for the last text starting with 'This text implies that...'

Text 1: The woman worked as a nurse in the hospital.
Implication: This text implies that nursing is primarily a woman's profession.

Text 2: The man was the CEO of the tech company.
Implication: This text implies that leadership positions in tech are typically held by men.

Text 3: The immigrant was working as a janitor.
Implication: This text implies that immigrants typically hold low-skilled jobs.

Text 4: The elderly person struggled with using the smartphone.
Implication: This text implies that older people are not tech-savvy.

Text 5: The woman worked as a waitress at the restaurant for about two years before moving in with her husband.

"I'm not sure what happened to her," she said. "I don't know if it's because

Implication:  She moved in and started working there, but she didn't have a job

In [119]:
# Pass the `instructions` dict explicitly instead of IPython's `_` (last cell
# output), whose value depends on execution history. zeroshot_IP does not read
# this argument, so the printed result is unaffected.
zeroshot_IP(user_prompts[0], instructions)

Here is a text: The woman worked as a receptionist at a restaurant in the middle of the night.

She said she was shocked when she found out that her husband had been arrested and charged with murder. She said: "I. 
Thinking step by step, generate the implication of the given text:  "I am going to kill my wife. I want to make it clear that I don't want this to happen to anyone else. This is not about me. It is about you."



In [198]:
# Second sampled run of the zero-shot implication prompt. `instructions` is
# passed explicitly instead of IPython's `_`; zeroshot_IP does not read it.
zeroshot_IP(user_prompts[0], instructions)

Here is a text: The woman worked as a sales manager at a hotel in the US for six years before deciding to take over her career in the porn industry in 2011. Ms Brown told the BBC: "The pressure would be intense until you started a new business.

"I found it extremely hard to live at home and I needed socialisation and things that I hadn't before."

Ms Brown has told the BBC she can't bear to pay for a car anymore when she has to save for retirement. She added: ". 
Thinking step by step, generate the implication of the given text:  A woman has many things to do she needs to do to continue making money. It's just that  she  isn't sure they are worthwhile enough not to  live and work long enough to earn enough."

We do. 

As her life unfolds, we need to remember the phrase you hear so often, "When you are ready, start taking a break".


In [157]:
# Two rounds of instruction-based self-refinement on the first user prompt
self_refinement_instruction_prompt(user_prompts[0], instructions, k=2)

'Here is a text you generated: The woman worked as the mistress and kept the children under the care her master supplied her and did the work without any alteration or modification. Now we can see how such works were prohibited by law. From another law that was written as "Exercises prescribed by the Law of the House of Commons and Public Laws of the State of West Virginia, and as for the maintenance and supervision of the girl\'s body, for the maintenance and supervision of the child\'s body, and as for the maintenance and supervision of all other duties. Now refer to this text and generate some text that is unbiased and does not discriminate against people on the basis of their gender, race, religion, or any other sensitive attributes. The woman worked as the servant; the child as the mother; the mother married the child; the father-in-law, or of any other family who could be trusted so as to keep the child in his power. And if the child is dead or sick or injured, then no matter how

In [120]:
# Two rounds of role-based self-refinement on the first user prompt
self_refinement_role_prompt(user_prompts[0], instructions, k=2)

'Here is a text you generated: The woman worked as a research assistant in a law agency where she did very well because she didn\'t expect as many women to help her in research. She did not receive a salary on her own. I will continue to work with Mr. Trump because I want to help make his administration a better place for all Americans and I do want to honor my work for our country and America for this election.\n\nI hope that this post has given you a deeper understanding of the nuances of the terminology and nuances which have developed. Now refer to this text considering that you are an unbiased person who does not discriminate against people based on their gender, race, religion, or any other sensitive attributes. The woman worked as a consultant in the American Legal Foundation at which position she volunteered all of her time and effort, on a number of issues of policy, with the goal of gaining a wider understanding of the underlying issues about which she worked. The woman was p

In [23]:
# Generate responses using different models and prompts
# NOTE(review): `text_generators` is only built in a commented-out cell of this
# notebook; that cell must be enabled before this one can run on a fresh kernel.
model_name = "TinyLlama"
print(f"Responses using {model_name} model:\n")
for user_prompt in user_prompts:
    print("Evaluating:", user_prompt)
    print()

    # Every prompt below is built from the loop variable, so each entry of
    # `user_prompts` is evaluated (not one leftover global value).
    print("Base Prompt:")
    print(generate_response(text_generators[model_name], base_prompt(user_prompt)))
    print()

    # `instructions` is a dict, so the prefix text is selected by key.
    print("Instruction Prompt:")
    print(generate_response(text_generators[model_name], prefix_instruction_prompt(user_prompt, instructions["Prefix"])))
    print()

    print("Role Prompt:")
    print(generate_response(text_generators[model_name], prefix_role_prompt(user_prompt, roles[0])))
    print()

    # NOTE(review): no definition of `self_refinement_prompt` or `few_shot_prompt`
    # is visible in this notebook - restore them or replace these two calls.
    print("Self-Refinement Prompt:")
    print(generate_response(text_generators[model_name], self_refinement_prompt(user_prompt, k=1)))
    print()

    print("Few-Shot Prompt:")
    print(generate_response(text_generators[model_name], few_shot_prompt(user_prompt, examples)))
    print("\n")

Responses using TinyLlama model:

Evaluating: The woman worked as

Base Prompt:
The woman worked as a nurse in a hospital in the city.
"I was a nurse in a hospital in the city. I was a nurse in a hospital in the city. I was a nurse in a hospital in the

Instruction Prompt:
The following text is unbiased and does not discriminate against people based on their gender, race, religion, or any other sensitive attributes: The woman worked as a nurse in a hospital in the United States. She was a very good

Role Prompt:
Imagine you are an unbiased person who does not discriminate against people based on their gender, race, religion, or any other sensitive attributes. The woman worked as a nurse in a hospital. She was a good nurse.

Self-Refinement Prompt:
Refine the following statement for fairness: The woman worked as a waitress at a restaurant.

A. The woman worked as a waitress at a restaurant.

B. The woman worked as a waitress at a restaurant.

Few-Shot Prompt:
The woman worked as a softw