<a href="https://colab.research.google.com/github/UrologyUnbound/SIOP_ML_2024_Discord/blob/main/colabs/Fairness.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Identifying Fairness Perceptions Notebook
This notebook is designed to tackle the challenge of identifying which policy received the majority vote as the fairer option.

## Challenge Description
Respondents compared two organizational policies and voted on which was fairer. For each pair, the task is to predict which option (first or second) received the majority vote.

In [None]:
!pip install pandas langchain langchain_openai

In [2]:
import pandas as pd
import os
from langchain import FewShotPromptTemplate, PromptTemplate
from langchain_openai import ChatOpenAI
from google.colab import userdata

In [3]:
# Read the training split and the public test split for the fairness task
# directly from the project's GitHub repository.
fairness_train_data = pd.read_csv(
    "https://raw.githubusercontent.com/UrologyUnbound/SIOP_ML_2024_Discord/main/data/train/fairness_train.csv"
)
fairness_test_data = pd.read_csv(
    "https://raw.githubusercontent.com/UrologyUnbound/SIOP_ML_2024_Discord/main/data/test/fairness_test_public.csv"
)

In [4]:
# Build the chat model client. The OpenAI key is fetched with google.colab's
# `userdata.get` and handed to the `api_key` argument; temperature 0.0 is used
# for the predictions.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.0,
    api_key=userdata.get('OPENAI_API_KEY'),
)

In [5]:
# Task instructions used as the prefix of the few-shot prompt.
# The second sentence previously started with a word broken across a line
# break ("T" / "he output"), which was sent to the model verbatim.
instructions = """
Given the first and second options, your task is to predict which option received the majority vote as the fairer option.
The output should not make up information and not reference these given instructions or context; only output the answer.
"""

In [6]:
def extract_examples_fairness(dataset_row):
    """Turn one dataset row into a single-item list holding a few-shot example.

    Args:
        dataset_row: A row-like object (for example a pandas Series or a dict)
            that can be indexed by the keys "first_option", "second_option"
            and "majority_vote".

    Returns:
        A list containing exactly one dict with those three keys and the
        row's values, so callers can ``extend`` an accumulating list with it.
    """
    wanted_fields = ("first_option", "second_option", "majority_vote")
    example = {field: dataset_row[field] for field in wanted_fields}
    return [example]

def create_examples_fairness(dataset, n_examples=3):
    """Build the few-shot examples from the leading rows of ``dataset``.

    Args:
        dataset: Expected to be a pandas DataFrame whose rows can be indexed
            by "first_option", "second_option" and "majority_vote".
        n_examples: How many leading rows to convert into examples. Defaults
            to 3, the number that used to be hard-coded.

    Returns:
        A list of example dicts, one per row used. It holds fewer than
        ``n_examples`` items when the dataset has fewer rows than requested.

    Raises:
        ValueError: If ``n_examples`` is negative.
    """
    if n_examples < 0:
        raise ValueError("n_examples must be non-negative")

    # Never ask for more rows than exist; an empty dataset yields no examples.
    row_count = min(n_examples, len(dataset))

    examples = []
    for position in range(row_count):
        # .iloc selects by position, so "the first rows" holds even when the
        # index labels are not 0, 1, 2, ... (e.g. after filtering or shuffling).
        examples.extend(extract_examples_fairness(dataset.iloc[position]))

    return examples

def create_example_prompt_fairness():
    """Return the PromptTemplate used to render one few-shot example.

    Returns:
        A PromptTemplate that takes "first_option", "second_option" and
        "majority_vote" and lays them out on three labelled lines.
    """
    example_layout = "First Option: {first_option}\nSecond Option: {second_option}\nMajority Vote: {majority_vote}"
    expected_fields = ["first_option", "second_option", "majority_vote"]
    return PromptTemplate(input_variables=expected_fields, template=example_layout)

def create_template_fairness(dataset_row):
    """Assemble the few-shot prompt template for the fairness task.

    Args:
        dataset_row: Passed unchanged to ``create_examples_fairness`` as the
            source of the few-shot examples. NOTE(review): the visible caller
            passes the whole training DataFrame rather than a single row, so
            the parameter name looks like a misnomer.

    Returns:
        A FewShotPromptTemplate whose prefix is the module-level
        ``instructions`` text and which takes "first_option" and
        "second_option" as its input variables.
    """
    few_shot_settings = {
        "examples": create_examples_fairness(dataset_row),
        "example_prompt": create_example_prompt_fairness(),
        "prefix": instructions,
        "suffix": "First Option: {first_option}\nSecond Option: {second_option}",
        "input_variables": ["first_option", "second_option"],
    }
    return FewShotPromptTemplate(**few_shot_settings)

In [7]:
# Build the few-shot template once (its examples come from the training data),
# then fill it in with each test row's pair of options.
#
# The template does not depend on a loop index, so it is created a single time
# instead of once per training row; re-creating it inside an outer loop over
# the training data appended every test prompt len(fairness_train_data) times.
prompt_template = create_template_fairness(fairness_train_data)

# One formatted prompt per test row, in the test set's row order.
formatted_prompts = [
    prompt_template.format(first_option=first_option, second_option=second_option)
    for first_option, second_option in zip(
        fairness_test_data["first_option"], fairness_test_data["second_option"]
    )
]

In [8]:
# Sanity check: send only the first formatted prompt to the model. As the last
# expression in the cell, the returned message is what gets displayed.
llm.invoke(formatted_prompts[0])

AIMessage(content='Majority Vote: first', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 231, 'total_tokens': 236}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None})