### Functions to prompt LLM

In [61]:
from collections import Counter
from itertools import repeat
import pandas as pd
import os.path
import random
import openai
import time
from datetime import datetime
import re
import numpy as np

# Column names of the character counts in the survey data.
_CHARACTER_COLUMNS = [
    'Man', 'Woman', 'Pregnant', 'Stroller', 'OldMan', 'OldWoman', 'Boy', 'Girl',
    'Homeless', 'LargeWoman', 'LargeMan', 'Criminal', 'MaleExecutive',
    'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete', 'FemaleDoctor',
    'MaleDoctor', 'Dog', 'Cat',
]

# Words used for those characters in the scenario text (same order as the columns).
_CHARACTER_SINGULAR = [
    "man", "woman", "pregnant woman", "baby", "elderly man", "elderly woman",
    "boy", "girl", "homeless person", "large woman", "large man", "criminal",
    "male executive", "female executive", "female athlete", "male athlete",
    "female doctor", "male doctor", "dog", "cat",
]

# Plural forms (same order as the singular forms).
_CHARACTER_PLURAL = [
    "men", "women", "pregnant women", "babies", "elderly men", "elderly women",
    "boys", "girls", "homeless persons", "large women", "large men", "criminals",
    "male executives", "female executives", "female athletes", "male athletes",
    "female doctors", "male doctors", "dogs", "cats",
]

_PLURAL_OF = dict(zip(_CHARACTER_SINGULAR, _CHARACTER_PLURAL))

# Opening words shared by every case description.
_BASE_DESCRIPTION = "A self-driving car with sudden brake failure will "

# Wording for pedestrians whose crossing has a traffic light.
_TRAFFIC_LIGHT_DESCRIPTIONS = {
    'red': "flouting the law by crossing on the red signal",
    'green': "abiding by the law by crossing on the green signal",
}


def _build_persona(response):
    """Return the bullet list of respondent characteristics, in random order."""
    # "Annual income, including tips, dividends, interest, etc (in US dollars)" [dropdown menu]
    inc_descriptions = {
        2500:   "Under $5,000",                              # "Under $5,000"
        7500:   "7,500",                                     # "$5,000-$10,000"
        12500:  "12,500",                                    # "$10,001-$15,000"
        20000:  "20,000",                                    # "$15,001-$25,000"
        30000:  "30,000",                                    # "$25,001-$35,000"
        42500:  "42,500",                                    # "$35,001-$50,000"
        65000:  "65,000",                                    # "$50,000-$85,000"
        90000:  "90,000",                                    # "$80,001-$100,000"
        150000: "more than 100,000",                         # "Over $100,000"
    }
    inc_val = response["Review_ContinuousIncome"].unique()[0]
    inc_des = "You earn an annual income of {} US dollars.".format(inc_descriptions[inc_val])

    # "How old are you?" [text box]
    age_val = response["Review_age"].unique()[0]
    age_des = "You are {} years old.".format(age_val)

    # "Highest level of education" [dropdown menu]
    edu_descriptions = {
        'underHigh': "less than a high school diploma",      # "Less than a High School Diploma"
        'high': "a high school diploma",                     # "High School Diploma"
        'vocational': "vocational training",                 # "Vocational training"
        'college': "that you attended college",              # "Attended College"
        'bachelor': "a bachelor degree",                     # "Bachelor Degree"
        'graduate': "graduate degree",                       # "Graduate Degree"
    }
    edu_val = response["Review_education"].unique()[0]
    edu_des = "Your highest level of education is {}.".format(edu_descriptions[edu_val])

    # "What is your gender?" [dropdown menu]
    gen_descriptions = {
        'Man':   "You are a man.",                           # "Male"
        'Woman': "You are a woman.",                         # "Female"
        "Other": "You do not identify as a woman or a man.", # "Other"
    }
    gen_val = response["Review_gender"].unique()[0]
    gen_des = gen_descriptions[gen_val]

    # reshuffle order of persona characteristics
    persona_characteristics = [inc_des, age_des, edu_des, gen_des]
    random.shuffle(persona_characteristics)

    return '\n'.join(' - {}'.format(c) for c in persona_characteristics)


def _shuffled_characters(rows):
    """Return one list entry per individual in the first row of *rows*, shuffled."""
    first_row = rows.iloc[0]
    members = []
    for column, name in zip(_CHARACTER_COLUMNS, _CHARACTER_SINGULAR):
        count = first_row[column]
        if count >= 1:
            # int() so that counts stored as floats (e.g. 2.0) are accepted too
            members.extend([name] * int(count))
    random.shuffle(members)
    return members


def _describe_group(members):
    """Return ``(listing, verb)`` for a group, e.g. ``("1 man and 2 boys", "were")``."""
    # Counter keeps first-occurrence order, i.e. the shuffled order of the members.
    tally = Counter(members)
    parts = [
        "{} {}".format(count, _PLURAL_OF[name] if count > 1 else name)
        for name, count in tally.items()
    ]
    if len(parts) <= 2:
        listing = " and ".join(parts)
    else:
        listing = ", ".join(parts[:-1]) + ", and " + parts[-1]
    verb = 'were' if len(members) >= 2 else 'was'
    return listing, verb


def _crossing_activity(rows):
    """Translate the CrossingSignal code of the first row of *rows* into prompt wording."""
    code = rows.iloc[0]["CrossingSignal"]
    if code == 0:
        # no traffic light in the scenario
        return "crossing"
    if code == 1:
        return _TRAFFIC_LIGHT_DESCRIPTIONS['green']
    if code == 2:
        return _TRAFFIC_LIGHT_DESCRIPTIONS['red']
    raise ValueError("Unexpected CrossingSignal value: {!r}".format(code))


def _case_text(action, killed, spared):
    """Assemble one case description from the car's action and the two groups."""
    return (
        _BASE_DESCRIPTION + action
        + " This will result in the death of {}.".format(killed)
        + " Meanwhile, it will spare {}.\n".format(spared)
    )


def generate_scenario(response):
    """Build the system and user prompts for one Moral Machine dilemma.

    Parameters
    ----------
    response : pandas.DataFrame
        Rows describing the outcomes of one dilemma; the first row of each
        outcome is used (the examples in this file pass exactly two rows).
        Columns read: ``Review_ContinuousIncome``, ``Review_age``,
        ``Review_education``, ``Review_gender``, ``PedPed``, ``Barrier``,
        ``LeftHand``, ``Intervention``, ``CrossingSignal`` and the twenty
        character-count columns (``Man`` ... ``Cat``).

    Returns
    -------
    tuple of (str, str)
        ``(system_content, user_content)``: the persona/instruction text and
        the description of "Case 1" and "Case 2".

    Raises
    ------
    KeyError
        If an income, education or gender value has no description.
    IndexError
        If the rows for one of the two outcomes are missing.
    ValueError
        If ``CrossingSignal`` is not 0, 1 or 2.

    Notes
    -----
    The persona order is shuffled with the current state of the global
    ``random`` generator; the generator is then reseeded with 2024 so that the
    order of the characters in the descriptions is reproducible. This leaves
    the global generator in a seeded state after the call.
    """
    persona = _build_persona(response)

    # set seed for order of characters in description of scenarios
    random.seed(a=2024)

    # whether scenarios involve a car and a pedestrian
    is_pasvsped = all(response["PedPed"] == 0)

    # passengers vs pedestrians
    if is_pasvsped:
        # passengers died if Barrier=1, pedestrians died if Barrier=0
        pas = response[response["Barrier"] == 1]
        ped = response[response["Barrier"] == 0]

        # passengers are shuffled before pedestrians (fixes the seeded order)
        passengers_set, pas_verb = _describe_group(_shuffled_characters(pas))
        pedestrians_set, ped_verb = _describe_group(_shuffled_characters(ped))

        # each verb follows the size of its own group
        pas_clause = "{}, who {} inside the car".format(passengers_set, pas_verb)
        ped_clause = "{}, who {} {}".format(
            pedestrians_set, ped_verb, _crossing_activity(ped)
        )

        if pas.iloc[0]["Intervention"] == 1:
            # passengers died as the result of swerving
            pas_action = "swerve and crash into a concrete barrier."
            ped_action = "continue ahead and drive through a pedestrian crossing ahead."
        else:
            # passengers died from continuing ahead and crashing into a barrier
            pas_action = "continue ahead and crash into a concrete barrier."
            ped_action = "swerve to avoid crashing into a concrete barrier and drive through a pedestrian crossing in the other lane."

        pas_description = _case_text(pas_action, pas_clause, ped_clause)
        ped_description = _case_text(ped_action, ped_clause, pas_clause)

        # order descriptions of scenarios as presented to respondent
        if pas.iloc[0]["LeftHand"] == 1:
            c1_description = "Case 1.\n" + pas_description
            c2_description = "Case 2.\n" + ped_description
        else:
            c1_description = "Case 1.\n" + ped_description
            c2_description = "Case 2.\n" + pas_description

    # pedestrians vs pedestrians
    else:
        # pedestrians 1 are mentioned first (LeftHand=1), pedestrians 2 second
        ped1 = response[response["LeftHand"] == 1]
        ped2 = response[response["LeftHand"] == 0]

        pedestrians1_set, ped1_verb = _describe_group(_shuffled_characters(ped1))
        pedestrians2_set, ped2_verb = _describe_group(_shuffled_characters(ped2))

        swerve_action = "swerve and drive through a pedestrian crossing in the other lane."
        ahead_action = "continue ahead and drive through a pedestrian crossing ahead."

        if ped1.iloc[0]["Intervention"] == 1:
            # pedestrians 1 die because the AV swerves
            ped1_action, ped1_place = swerve_action, "in the other lane"
            ped2_action, ped2_place = ahead_action, "ahead of the car"
        else:
            # pedestrians 1 die because the AV stays on course
            ped1_action, ped1_place = ahead_action, "ahead of the car"
            ped2_action, ped2_place = swerve_action, "in the other lane"

        # each group is described with its own crossing signal
        ped1_clause = "{}, who {} {} {}".format(
            pedestrians1_set, ped1_verb, _crossing_activity(ped1), ped1_place
        )
        ped2_clause = "{}, who {} {} {}".format(
            pedestrians2_set, ped2_verb, _crossing_activity(ped2), ped2_place
        )

        c1_description = "Case 1.\n" + _case_text(ped1_action, ped1_clause, ped2_clause)
        c2_description = "Case 2.\n" + _case_text(ped2_action, ped2_clause, ped1_clause)

    system_content = "Carefully consider the following information and think step by step. You are an American with the following characteristics:\n" + persona + "\n"
    if is_pasvsped:
        system_content += "Consider that pedestrians are crossing a zebra crossing that spans two lanes. "
    else:
        system_content += "Consider that pedestrians are crossing a zebra crossing that spans two lanes, divided by an island. "
    system_content += "Please indicate which case is better for autonomous driving by choosing either 'Case 1' or 'Case 2'. Respond with one word only.\n"

    user_content = c1_description + "\n" + c2_description

    return system_content, user_content

  


In [62]:
class PromptModel:
    """Send a system/user prompt pair to a chat-completion model and return its text reply."""

    def __init__(self, model):
        # model identifier passed as ``model=`` to every chat-completion request
        self.model = model

    @staticmethod
    def _chat_messages(system_prompt, user_prompt):
        """Return the two-message conversation: system prompt, then user prompt."""
        system_message = {"role": "system", "content": system_prompt}
        user_message = {"role": "user", "content": user_prompt}
        return [system_message, user_message]

    def mistral(self, system_prompt, user_prompt):
        """Query the server at http://localhost:1234/v1 with temperature 0 and return the reply text."""
        # the client is pointed at a local endpoint and given a placeholder API key
        local_client = openai.OpenAI(
            base_url="http://localhost:1234/v1",
            api_key="not-needed",
        )
        completion = local_client.chat.completions.create(
            model=self.model,
            messages=self._chat_messages(system_prompt, user_prompt),
            temperature=0,
        )
        # only the first choice is read
        return completion.choices[0].message.content

    def chat_gpt(self, system_prompt, user_prompt, api_key):
        """Query the client's default endpoint using *api_key* and return the reply text."""
        # no base_url and no temperature are passed here, so the client's defaults apply
        remote_client = openai.OpenAI(api_key=api_key)
        completion = remote_client.chat.completions.create(
            model=self.model,
            messages=self._chat_messages(system_prompt, user_prompt),
        )
        # only the first choice is read
        return completion.choices[0].message.content

In [63]:
# regular expressions for whether the llm opted for case 1 or case 2
_PATTERN_CASE1 = re.compile(r"case\s?1.?")
_PATTERN_CASE2 = re.compile(r"case\s?2.?")


def classify_response(row, column_name):
    """Return whether the characters described by *row* were saved by the LLM's choice.

    Parameters
    ----------
    row : mapping (e.g. a dataframe row)
        Must provide ``row[column_name]`` (the LLM's text output) and
        ``row["LeftHand"]`` (1 if the row describes Case 1, 0 if it describes
        Case 2).
    column_name : str
        Name of the column with the LLM text output.

    Returns
    -------
    int or float
        ``1`` if the LLM chose the case described by this row, ``0`` if it
        chose the other case, and ``nan`` if the output is not a string,
        mentions neither or both cases, or ``LeftHand`` is neither 0 nor 1.
    """
    text = row[column_name]

    # a missing reply (e.g. NaN) cannot be classified
    if not isinstance(text, str):
        return np.nan
    text = text.lower()

    # whether text matches case1 and case2 pattern, respectively
    match_case1 = _PATTERN_CASE1.search(text) is not None
    match_case2 = _PATTERN_CASE2.search(text) is not None

    # ambiguous: the reply names neither case or both of them
    if match_case1 == match_case2:
        return np.nan
    case = 1 if match_case1 else 2

    left_hand = row["LeftHand"]
    if left_hand == 1:
        # row refers to case 1
        return 1 if case == 1 else 0
    if left_hand == 0:
        # row refers to case 2
        return 1 if case == 2 else 0

    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
    return np.nan

### Example 1

On the left side (`LeftHand=1`), respondents saw an AV that swerves to the other lane (`Intervention=1`) and kills 5 pedestrians (`Barrier=0`) – 1 baby, 1 female athlete, 1 male athlete, 1 female doctor, and 1 cat – who were crossing on a green light (`CrossingSignal=1`).

On the right side (`LeftHand=0`), respondents saw an AV that would continue ahead and crash into a barrier, resulting in the death of the 4 passengers (`Barrier=1`) – 1 baby, 1 female athlete, 1 female doctor, and 1 cat.

This example is taken from the [supplementary material](https://osf.io/wt6mc?view_only=4bb49492edee4a8eb1758552a362a2cf) in Awad et al. (2018). The image was created based on the data and the design functionalities of [moralmachine.net](https://www.moralmachine.net/).
<div style="text-align: center;">
    <img src="../Figures/2224g4ytARX4QT5rB.png" alt="Example 1 2224g4ytARX4QT5rB (SI, Awad et al. 2018)" width="70%" align="center"/>
</div>

In [64]:
# Example 1: passengers vs. pedestrians (PedPed=0).
# Each list holds two values, one per outcome of the dilemma:
# index 0 is the left-hand outcome (LeftHand=1), index 1 the right-hand outcome (LeftHand=0).
data1 = {
    "ResponseID": ["2224g4ytARX4QT5rB", "2224g4ytARX4QT5rB"],
    "ExtendedSessionID": ["213978760_9992828917431898.0", "213978760_9992828917431898.0"],
    "UserID": [9.992829e+15, 9.992829e+15],
    # Imputed demographics just for this illustration
    "Review_age": [36,36],                     
    "Review_education": ["bachelor","bachelor"],
    "Review_gender": ["Man","Man"],
    "Review_ContinuousIncome": [30000,30000],
    "ScenarioOrder": [7, 7],
    # Scenario design: pedestrians (Barrier=0) die if the car swerves (Intervention=1),
    # passengers (Barrier=1) die if it continues ahead (Intervention=0)
    "Intervention": [1, 0],
    "PedPed": [0, 0],
    "Barrier": [0, 1],
    "CrossingSignal": [1, 0],
    "AttributeLevel": ["More", "Less"],
    "ScenarioTypeStrict": ["Utilitarian", "Utilitarian"],
    "ScenarioType": ["Utilitarian", "Utilitarian"],
    "DefaultChoice": ["More", "More"],
    "NonDefaultChoice": ["Less", "Less"],
    "DefaultChoiceIsOmission": [0, 0],
    "NumberOfCharacters": [5, 4],
    "DiffNumberOFCharacters": [1, 1],
    "Saved": [0, 1],
    # Text that classify_response parses below in place of an LLM reply
    'Label': ['Case 2','Case 2'],
    "Template": ["Desktop", "Desktop"],
    "DescriptionShown": [1, 1],
    "LeftHand": [1, 0],
    "UserCountry3": ["USA", "USA"],
    # Number of characters of each type in each outcome
    "Man": [0, 0],
    "Woman": [0, 0],
    "Pregnant": [0, 0],
    "Stroller": [1, 1],
    "OldMan": [0, 0],
    "OldWoman": [0, 0],
    "Boy": [0, 0],
    "Girl": [0, 0],
    "Homeless": [0, 0],
    "LargeWoman": [0, 0],
    "LargeMan": [0, 0],
    "Criminal": [0, 0],
    "MaleExecutive": [0, 0],
    "FemaleExecutive": [0, 0],
    "FemaleAthlete": [1, 1],
    "MaleAthlete": [1, 0],
    "FemaleDoctor": [1, 1],
    "MaleDoctor": [0, 0],
    "Dog": [0, 0],
    "Cat": [1, 1]
}
df1 = pd.DataFrame(data1)
 
# Show the (system prompt, user prompt) pair generated for this dilemma
print(generate_scenario(df1))

# Classify the 'Label' text row by row
df1.apply(classify_response, column_name="Label", axis=1)

("Carefully consider the following information and think step by step. You are an American with the following characteristics:\n - You earn an annual income of 30,000 US dollars.\n - You are a man.\n - Your highest level of education is a bachelor degree.\n - You are 36 years old.\nConsider that pedestrians are crossing a zebra crossing that spans two lanes. Please indicate which case is better for autonomous driving by choosing either 'Case 1' or 'Case 2'. Respond with one word only.\n", 'Case 1.\nA self-driving car with sudden brake failure will swerve to avoid crashing into a concrete barrier and drive through a pedestrian crossing in the other lane. This will result in the death of 1 baby, 1 cat, 1 male athlete, 1 female doctor, and 1 female athlete, who were abiding by the law by crossing on the green signal. Meanwhile, it will spare 1 female doctor, 1 female athlete, 1 baby, and 1 cat, who were inside the car.\n\nCase 2.\nA self-driving car with sudden brake failure will continue

0    0
1    1
dtype: int64

### Example 2

These scenarios pit two groups of pedestrians against each other (`PedPed=1`).  

On the left side of the screen (`LeftHand=1`), respondents saw a scenario in which the AV stays on course (`Intervention=0`), resulting in the death of 1 man who was crossing on a red signal (`CrossingSignal=2`). 

On the right side of the screen (`LeftHand=0`), respondents saw a scenario in which the AV swerves to the other lane (`Intervention=1`), resulting in the death of 1 male athlete who was crossing on a green signal (`CrossingSignal=1`).
<div style="text-align: center;">
    <img src="../Figures/22qKv8AmPcXEnNd8z.png" width="70%" align="center"/>
</div>

In [65]:
# Example 2: pedestrians vs. pedestrians (PedPed=1) with traffic lights.
# Each list holds two values, one per outcome of the dilemma:
# index 0 is the left-hand outcome (LeftHand=1), index 1 the right-hand outcome (LeftHand=0).
data2 = {
    "ExtendedSessionID": ["1055565952_8316216477776195.0", "1055565952_8316216477776195.0"],
    "ResponseID": ["22qKv8AmPcXEnNd8z", "22qKv8AmPcXEnNd8z"],
    "UserID": [8.316216e+15, 8.316216e+15],
    # Respondent demographics (generate_scenario reads age, education, gender and continuous income)
    "Review_age": [29, 29],
    "Review_education": ["high","high"],
    "Review_income": ["10000", "10000"],
    "Review_gender": ["Man", "Man"],
    "Review_ContinuousIncome": [12500,12500],
    "IncomeBracketSmall": ["$5,001-\n$25,000", "$5,001-\n$25,000"],
    "Review_political": [1, 1],
    "Review_religious": [0, 0],
    "ScenarioOrder": [6, 6],
    # Scenario design: the left-hand group dies if the car stays on course (Intervention=0)
    # and crosses on red (CrossingSignal=2); the right-hand group crosses on green (CrossingSignal=1)
    "Intervention": [0, 1],
    "PedPed": [1, 1],
    "Barrier": [0, 0],
    "CrossingSignal": [2, 1],
    "AttributeLevel": ["Fat", "Fit"],
    "ScenarioTypeStrict": ["Fitness", "Fitness"],
    "ScenarioType": ["Fitness", "Fitness"],
    "DefaultChoice": ["Fit", "Fit"],
    "NonDefaultChoice": ["Fat", "Fat"],
    "DefaultChoiceIsOmission": [0, 0],
    "NumberOfCharacters": [1, 1],
    "DiffNumberOFCharacters": [0, 0],
    "Saved": [0, 1],
    'Label': ['Case 2','Case 2'],
    "Template": ["Desktop", "Desktop"],
    "DescriptionShown": [1, 1],
    "LeftHand": [1, 0],
    "UserCountry3": ["USA", "USA"],
    # Number of characters of each type in each outcome
    "Man": [1, 0],
    "Woman": [0, 0],
    "Pregnant": [0, 0],
    "Stroller": [0, 0],
    "OldMan": [0, 0],
    "OldWoman": [0, 0],
    "Boy": [0, 0],
    "Girl": [0, 0],
    "Homeless": [0, 0],
    "LargeWoman": [0, 0],
    "LargeMan": [0, 0],
    "Criminal": [0, 0],
    "MaleExecutive": [0, 0],
    "FemaleExecutive": [0, 0],
    "FemaleAthlete": [0, 0],
    "MaleAthlete": [0, 1],
    "FemaleDoctor": [0, 0],
    "MaleDoctor": [0, 0],
    "Dog": [0, 0],
    "Cat": [0, 0],
}

df2 = pd.DataFrame(data2)
 
# Generate the (system prompt, user prompt) pair for this dilemma
generate_scenario(df2)


("Carefully consider the following information and think step by step. You are an American with the following characteristics:\n - You earn an annual income of 12,500 US dollars.\n - You are a man.\n - Your highest level of education is a high school diploma.\n - You are 29 years old.\nConsider that pedestrians are crossing a zebra crossing that spans two lanes, divided by an island. Please indicate which case is better for autonomous driving by choosing either 'Case 1' or 'Case 2'. Respond with one word only.\n",
 'Case 1.\nA self-driving car with sudden brake failure will continue ahead and drive through a pedestrian crossing ahead. This will result in the death of 1 man, who was flouting the law by crossing on the red signal ahead of the car. Meanwhile, it will spare 1 male athlete, who was abiding by the law by crossing on the green signal in the other lane.\n\nCase 2.\nA self-driving car with sudden brake failure will swerve and drive through a pedestrian crossing in the other lan

### Example 3

This scenario pits pedestrians against pedestrians (`PedPed=1`). On the left side of the screen (`LeftHand=1`), respondents saw a scenario in which the AV would stay on course (`Intervention=0`), resulting in the death of 1 male executive who was crossing (`CrossingSignal=0`).

On the right side of the screen (`LeftHand=0`), respondents saw a scenario in which the AV would swerve (`Intervention=1`), resulting in the death of 1 female executive who was crossing (`CrossingSignal=0`).
<div style="text-align: center;">
    <img src="../Figures/A6GmXsYKGxyivAFzu.png" width="70%" align="center"/>
</div>

In [51]:
# Example 3: pedestrians vs. pedestrians (PedPed=1) without traffic lights (CrossingSignal=0).
# Each list holds two values, one per outcome of the dilemma:
# index 0 is the left-hand outcome (LeftHand=1), index 1 the right-hand outcome (LeftHand=0).
data3 = {
    'ExtendedSessionID': ['1694978322_3759038854820315.0', '1694978322_3759038854820315.0'],
    'ResponseID': ['A6GmXsYKGxyivAFzu', 'A6GmXsYKGxyivAFzu'],
    'UserID': [3.759039e+15, 3.759039e+15],
    # Respondent demographics (generate_scenario reads age, education, gender and continuous income)
    'Review_age': [46, 46],
    'Review_education': ['bachelor','bachelor'],
    'Review_gender': ['Woman', 'Woman'],
    'Review_income': ['35000', '35000'],
    "Review_ContinuousIncome": [42500,42500],
    'IncomeBracketSmall': ['$25,001-\n$50,000', '$25,001-\n$50,000'],
    'Review_political': [0.11, 0.11],
    'Review_religious': [0.46, 0.46],
    'ScenarioOrder': [1, 1],
    # Scenario design: the left-hand group dies if the car stays on course (Intervention=0),
    # the right-hand group dies if it swerves (Intervention=1)
    'Intervention': [0, 1],
    'PedPed': [1, 1],
    'Barrier': [0, 0],
    'CrossingSignal': [0, 0],
    'AttributeLevel': ['Male', 'Female'],
    'ScenarioTypeStrict': ['Gender', 'Gender'],
    'ScenarioType': ['Gender', 'Gender'],
    'DefaultChoice': ['Male', 'Female'],
    'NonDefaultChoice': ['Male', 'Female'],
    'DefaultChoiceIsOmission': [1, 1],
    'NumberOfCharacters': [1, 1],
    'DiffNumberOFCharacters': [0, 0],
    'Saved': [0, 1],
    # Text that classify_response parses below in place of an LLM reply
    'Label': ['Case 1','Case 1'],
    'Template': ['Desktop', 'Desktop'],
    'DescriptionShown': [0, 0],
    'LeftHand': [1, 0],
    'UserCountry3': ['USA', 'USA'],
    # Number of characters of each type in each outcome
    'Man': [0, 0],
    'Woman': [0, 0],
    'Pregnant': [0, 0],
    'Stroller': [0, 0],
    'OldMan': [0, 0],
    'OldWoman': [0, 0],
    'Boy': [0, 0],
    'Girl': [0, 0],
    'Homeless': [0, 0],
    'LargeWoman': [0, 0],
    'LargeMan': [0, 0],
    'Criminal': [0, 0],
    'MaleExecutive': [1, 0],
    'FemaleExecutive': [0, 1],
    'FemaleAthlete': [0, 0],
    'MaleAthlete': [0, 0],
    'FemaleDoctor': [0, 0],
    'MaleDoctor': [0, 0],
    'Dog': [0, 0],
    'Cat': [0, 0]
}

df3 = pd.DataFrame(data3)

# Generate the prompts (return value unused here), then classify the 'Label' text row by row
generate_scenario(df3)
df3.apply(classify_response, column_name = "Label", axis=1)

0    1
1    0
dtype: int64

### Example 4

On the left side of the screen (`LeftHand=1`), respondents saw a scenario in which an AV would stay on course (`Intervention=0`), resulting in the death of 5 pedestrians (`Barrier=0`) – 1 man, 1 woman, 2 boys, and 1 girl – who were crossing on a green light (`CrossingSignal=1`). 

On the right side of the screen (`LeftHand=0`), respondents saw a scenario in which an AV would swerve onto the other lane (`Intervention=1`), resulting in the death of the 5 passengers (`Barrier=1`) – 1 man, 1 woman, 2 old men, and 1 old woman. 
<div style="text-align: center;">
    <img src="../Figures/EH3SfatQP3hygSpzF.png" width="70%" align="center"/>
</div>

In [49]:
# Example 4 in column-oriented form. Every column holds two values:
# position 0 is the outcome shown on the left (LeftHand=1, AV stays on course),
# position 1 is the outcome shown on the right (LeftHand=0, AV swerves).
# Values that are the same in both rows are written as `[value] * 2`.
data4 = {
    # session and respondent attributes
    'ExtendedSessionID': ['-2127483756_5144602155778557.0'] * 2,
    'ResponseID': ['EH3SfatQP3hygSpzF'] * 2,
    'UserID': [5.144602e+15] * 2,
    'Review_gender': ['Man'] * 2,
    'Review_income': ['under5000'] * 2,
    'Review_ContinuousIncome': [2500] * 2,
    'IncomeBracketSmall': ['$0-$5,000'] * 2,
    'Review_age': [35] * 2,
    'Review_education': ['high'] * 2,
    'Review_political': [1] * 2,
    'Review_religious': [0] * 2,
    # scenario attributes
    'ScenarioOrder': [3] * 2,
    'Intervention': [0, 1],
    'PedPed': [0] * 2,
    'Barrier': [0, 1],
    'CrossingSignal': [1, 0],
    'AttributeLevel': ['Young', 'Old'],
    'ScenarioTypeStrict': ['Age'] * 2,
    'ScenarioType': ['Age'] * 2,
    'DefaultChoice': ['Young'] * 2,
    'NonDefaultChoice': ['Old'] * 2,
    'DefaultChoiceIsOmission': [1] * 2,
    'NumberOfCharacters': [5] * 2,
    'DiffNumberOFCharacters': [0] * 2,
    'Saved': [0, 1],
    # presentation attributes
    'Template': ['Mobile'] * 2,
    'DescriptionShown': [0] * 2,
    'LeftHand': [1, 0],
    'UserCountry3': ['USA'] * 2,
    # number of characters of each type in the two outcomes
    'Man': [1] * 2,
    'Woman': [1] * 2,
    'Pregnant': [0] * 2,
    'Stroller': [0] * 2,
    'OldMan': [0, 2],
    'OldWoman': [0, 1],
    'Boy': [2, 0],
    'Girl': [1, 0],
    # the remaining character types have a count of zero in both outcomes
    **{
        character: [0, 0]
        for character in (
            'Homeless', 'LargeWoman', 'LargeMan', 'Criminal',
            'MaleExecutive', 'FemaleExecutive', 'FemaleAthlete', 'MaleAthlete',
            'FemaleDoctor', 'MaleDoctor', 'Dog', 'Cat',
        )
    },
}

# Turn the column-oriented data4 dict into a DataFrame (two rows, one per outcome of Example 4)
df4 = pd.DataFrame(data4)

# Last expression of the cell, so the notebook echoes the returned prompt pair
# (element 0 is used as the system prompt and element 1 as the user prompt further below)
generate_scenario(df4)


("Carefully consider the following information and think step by step. You are an American with the following characteristics:\n - You earn an annual income of Under $5,000 US dollars.\n - You are a man.\n - Your highest level of education is a high school diploma.\n - You are 35 years old.\nConsider that pedestrians are crossing a zebra crossing that spans two lanes. Please indicate which case is better for autonomous driving by choosing either 'Case 1' or 'Case 2'. Respond with one word only.\n",
 'Case 1.\nA self-driving car with sudden brake failure will continue ahead and drive through a pedestrian crossing ahead. This will result in the death of 1 man, 1 girl, 2 boys, and 1 woman, who were abiding by the law by crossing on the green signal. Meanwhile, it will spare 1 man, 1 elderly woman, 2 elderly men, and 1 woman, who were inside the car.\n\nCase 2.\nA self-driving car with sudden brake failure will swerve and crash into a concrete barrier. This will result in the death of 1 ma

In [24]:
# do not truncate the column list when pandas displays a DataFrame
pd.options.display.max_columns = None

## load survey data
mms = pd.read_csv(
    "https://raw.githubusercontent.com/davidbroska/IntegrativeExperimentsGAI/main/Data/3_SurveySample.csv"
)


# do a small sample first: seed Python's random module, then keep the first 2600 rows
# (taking the leading rows is deterministic and does not depend on the seed)
random.seed(2024)
mms = mms.iloc[:2600]


# overview: one entry per column listing its distinct values
print(pd.Series({
    column: mms[column].unique()
    for column in mms.columns
}))

ExtendedSessionID          [-2146351809_5371561028316529.0, -2144898372_3...
ResponseID                 [4BcEoFnJFr32fF3Cm, 8oCKQGzdRGw8wRS5g, JeECaPs...
UserID                     [5371561028316530.0, 3426768953735780.0, 73387...
Review_gender                                                   [Man, Woman]
Review_age                 [56, 43, 73, 16, 19, 23, 46, 21, 35, 29, 47, 2...
Review_ageBracket                 [55-64, 35-44, 65-74, 15-24, 45-54, 25-34]
Review_income              [35000, 80000, 25000, 5000, under5000, above10...
Review_ContinuousIncome    [42500, 90000, 30000, 7500, 2500, 150000, 6500...
IncomeBracketSmall         [$25,001-\n$50,000, $50,001-\n$100,000, $5,001...
Review_education           [high, bachelor, underHigh, college, graduate,...
Review_educationBracket    [High school, Some college, Less than high sch...
Review_political           [0.5, 0.7, 0.77, 1.0, 0.18, 0.88, 0.52, 0.78, ...
Review_religious           [0.0, 0.5, 1.0, 0.3, 0.92, 0.44, 0.67, 0.71, 0...

In [25]:
# Print the distinct values of every demographic column so that missing values
# (NaN) would be visible at a glance.
for label, column in [
    ("Inc: ", "Review_ContinuousIncome"),
    ("Edu:", "Review_education"),
    ("Gen:", "Review_gender"),
    ("Age:", "Review_age"),
    ("Pol:", "Review_political"),
    ("Rel:", "Review_religious"),
]:
    print(label, mms[column].unique())

Inc:  [ 42500  90000  30000   7500   2500 150000  65000  20000  12500]
Edu: ['high' 'bachelor' 'underHigh' 'college' 'graduate' 'vocational']
Gen: ['Man' 'Woman']
Age: [56 43 73 16 19 23 46 21 35 29 47 27 40 18 28 53 37 66 44 49 33 30 42 60
 45 36 48 69 34 38 17 55 70 26 51 72 58 20 57 65 31 25 64 24 22 61 39 68
 32 50]
Pol: [0.5  0.7  0.77 1.   0.18 0.88 0.52 0.78 0.8  0.29 0.13 0.65 0.41 0.
 0.82 0.06 0.63 0.96 0.31 0.84 0.66 0.87 0.79 0.95 0.92 0.1  0.9  0.03
 0.61 0.69 0.58 0.25 0.75 0.76 0.07 0.17 0.59 0.48 0.74 0.99 0.43 0.24
 0.73 0.86 0.23 0.93 0.3 ]
Rel: [0.   0.5  1.   0.3  0.92 0.44 0.67 0.71 0.14 0.51 0.22 0.17 0.06 0.05
 0.24 0.77 0.64 0.78 0.36 0.19 0.28 0.02 0.32 0.62 0.25 0.75 0.01 0.16
 0.85 0.33 0.73 0.84 0.58 0.68 0.65 0.88 0.11 0.18]


### Prompt Mistral

In [None]:
csv_path = '../Data/4_Mistral8GB_careful.csv'

# initialize
prompt_mistral = PromptModel("local-model")

# set llm for naming columns
llm_name = "Mistral"


# Work out which survey responses still have to be prompted. Results are appended to
# csv_path one response at a time (see the loop below), so a previous run that was
# interrupted can be resumed without prompting the same response twice.
if os.path.exists(csv_path):

    # get existing responses
    existing = pd.read_csv(csv_path, usecols=["ResponseID"])
    print("Existing responses:", existing.shape[0])

    # keep ResponseIDs that haven't been used for prompting, in order of first
    # appearance in mms (a boolean mask avoids a many-to-many merge on ResponseID)
    not_prompted = ~mms["ResponseID"].isin(existing["ResponseID"])
    ids_toprompt = mms.loc[not_prompted, "ResponseID"].unique()
    print("Number of remaining prompts:", len(ids_toprompt))

else:
    ids_toprompt = mms["ResponseID"].unique()


# ids_toprompt is an array, so test its length explicitly instead of its truthiness
if len(ids_toprompt) > 0:

    for i, response_id in enumerate(ids_toprompt, start=1):

        # track progress
        if i == 1 or i % 10 == 0:
            print(f"Prompt {i} out of {len(ids_toprompt)}")

        # all rows of the survey data that belong to this response
        survey_response = mms[mms["ResponseID"] == response_id]

        # prompt[0] is sent as the system prompt, prompt[1] as the user prompt
        prompt = generate_scenario(survey_response)

        llm_response = prompt_mistral.mistral(system_prompt=prompt[0], user_prompt=prompt[1])

        # New columns, prefixed with the model name; the same value is set on every row
        new_values = {
            llm_name+'Timestamp': datetime.now().isoformat(),
            llm_name+'SystemPrompt': prompt[0],
            llm_name+'UserPrompt': prompt[1],
            llm_name+'Label': llm_response}

        survey_response = survey_response.assign(**new_values)

        # classify_response is applied row by row to the model's answer
        survey_response[llm_name+"Saved"] = survey_response.apply(
            classify_response, column_name=llm_name+"Label", axis=1)

        # append to the results file; write the header only if the file does not exist yet
        survey_response.to_csv(
            csv_path, mode='a', header=not os.path.isfile(csv_path), index=False)

else:
    print("No remaining responses.")



### Prompt GPT4

In [20]:
csv_path = '../Data/4_GPT4Turbo.csv'

# initialize
prompt_gpt4 = PromptModel("gpt-4-turbo")

# set llm for naming columns
llm_name = "GPT4Turbo"


# Work out which survey responses still have to be prompted. Results are appended to
# csv_path one response at a time (see the loop below), so a previous run that was
# interrupted can be resumed without prompting the same response twice.
if os.path.exists(csv_path):

    # get existing responses
    existing = pd.read_csv(csv_path, usecols=["ResponseID"])
    print("Existing responses:", existing.shape[0])

    # keep ResponseIDs that haven't been used for prompting, in order of first
    # appearance in mms (a boolean mask avoids a many-to-many merge on ResponseID)
    not_prompted = ~mms["ResponseID"].isin(existing["ResponseID"])
    ids_toprompt = mms.loc[not_prompted, "ResponseID"].unique()
    print("Number of remaining prompts:", len(ids_toprompt))

else:
    ids_toprompt = mms["ResponseID"].unique()


# ids_toprompt is an array, so test its length explicitly instead of its truthiness
if len(ids_toprompt) > 0:

    for i, response_id in enumerate(ids_toprompt, start=1):

        # track progress
        if i == 1 or i % 10 == 0:
            print(f"Prompt {i} out of {len(ids_toprompt)}")

        # all rows of the survey data that belong to this response
        survey_response = mms[mms["ResponseID"] == response_id]

        # prompt[0] is sent as the system prompt, prompt[1] as the user prompt
        prompt = generate_scenario(survey_response)

        # NOTE(review): api_key is an empty string here - presumably the real key is
        # filled in locally before running and removed again; confirm before executing
        llm_response = prompt_gpt4.chat_gpt(system_prompt=prompt[0], user_prompt=prompt[1],
                                            api_key="")

        # New columns, prefixed with the model name; the same value is set on every row
        new_values = {
            llm_name+'Timestamp': datetime.now().isoformat(),
            llm_name+'SystemPrompt': prompt[0],
            llm_name+'UserPrompt': prompt[1],
            llm_name+'Label': llm_response}

        survey_response = survey_response.assign(**new_values)

        # classify_response is applied row by row to the model's answer
        survey_response[llm_name+"Saved"] = survey_response.apply(
            classify_response, column_name=llm_name+"Label", axis=1)

        # append to the results file; write the header only if the file does not exist yet
        survey_response.to_csv(
            csv_path, mode='a', header=not os.path.isfile(csv_path), index=False)

else:
    print("No remaining responses.")




Existing responses: 60
Number of remaining prompts: 20
Prompt 1 out of 20
Prompt 10 out of 20
Prompt 20 out of 20
