## 1. Set-Up

In [1]:
%load_ext autoreload
%autoreload 2

### 1.1. Imports

In [2]:
import itertools
import math
import os
import pathlib

import openai
import pandas as pd
import tiktoken
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

### 1.2. Language Model Settings

In [3]:
openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def call_openai_api(prompt: str) -> dict:
    return openai.Completion.create(
        prompt=prompt,
        engine="text-davinci-003",
        max_tokens=0,
        temperature=1,
        n=1,
        logprobs=1,
        echo=True,
        presence_penalty=0,
        frequency_penalty=0,
        stop=None
    )

In [5]:
encoding = tiktoken.get_encoding("cl100k_base")

In [6]:
def num_tokens(string: str) -> int:
    return len(encoding.encode(string))

### 1.3. Questions Settings

In [7]:
def get_question(content: str, options: list[str]) -> str:
    return content + " (Options: '" + "', '".join(options) + "')"

In [8]:
profile_questions = [
    {
        "id": "AGE",
        "content": "What is your age?",
        "options": [
            "18-22",
            "23-29",
            "30-49",
            "50-64",
            "65+"
        ]
    },
    {
        "id": "SEX",
        "content": "What is your Sex?",
        "options": [
            "Male",
            "Female",
            "Other"
        ]
    },
    {
        "id": "EDUCATION",
        "content": "What is your Education?",
        "options": [
            "Less than high school degree",
            "High school degree to less than 4-year college degree",
            "4-year college degree or more"
        ]
    },
    {
        "id": "RACE/ETHNICITY",
        "content": "What is your Race/Ethnicity?",
        "options": [
            "White",
            "Black/African American",
            "Hispanic",
            "Asian",
            "Other"
        ]
    },
    {
        "id": "POLITICAL PARTY",
        "content": "What is your Political Party?",
        "options": [
            "Democrat",
            "Republican",
            "Independent",
            "Other"
        ]
    },
    {
        "id": "POLITICAL IDEOLOGY",
        "content": "What is your Political Ideology?",
        "options": [
            "Liberal",
            "Moderate",
            "Conservative"
        ]
    }
]
profile_questions_df = pd.DataFrame(profile_questions)
profile_questions_df["question"] = profile_questions_df.apply(lambda q: get_question(q.content, q.options), axis=1)
profile_questions_df.set_index("id", inplace=True)

In [9]:
profile_questions_df.head()

Unnamed: 0_level_0,content,options,question
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AGE,What is your age?,"[18-22, 23-29, 30-49, 50-64, 65+]","What is your age? (Options: '18-22', '23-29', ..."
SEX,What is your Sex?,"[Male, Female, Other]","What is your Sex? (Options: 'Male', 'Female', ..."
EDUCATION,What is your Education?,"[Less than high school degree, High school deg...",What is your Education? (Options: 'Less than h...
RACE/ETHNICITY,What is your Race/Ethnicity?,"[White, Black/African American, Hispanic, Asia...",What is your Race/Ethnicity? (Options: 'White'...
POLITICAL PARTY,What is your Political Party?,"[Democrat, Republican, Independent, Other]",What is your Political Party? (Options: 'Democ...


In [10]:
survey_questions = [
    {
        "id": "ISGZ_BDN",
        "content":
            "Do you approve or disapprove of the Biden administration’s response to the Israel-Hamas war?",
        "options": [
            "Strongly approve",
            "Somewhat approve",
            "Strongly disapprove",
            "Somewhat disapprove",
            "Not sure"
        ]
    },
    {
        "id": "UKRFOL",
        "content":
            "How closely have you been following news about Russia’s invasion of Ukraine?",
        "options": [
            "Extremely closely",
            "Very closely",
            "Somewhat closely",
            "Not too closely",
            "Not at all closely"
        ]
    },
    {
        "id": "JB_RUSUKR_APP",
        "content":
            "Do you approve or disapprove of the Biden administration’s response to Russia’s invasion of Ukraine?",
        "options": [
            "Strongly approve",
            "Somewhat approve",
            "Strongly disapprove",
            "Somewhat disapprove",
            "Not sure"
        ]
    },
    {
        "id": "UKR_SPRT",
        "content":
            "When it comes to Russia’s invasion of Ukraine, do you think the U.S. is providing …",
        "options": [
            "Too much support to Ukraine",
            "Not enough support to Ukraine",
            "About the right amount of support to Ukraine",
            "Not sure"
        ]
    },
    {
        "id": "RU_UKN_THRT2",
        "content":
            "How much of a threat to U.S. interests is Russia’s invasion of Ukraine?",
        "options": [
            "A major threat",
            "A minor threat",
            "Not a threat",
            "Not sure"
        ]
    },
    {
        "id": "ISGZ_FOL",
        "content":
            "How closely have you been following news about the Israel-Hamas war?",
        "options": [
            "Extremely closely",
            "Very closely",
            "Somewhat closely",
            "Not too closely",
            "Not at all closely"
        ]
    },
    {
        "id": "ISGZ_RESPNSBL_ISRGOV",
        "content":
            "How much responsibility does the Israeli government have for Israel and Hamas currently being at war?",
        "options": [
            "A lot",
            "A little",
            "None",
            "Not sure"
        ]
    },
    {
        "id": "ISGZ_RESPNSBL_HMS",
        "content":
            "How much responsibility does the militant Palestinian group Hamas have for Israel and Hamas currently being at war?",
        "options": [
            "A lot",
            "A little",
            "None",
            "Not sure"
        ]
    },
    {
        "id": "ISGZ_RESPNSBL_ISRPPL",
        "content":
            "How much responsibility do the Israeli people have for Israel and Hamas currently being at war?",
        "options": [
            "A lot",
            "A little",
            "None",
            "Not sure"
        ]
    },
    {
        "id": "ISGZ_RESPNSBL_PLS",
        "content":
            "How much responsibility do the Palestinian people have for Israel and Hamas currently being at war?",
        "options": [
            "A lot",
            "A little",
            "None",
            "Not sure"
        ]
    },
    {
        "id": "ISGZ_RESPNS",
        "content":
            "Thinking about Israel’s current military operation against Hamas, is Israel…",
        "options": [
            "Going too far",
            "Not going far enough",
            "Taking about the right approach",
            "Not sure"
        ]
    },
    {
        "id": "BIDEN_FAVORISRPAL",
        "content":
            "Do you think Joe Biden is…",
        "options": [
            "Favoring the Israelis too much",
            "Favoring the Palestinians too much",
            "Striking about the right balance",
            "Not sure"
        ]
    },
    {
        "id": "ISGZ_CONC_BIG",
        "content":
            "Thinking about the war between Israel and Hamas, how concerned are you about the possibility of the war expanding to other countries in the region?",
        "options": [
            "Extremely concerned",
            "Very concerned",
            "Somewhat concerned",
            "Not too concerned",
            "Not at all concerned"
        ]
    },
    {
        "id": "ISGZ_CONC_TERR",
        "content":
            "Thinking about the war between Israel and Hamas, how concerned are you about the possibility of the war leading to a terrorist attack in the United States?",
        "options": [
            "Extremely concerned",
            "Very concerned",
            "Somewhat concerned",
            "Not too concerned",
            "Not at all concerned"
        ]
    },
    {
        "id": "ISGZ_CONC_RSLV",
        "content":
            "Thinking about the war between Israel and Hamas, how concerned are you about the possibility of the war going on for a very long time?",
        "options": [
            "Extremely concerned",
            "Very concerned",
            "Somewhat concerned",
            "Not too concerned",
            "Not at all concerned"
        ]
    },
    {
        "id": "ISGZ_CONC_VLC_JW",
        "content":
            "Thinking about the war between Israel and Hamas, how concerned are you about the possibility of increasing violence against Jewish people in the U.S.?",
        "options": [
            "Extremely concerned",
            "Very concerned",
            "Somewhat concerned",
            "Not too concerned",
            "Not at all concerned"
        ]
    },
    {
        "id": "ISGZ_CONC_VLC_MSLM",
        "content":
            "Thinking about the war between Israel and Hamas, how concerned are you about the possibility of increasing violence against Muslim people in the U.S.?",
        "options": [
            "Extremely concerned",
            "Very concerned",
            "Somewhat concerned",
            "Not too concerned",
            "Not at all concerned"
        ]
    },
    {
        "id": "COEX",
        "content":
            "Do you think a way can be found for Israel and an independent Palestinian state to coexist peacefully with each other?",
        "options": [
            "Yes, can coexist peacefully",
            "No, can not coexist peacefully"
        ]
    },
]
survey_questions_df = pd.DataFrame(survey_questions)
survey_questions_df["question"] = survey_questions_df.apply(lambda q: get_question(q.content, q.options), axis=1)
survey_questions_df.set_index("id", inplace=True)

In [11]:
survey_questions_df.head()

Unnamed: 0_level_0,content,options,question
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ISGZ_BDN,Do you approve or disapprove of the Biden admi...,"[Strongly approve, Somewhat approve, Strongly ...",Do you approve or disapprove of the Biden admi...
UKRFOL,How closely have you been following news about...,"[Extremely closely, Very closely, Somewhat clo...",How closely have you been following news about...
JB_RUSUKR_APP,Do you approve or disapprove of the Biden admi...,"[Strongly approve, Somewhat approve, Strongly ...",Do you approve or disapprove of the Biden admi...
UKR_SPRT,"When it comes to Russia’s invasion of Ukraine,...","[Too much support to Ukraine, Not enough suppo...","When it comes to Russia’s invasion of Ukraine,..."
RU_UKN_THRT2,How much of a threat to U.S. interests is Russ...,"[A major threat, A minor threat, Not a threat,...",How much of a threat to U.S. interests is Russ...


## 2. Fill Participants

In [12]:
profile_keys = profile_questions_df.index
profile_options = profile_questions_df["options"]
profiles = [
    dict(zip(profile_keys, profile_values))
    for profile_values in itertools.product(*profile_options)
]
profiles_df = pd.DataFrame(profiles)

In [13]:
profiles_df.head()

Unnamed: 0,AGE,SEX,EDUCATION,RACE/ETHNICITY,POLITICAL PARTY,POLITICAL IDEOLOGY
0,18-22,Male,Less than high school degree,White,Democrat,Liberal
1,18-22,Male,Less than high school degree,White,Democrat,Moderate
2,18-22,Male,Less than high school degree,White,Democrat,Conservative
3,18-22,Male,Less than high school degree,White,Republican,Liberal
4,18-22,Male,Less than high school degree,White,Republican,Moderate


## 3. Run Experiment

In [14]:
[
    f"Question: {profile_questions_df["question"][profile_question_id]}\nAnswer: {profile_question_answer}"
    for profile_question_id, profile_question_answer in profiles_df.iloc[0].items()
]

["Question: What is your age? (Options: '18-22', '23-29', '30-49', '50-64', '65+')\nAnswer: 18-22",
 "Question: What is your Sex? (Options: 'Male', 'Female', 'Other')\nAnswer: Male",
 "Question: What is your Education? (Options: 'Less than high school degree', 'High school degree to less than 4-year college degree', '4-year college degree or more')\nAnswer: Less than high school degree",
 "Question: What is your Race/Ethnicity? (Options: 'White', 'Black/African American', 'Hispanic', 'Asian', 'Other')\nAnswer: White",
 "Question: What is your Political Party? (Options: 'Democrat', 'Republican', 'Independent', 'Other')\nAnswer: Democrat",
 "Question: What is your Political Ideology? (Options: 'Liberal', 'Moderate', 'Conservative')\nAnswer: Liberal"]

In [15]:
def get_prompt(profile_row: pd.Series, survey_question: str, survey_question_answer: str) -> str:
    prompt = [
        f"Question: {profile_questions_df["question"][profile_question_id]}\nAnswer: {profile_question_answer}"
        for profile_question_id, profile_question_answer in profile_row.items()
    ]
    prompt.append(f"Question: {survey_question}\nAnswer: {survey_question_answer}")
    return "\n".join(prompt)

In [16]:
def get_survey_question_results(profile_row: pd.Series, survey_question_row: pd.Series) -> dict[str, float]:
    survey_question_results = {}
    survey_question_results_sum = 0
    for survey_question_answer in survey_question_row["options"]:
        prompt = get_prompt(profile_row, survey_question_row["question"], survey_question_answer)
        response = call_openai_api(prompt)
        choice = response["choices"][0]
        token_logprobs = choice["logprobs"]["token_logprobs"]
        completion_num_tokens = num_tokens(survey_question_answer)
        survey_question_answer_prob = math.exp(sum(token_logprobs[-completion_num_tokens:]))
        survey_question_results[survey_question_answer] = {"unnormalized": survey_question_answer_prob}
        survey_question_results_sum += survey_question_answer_prob
        print(survey_question_answer, survey_question_answer_prob)
    for survey_question_result in survey_question_results.values():
        survey_question_result["normalized"] = survey_question_result["unnormalized"] / survey_question_results_sum
    return survey_question_results


In [17]:
survey_questions_df = survey_questions_df[:3]
profiles_df = profiles_df[:5]

In [18]:
if not os.path.exists("../data"):
    os.mkdir("../data")

In [19]:
for survey_question_id, survey_question_row in survey_questions_df.iterrows():
    results = profiles_df.copy(deep=True)
    results["survey_question_results"] = profiles_df.apply(
        lambda profile: get_survey_question_results(profile, survey_question_row), axis=1
    )
    results.to_csv(pathlib.Path(f"../data/{survey_question_id}"))
    break

Strongly approve 0.1797001002510592
Somewhat approve 0.0036995725960380214
Strongly disapprove 9.919720571371969e-05
Somewhat disapprove 3.0558941193963557e-06
Not sure 0.825503697275803
Strongly approve 0.0002870685434657309
Somewhat approve 0.00010724151251968152
Strongly disapprove 1.8672052100609709e-06
Somewhat disapprove 3.222250833994998e-07
Not sure 0.9979615040308656
Strongly approve 0.0022481947907840496
Somewhat approve 0.0015563404737403525
Strongly disapprove 0.01568789851432826
Somewhat disapprove 0.0003428881961045635
Not sure 0.9780519375285234
Strongly approve 0.07384995025407322
Somewhat approve 0.004635947068209513
Strongly disapprove 0.0012304954647083804
Somewhat disapprove 2.8494927494374115e-05
Not sure 0.9191072259173059
Strongly approve 0.00020196081114587817
Somewhat approve 9.707106297077144e-05
Strongly disapprove 1.2242331087527671e-05
Somewhat disapprove 7.916258069004623e-07
Not sure 0.9979983110567608
