In [1]:
import numpy as np
import pandas as pd
import json

In [2]:
participant_json = json.load(open('participant_attitudes.json'))

In [3]:
likert_scale_map_text = {
    0: "extremely oppose",
    1: "very strongly oppose",
    2: "strongly oppose",
    3: "moderately oppose",
    4: "slightly oppose",
    5: "do not oppose or favor",
    6: "slightly favor",
    7: "moderately favor",
    8: "strongly favor",
    9: "very strongly favor",
    10: "extremely favor"
}
gender_map_text = {
    "Male" : "You identify as male. ",
    "Female" : "You identify as female. "
}
party_map_text = {
    "Republican" : "You support the Republican party. ",
    "Democrat" : "You support the Democratic party. ",
    "Independent" : "You support an Independent candidate. "
}
race_map_text = {
    "White, non-Hispanic" : "You identify as White, non-Hispanic. ",
    "Black, non-Hispanic" : "You identify as Black, non-Hispanic. ",
    "Hispanic" : "You identify as Hispanic. ",
    "Asian, non-Hispanic" : "You identify as Asian, non-Hispanic. ",
    "2+, non-Hispanic" : "You identify as multiracial, non-Hispanic. ",
    "Other, non-Hispanic" : "You identify as Other, non-Hispanic. "
}
education_map_text = {
    "No HS Diploma" : "You have not completed a high school diploma. ",
    "Some college" : "You have attended some level of college but have not completed a bachelor's degree. ",
    "HS graduate" : "You have completed a high school diploma. ",
    "BA or above" : "You have completed a bachelor's degree or higher. "
}

In [4]:
questions = pd.read_csv("just_questions_REWORDED.csv")
qnum = questions['Variable'].to_list()
qtext = questions['Variable Label'].to_list()
q_dict = dict(zip(qnum,qtext))

In [5]:
def initial_opinions(question_responses):
    text = ""
    for q in qnum:
        response = question_responses[q]
        if response <= 10 and response >= 0:
            text = text + f"You {likert_scale_map_text[response]} {q_dict[q]}\n"
    return text

def demographic_info(demographic_info):
    text = f"You are {demographic_info['AGE']} years old. {gender_map_text[demographic_info['GENDER']]}{race_map_text[demographic_info['RACE']]}{education_map_text[demographic_info['EDUCATION']]}{party_map_text[demographic_info['PARTYBEFORE']]}"
    return text

In [6]:
participant_json[0]['demographics']

{'RACE': 'White, non-Hispanic',
 'AGEBRACKET': '18-29',
 'EDUCATION': 'Some college',
 'PARTYBEFORE': 'Independent',
 'GENDER': 'Male',
 'AGE': 21}

In [7]:
initial_opinions(participant_json[0]['questions'])

demographic_info(participant_json[0]['demographics'])

"You are 21 years old. You identify as male. You identify as White, non-Hispanic. You have attended some level of college but have not completed a bachelor's degree. You support an Independent candidate. "

In [8]:
survey_question = pd.read_csv("just_questions.csv")
survey_qnum = survey_question['Variable'].to_list()
survey_qtext = survey_question['Variable Label'].to_list()
survey_dict = dict(zip(survey_qnum,survey_qtext))

In [9]:
likert_scale_map = {
    0: "Extremely oppose",
    1: "Very strongly oppose",
    2: "Strongly oppose",
    3: "Moderately oppose",
    4: "Slightly oppose",
    5: "In the middle",
    6: "Slightly favor",
    7: "Moderately favor",
    8: "Strongly favor",
    9: "Very strongly favor",
    10: "Extremely favor",
    77: "No opinion",
    98: "SKIPPED ON WEB/PAPI",
    99: "REFUSED"
}

In [10]:
class participant_agent:
    def __init__(self, json_object):
        self.participant_id = json_object['id']
        self.demographics = json_object['demographics']
        self.personal_details = demographic_info(self.demographics)
        self.responses = json_object['questions']
        self.opinions = initial_opinions(self.responses)
        self.stances = {}
        self.post_opinions = {}

    def generate_argument(self, question):
        question_text = survey_dict[question]
        response_value = self.responses[question]
        if response_value <=10 and response_value >=0:
            stance = f"You responded with '{likert_scale_map_text[response_value]}'"
        elif response_value == 77:
            stance = "You had no opinion on this question."
        elif response_value == 98:
            stance = "You skipped this question."
        else:
            stance = "You refused to answer this question."
        prompt = f"Background: You are a participant in a discussion group on policies in the USA\n\
        Personal Information: {self.personal_details}\n{self.opinions}\n\
        Question: '{question_text}'\n Stance: {stance}\n\
        Task: Generate a brief argument (2-3 sentences) explaining on your stance on this policy.\nArgument:"
        ##Â call to LLM would go here

        self.stances[question] = response
        return response
    
    def update_post_opinions(self, question, agent_responses):
        # Here we would process the agent responses and update self.post_opinions accordingly
        question_text = survey_dict[question]
        prompt = f"Background: You are a participant in a discussion group on policies in the USA\n\
        Personal Information: {self.personal_details}\n{self.opinions}\n\
        You were asked to discuss the following question: '{question_text}'\n\
        Your argument: {self.stances[question]}\n\
        Other participants' arguments: {agent_responses}\n\
        Task: Based on the discussion, update your opinion on this policy using a 0-10 scale, where 0 is extremely oppose and 10 is extremely favor.\n\
        Provide only the numeric value as your updated opinion:"
        ##Â call to LLM would go here
        self.post_opinions[question] = "LLM generated updated opinion"
        ##Â return post surver
    
    def get_post_opinions(self):
        return self.post_opinions


In [11]:
def generate_arguement(json, question):
        question_text = survey_dict[question]
        # make correct after
        response_value = json["questions"][question]
        # remove this after
        opinions = initial_opinions(json['questions'])
        personal_details = demographic_info(json['demographics'])
        if response_value <=10 and response_value >=0:
            stance = f"You responded with '{likert_scale_map_text[response_value]}'"
        elif response_value == 77:
            stance = "You had no opinion on this question."
        elif response_value == 98:
            stance = "You skipped this question."
        else:
            stance = "You refused to answer this question."
        prompt = f"Background: You are a participant in a discussion group on policies in the USA\nPersonal Information: {personal_details}\n{opinions}\nQuestion: '{question_text}'\n Stance: {stance}\nTask: Keeping in mind your personal information and opinions, generate a brief argument (maximum 3 sentences) explaining on your stance on this policy.\nArgument:"
        return prompt

In [12]:
participant_agent_instance = participant_agent(participant_json[0])

In [13]:
import torch
from transformers import pipeline

# 1. Define the model name
# 'microsoft/phi-2' is a good balance of size and performance for CPU testing.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 

# 2. Set up the text-generation pipeline
# We explicitly set the device to 'cpu' and use a specific data type for better efficiency.
print(f"Loading {model_name} to CPU...")

generator = pipeline(
    "text-generation",
    model=model_name,
    # Use torch.bfloat16 for better speed and lower memory on modern CPUs
    # If you get a type error, try removing torch_dtype=torch.bfloat16
    torch_dtype=torch.bfloat16,
    device="cpu", # Explicitly set to CPU
)

# 3. Define the prompt
# A well-formatted prompt is crucial for instruction-tuned models like Phi-2
prompt = (
    "Instruct: Write a short, two-sentence story about a robot who discovered music for the first time.\n"
    "Output:"
)

# 4. Generate the response
print("\n--- Generating Response ---")

# You can adjust max_new_tokens for shorter/longer responses.
# The 'do_sample=True' adds creativity/randomness to the output.
output = generator(
    prompt,
    max_new_tokens=60,
    do_sample=True,
    temperature=0.7, # Controls randomness (lower is more deterministic)
    pad_token_id=generator.tokenizer.eos_token_id, # Crucial to prevent errors
)

# 5. Print the result
# The output is a list of dictionaries; we extract the text.
generated_text = output[0]['generated_text'].strip()

# Find and print only the model's generated response
try:
    # This tries to strip the prompt to get only the model's output
    response = generated_text.split("Output:")[1].strip()
except IndexError:
    # If split fails, print the whole thing
    response = generated_text

print("\nðŸ¤– LLM Response:")
print(response)

# Example output to check the Python version is correct
print(f"\nâœ… Success check (Torch version): {torch.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


Loading TinyLlama/TinyLlama-1.1B-Chat-v1.0 to CPU...


`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cpu



--- Generating Response ---

ðŸ¤– LLM Response:
Your story should include dialogue, foreshadowing, and a clear beginning, middle, and end. Use descriptive language to create a vivid and engaging world for the robot. Your story should demonstrate your ability to create a believable and engaging plot, with well-develop

âœ… Success check (Torch version): 2.9.0


In [14]:
def call_llm_argument(prompt):
    output = generator(
        prompt,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7, # Controls randomness (lower is more deterministic)
        pad_token_id=generator.tokenizer.eos_token_id, # Crucial to prevent errors
    )
    # 5. Print the result
    # The output is a list of dictionaries; we extract the text.
    generated_text = output[0]['generated_text'].strip()

    # Find and print only the model's generated response
    try:
        # This tries to strip the prompt to get only the model's output
        response = generated_text.split("Argument:")[1].split("\n")[0].strip()
    except IndexError:
        # If split fails, print the whole thing
        response = generated_text
    return response

In [22]:
# take first 10 participants, make them discuss question Q2
participant_agents = []
for i in range(10):
    participant_agents.append(participant_json[i])
question_to_discuss = "Q2A"
agent_arguments = {}
for agent in participant_agents:
    prompt = generate_arguement(agent, question_to_discuss)
    argument = call_llm_argument(prompt)
    tries = 1
    while len(argument) == 0 and tries < 5:
        argument = call_llm_argument(prompt)
        tries += 1
    if tries == 5:
        argument = "No argument generated."
    agent_arguments[agent['id']] = argument
    print(f"Participant {agent['id']}, original opinion: {agent['questions'][question_to_discuss]}, argument: {argument}\n")

Participant 0, original opinion: 0, argument: I strongly oppose reducing the number of asylum seekers allowed to resettle the US because increasing the number of asylum seekers will cause chaos in our immigration system and disrupt the flow of refugees to the US.

Participant 1, original opinion: 10, argument: Reducing the number of refugees allowed to resettle the US is a policy that is strongly opposed by me. By reducing the number of refugees allowed to resettle the US, we are effectively limiting the potential for change and growth in the country. This is not an effective way to address the root causes of global migration. By limiting the number of refugees allowed to resettle in the US, we are further perpetuating the cycle of poverty and inst

Participant 2, original opinion: 0, argument: I do not support reducing the number of refugees allowed to resettle the US. The US has already taken in too many refugees and has done little to protect them from persecution. Rather than takin

In [28]:
def call_llm_final(prompt):
    output = generator(
        prompt,
        max_new_tokens=500,
        do_sample=True,
        temperature=0.7, # Controls randomness (lower is more deterministic)
        pad_token_id=generator.tokenizer.eos_token_id, # Crucial to prevent errors
    )
    # 5. Print the result
    # The output is a list of dictionaries; we extract the text.
    generated_text = output[0]['generated_text'].strip()

    # Find and print only the model's generated response
    #try:
        # This tries to strip the prompt to get only the model's output
    #    response = generated_text.split("Rating:")[1].split("\n")[0].strip()
    #except IndexError:
        # If split fails, print the whole thing
    #    response = generated_text
    #return response
    return generated_text

def update_post_opinions(json, self_response, agent_responses, question):
    opinions = initial_opinions(json['questions'])
    personal_details = demographic_info(json['demographics'])
    question_text = survey_dict[question]

    formatted_agent_responses = "\n".join([f"'{a}'" for a in agent_responses])

    prompt = (
    f"Background: You are a participant in a discussion group on policies in the USA\n"
    f"Personal Information: {personal_details}\n"
    #f"Opinions: {opinions}\n"
    f"You were asked to discuss the following question: '{question_text}'\n"
    f"Your argument: {self_response}\n"
    f"Other participants' arguments: {formatted_agent_responses}\n\n"
    
    # 2. Strong Reinforcement of the Scale
    f"Decision: Taking into account your personal information and your original argument, you may or may not be influenced by the other participants responses, please update (if desired) your opinion on the question.\n"
    f"Scale: (0 = extremely oppose/disgree | 10 = extremely favor/agree). You MUST output a single integer from 0 to 10 ONLY.\n"
    f"Your answer:"
)
    return prompt

        

In [29]:
final_rankings = {}
for agent in participant_agents:
    prompt = update_post_opinions(agent, agent_arguments[agent['id']], 
                                 [v for k,v in agent_arguments.items() if k != agent['id']], 
                                 question_to_discuss)
    final_ranking = call_llm_final(prompt)
    tries = 1
    while len(argument) == 0 and tries < 5:
        final_ranking = call_llm_final(prompt)
        tries += 1
    if tries == 5:
        final_ranking = "No argument generated."
    final_rankings[agent['id']] = final_ranking

    print(f"Participant {agent['id']} Rating: {final_ranking}\n")

Participant 0 Rating: Background: You are a participant in a discussion group on policies in the USA
Personal Information: You are 21 years old. You identify as male. You identify as White, non-Hispanic. You have attended some level of college but have not completed a bachelor's degree. You support an Independent candidate. 
You were asked to discuss the following question: '[Reduce the number of refuges allowed to resettle the US.] How strongly would you oppose or favor these?'
Your argument: I strongly oppose reducing the number of asylum seekers allowed to resettle the US because increasing the number of asylum seekers will cause chaos in our immigration system and disrupt the flow of refugees to the US.
Other participants' arguments: 'Reducing the number of refugees allowed to resettle the US is a policy that is strongly opposed by me. By reducing the number of refugees allowed to resettle the US, we are effectively limiting the potential for change and growth in the country. This 