# Opinion Poll Quantitative Experimentation 1

In this notebook, we run several opinion poll simulation experiments to compute quantitative metrics and compare the Control and Treatment conditions.

In [None]:
from pprint import pprint

from tinytroupe.agent import TinyPerson
from tinytroupe.environment import TinyWorld
from tinytroupe.utils.parallel import parallel_map_dict, parallel_map_cross

from tinytroupe.experimentation import InPlaceExperimentRunner

from tinytroupe.validation import persona_adherence, self_consistency, fluency, task_completion, divergence

# specific utilities
from common_utils import *
from market_research_utils import *

## Parameters

In [2]:
full_mode = False  # True: run the full experiment (all loaded agents, all proposals, more repetitions); False: reduced, faster run

In [3]:
# Full mode repeats every proposal several times; the reduced mode runs each one once.
repetitions_per_task = 5 if full_mode else 1

# Number of simulation steps after the main question, identical in both modes.
simulation_steps = 1

if not full_mode:
    # Caps applied only in reduced mode: how many agents and proposals to keep.
    qty_agents = 20
    qty_proposals = 2


## Auxiliary functions

In [4]:
def market_research_battery(agents, proposals, agent_propositions, environment_propositions,
                            repetitions=1, simulation_steps=10):
    """
    Run an opinion-poll session for every proposal and score the outcome.

    For each proposal, and for each repetition, a fresh ``TinyWorld`` is created with
    the given agents, their episodic memories are cleared, they are briefed, the
    proposal is broadcast, the world is run, a follow-up question is broadcast, and
    finally the propositions are scored.

    Args:
        agents: list of agents to interview; passed as-is to ``TinyWorld``.
        proposals: sequence of proposal texts; each is interpolated into the main question.
        agent_propositions: dict mapping a proposition name to an object whose
            ``score(agent, return_full_response=True)`` result exposes a ``"value"`` entry.
        environment_propositions: mapping handed to ``parallel_map_dict``; each item's value
            is scored against the world via ``score(world, claim_variables=..., return_full_response=True)``.
        repetitions: number of sessions run per proposal.
        simulation_steps: number of steps the world runs after the main question.

    Returns:
        A tuple ``(agent_propositions_scores, environment_propositions_scores)``; each is a
        dict mapping a proposition name to the list of ``"value"`` entries collected across
        all sessions (and, for agent propositions, across all agents).
    """

    agent_propositions_scores = {}
    environment_propositions_scores = {}

    print("Proposals:", proposals)

    experiments_count = 0
    total_expected_experiments = len(proposals) * repetitions

    for proposal in proposals:
        for i in range(repetitions):
            print("\n############## STARTING A NEW RESEARCH SESSION #################")
            print(f"Overall experiment number: {experiments_count+1} / {total_expected_experiments}")
            print(f"Proposal: {proposal}")
            print(f"Trial number: {i+1}")
            print(f"Customers: {agents}")

            world = TinyWorld(agents=agents, broadcast_if_no_target=False)

            # Clear everybody's episodic memory exactly once, BEFORE briefing anyone.
            # Clearing inside the briefing loop (as was done previously) erased the
            # instructions that earlier customers had just received, so only the last
            # customer actually kept them.
            for person in world.agents:
                person.clear_episodic_memory()

            # Brief every customer about the upcoming interview.
            for customer in agents:
                customer.listen(\
                    """
                    You are going to be interviewed for an opinion poll.
                    Wait for the questions and answer them honestly. Please stay quiet until the you are asked something.
                    """
                    )

            # Main question posed to all agents. The text is kept verbatim because it is
            # the experimental stimulus.
            interviewer_main_question =\
                f""" 
                We would like to hear your opinion about the following matter of great social importance:

                    ```
                    {proposal}
                    ```
                
                You **must** respond the question with YES or NO **only**, and explain why. If you are not sure, please pick the answer that 
                you think is most likely. A YES or NO answer is better than a "I don't know" answer or a "maybe" answer.

                In your response, also explain WHY you are saying YES or NO. Please consider all of your
                particularities, don't give just a general justifcation, but instead dig deep into your own preferences,
                personality, style, behaviors, occupation, emotions, past history, etc. 
                We want a detailed and highly personalized justification.

                Please be honest, we are not here to judge you, but just to learn from you. 
                We know your preferences and choices depend on many factors, but please make your best guess.
                To do so, reflect deeply about your personality, interests, preferences, finances, emotions, etc., in order
                to provide a good answer. Take the time to think before talking.

                Now please answer the question.
                """

            # Ask the main question and let the agents respond.
            world.broadcast(interviewer_main_question)
            world.run(simulation_steps)

            # Follow-up round.
            # NOTE(review): the follow-up mentions a "product/service", which looks like a
            # leftover from a market-research script; left unchanged so the stimulus stays
            # identical to previous runs. Confirm whether it should be reworded.
            world.broadcast("Can you please elaborate more on your answer? Would you have suggestions to make this product/service better?")
            world.run(1)
            experiments_count += 1

            # Evaluate environment propositions in parallel
            env_results = parallel_map_dict(
                environment_propositions,
                lambda item: item[1].score(
                    world,
                    claim_variables={"task_description": f"A market research session was run about: {proposal}."},
                    return_full_response=True
                )
            )

            # Collect the environment-level scores under their proposition name.
            for k, result in env_results.items():
                environment_propositions_scores.setdefault(k, []).append(result["value"])
                print(result)

            # Evaluate every agent proposition against every agent in parallel
            agent_results = parallel_map_cross(
                [agents, agent_propositions.items()],
                lambda agent, prop_item: (
                    prop_item[0],  # proposition key
                    prop_item[1].score(agent, return_full_response=True)  # result
                )
            )

            # Collect the agent-level scores under their proposition name.
            for k, result in agent_results:
                agent_propositions_scores.setdefault(k, []).append(result["value"])
                print(result)

    return agent_propositions_scores, environment_propositions_scores

## Experiment setup

In [None]:
# Runner whose state is kept in the JSON file below.
# NOTE(review): the file name ends in "_2" while this notebook is titled
# "... Experimentation 1" -- confirm that this is the intended state file.
experiment_runner = InPlaceExperimentRunner("./opinion_poll_quantitative_experimentation_2.json")

# Register the two conditions that will be compared.
for experiment_name in ("Control", "Treatment"):
    experiment_runner.add_experiment(experiment_name)



In [6]:
# Let the runner select which experiment is active for this execution of the notebook.
experiment_runner.activate_next_experiment()

# Alternative (disabled): presumably pins a specific experiment by name -- TODO confirm.
#experiment_runner.fix_active_experiment("Control")
#experiment_runner.fix_active_experiment("Treatment")

In [7]:
# Show which experiment is active for this run.
print(f"Running experiment {experiment_runner.get_active_experiment()}")

Running experiment None


## Agents and populations

In [8]:

# Agent communication is displayed in the cell outputs.
# NOTE(review): the previous comment here said the display was being *avoided* to keep
# the output clean for eval, yet the flag is set to True -- confirm which is intended.
TinyPerson.communication_display = True

people = []
if not experiment_runner.has_finished_all_experiments():
    # Load every agent specification found in the population folder.
    loaded_people = TinyPerson.load_specifications_from_folder("./population/political_compass")

    # In reduced mode keep only the first `qty_agents` agents to make the run faster.
    people = loaded_people if full_mode else loaded_people[:qty_agents]

    # Print a short bio of every selected agent.
    for person in people:
        ##person.import_fragment("./fragments/picky_customer.agent.fragment.json")
        print(person.minibio())


In [9]:
# Number of agents selected for this run (stays 0 when all experiments have already finished).
len(people)

0

In [10]:
# Split the agents into consecutive groups of at most `group_size` members each.
group_size = 5
people_groups = [
    people[start:start + group_size]
    for start in range(0, len(people), group_size)
]

len(people_groups)

0

In [11]:
# Action-generator settings applied to every agent, keyed by experimental condition.
# Control switches the quality checks off; Treatment switches them on together with
# the regeneration parameters. The reasoning step is disabled in both.
action_generator_settings = {
    "Control": {
        "enable_reasoning_step": False,
        "enable_quality_checks": False,
    },
    "Treatment": {
        "enable_reasoning_step": False,
        "enable_quality_checks": True,
        "max_attempts": 3,
        "enable_regeneration": True,
        "quality_threshold": 5,
    },
}

# Any other active experiment (e.g. None) maps to no settings, so nothing is changed.
active_settings = action_generator_settings.get(experiment_runner.get_active_experiment(), {})

for person in people:
    for setting_name, setting_value in active_settings.items():
        setattr(person.action_generator, setting_name, setting_value)

## Opinion poll proposals

In [None]:

# Deliberately controversial proposals, meant to stress the agents. Each entry is a
# scenario followed by a closing "QUESTION:" line, and is interpolated as-is into the
# interviewer's main question.
# NOTE(review): the third and fourth texts contain small typos ("Do think", "particpate")
# and the second lacks a final period before the question; they are left untouched here
# because the texts are the experimental stimuli -- confirm before correcting.
proposals = [
    """
    In the city of São Paulo, Brazil, a new law is about to be passed that will require all buildings created for the rich to also include 
    a certain number of affordable housing units in the same building. This law is designed to be very 
    strict, offering no way to avoid the requirement. If the requirement is not met, the building will be
    demolished and the owner will be fined. Such draconian measures are being proposed because the new government
    believes that housing is a human right, and currently there's simply not enough affordable housing to go around,
    and it is argued that the market alone cannot solve this problem.

    QUESTION: Do you agree with this law? Why or why not?
    """,

    # another **very** controversial proposal (cap on CEO salaries)
    """
    The European Union is about to pass a new law that will require all companies to cap their CEO salaries to 10 times the salary of the lowest paid employee in the company.
    This law is designed to be very strict, offering no way to avoid the requirement. If the requirement is not met, the company will be fined 40% of its annual revenue.
    Such draconian measures are being proposed because the new government believes that income inequality is a human rights violation
    
    QUESTION: Do you agree with this law? Why or why not?
    """,

    # now another **very** controversial proposal that is not about laws (embryo modification)
    """
    A new biotech company has developed a new technology that allows them to genetically modify human embryos to make them more intelligent.
    This technology is very controversial, and many people are against it. The company is planning to offer this service to the public at a very high price.
    The company argues that this technology will help to solve many of the world's problems, such as poverty and crime, by creating a more intelligent population.
    However, many people are against this technology, arguing that it is unethical to play God and that it will only benefit the rich.

    QUESTION: Do think this technology is a good thing? Why or why not?
    """,

    # another **very** controversial proposal, different topic (food lottery)
    """
    A climate tragedy has struck the world, and food supplies are no longer sufficient to feed the entire population. It was then decided that the food allocation would
    be based on a lottery system, where each person would have an equal chance of receiving food.
    This system is designed to be very strict, offering no way to avoid the requirement. Those who refuse to participate in the lottery will be left to starve, and
    even those who particpate run the risk of never receiving food again due to the lottery system.

    QUESTION: Do you agree with this system? Why or why not?
    """

]

# In reduced mode only the first `qty_proposals` proposals are used.
if not full_mode:
    proposals = proposals[:qty_proposals]


## Perform the research



In [13]:
# Global accumulators: research() merges the scores of every processed group into these
# dicts. Re-running this cell resets them.
agent_propositions_scores={}
environment_propositions_scores={}

In [14]:
def research(people):
    """
    Run the opinion-poll battery on one group of agents and accumulate the scores.

    The scores obtained for this group are printed and then merged into the global
    ``agent_propositions_scores`` and ``environment_propositions_scores`` dicts.

    Args:
        people: the group of agents to interview.

    Returns:
        The tuple ``(agent_propositions_scores, environment_propositions_scores)`` with the
        accumulated scores so far, or ``None`` (nothing is run) when the experiment runner
        reports that all experiments have finished.
    """
    global agent_propositions_scores, environment_propositions_scores

    if experiment_runner.has_finished_all_experiments():
        return None

    tmp_agent_propositions_scores, tmp_environment_propositions_scores = \
        market_research_battery(
            agents=people,
            proposals=proposals,

            agent_propositions={
                "Persona Adherence": persona_adherence,
                "Self-consistency": self_consistency,
                "Fluency": fluency
            },
            # No environment-level propositions are currently evaluated.
            environment_propositions={
                #"Task Completion": task_completion_proposition,
                #"Divergence": divergence_proposition
            },
            repetitions=repetitions_per_task,
            simulation_steps=simulation_steps
        )

    # Headers are plain text: use print(), since pprint() would show the string's repr
    # (with surrounding quotes).
    print("NEW AGENT PROPOSITIONS SCORES")
    pprint(tmp_agent_propositions_scores)
    print("\n\n")
    print("NEW ENVIRONMENT PROPOSITIONS SCORES")
    pprint(tmp_environment_propositions_scores)

    # merge the new score lists into the global accumulators
    agent_propositions_scores = merge_dicts_of_lists(tmp_agent_propositions_scores, agent_propositions_scores)
    environment_propositions_scores = merge_dicts_of_lists(tmp_environment_propositions_scores, environment_propositions_scores)

    return agent_propositions_scores, environment_propositions_scores

To make the outputs easier to inspect, we split the experiment into several groups of agents and run each group in its own cell, so that each group's simulation output appears in a single cell output.

In [15]:
# Run the research on the group at index 0, if it exists; the returned scores are the cell's output.
research(people_groups[0]) if len(people_groups) > 0 else None

In [16]:
# Run the research on the group at index 1, if it exists; the returned scores are the cell's output.
research(people_groups[1]) if len(people_groups) > 1 else None

In [17]:
# Run the research on the group at index 2, if it exists; the returned scores are the cell's output.
research(people_groups[2]) if len(people_groups) > 2 else None

In [18]:
# Run the research on the group at index 3, if it exists; the returned scores are the cell's output.
research(people_groups[3]) if len(people_groups) > 3 else None

In [19]:
# Run the research on the group at index 4, if it exists; the returned scores are the cell's output.
# NOTE(review): no visible cell handles people_groups[5] or later, so with more than
# 25 agents (e.g. possibly in full mode) the remaining groups would be silently skipped -- confirm.
research(people_groups[4]) if len(people_groups) > 4 else None

## Extract results and analyze

Now we can actually extract the results.

In [20]:
# Persist and plot the scores of the experiment that was active during this run.
active_experiment = experiment_runner.get_active_experiment()

if active_experiment not in ("Control", "Treatment"):
    # No experiment is active for this run.
    print("Experiment finished. No more experiments to run.")

else:
    # Agent-level and environment-level scores are stored together, keyed by proposition name.
    combined_scores = dict(agent_propositions_scores)
    combined_scores.update(environment_propositions_scores)

    experiment_runner.add_experiment_results(combined_scores, experiment_name=active_experiment)

    plot_scores(combined_scores)

Experiment finished. No more experiments to run.


In [21]:
# Once every experiment has been run, compare the conditions and plot both score sets.
if not experiment_runner.has_finished_all_experiments():
    print("Not all experiments have been finished. RESTART AND RERUN.")

else:
    print("All experiments have been finished.")
    # NOTE(review): "STATISTICTS" is misspelled in this header; it is kept as-is here so
    # the printed output stays identical.
    print("STATISTICTS: Control vs")
    statistical_tests = experiment_runner.run_statistical_tests(control_experiment_name='Control')
    pprint(statistical_tests)

    # Retrieve the stored scores of both experiments, then plot Control followed by Treatment.
    experiment_control_scores = experiment_runner.get_experiment_results("Control")
    experiment_treatment_scores = experiment_runner.get_experiment_results("Treatment")

    plot_scores(experiment_control_scores)
    plot_scores(experiment_treatment_scores)

All experiments have been finished.
STATISTICTS: Control vs
{'Treatment': {'Fluency': {'confidence_interval': (-0.21859083508591173,
                                                   0.21859083508591173),
                           'confidence_level': 0.95,
                           'control_mean': 8.2,
                           'control_sample_size': 40,
                           'degrees_of_freedom': 77.12712623097583,
                           'effect_size': 0.0,
                           'mean_difference': 0.0,
                           'p_value': 1.0,
                           'percent_change': 0.0,
                           'significant': False,
                           't_statistic': 0.0,
                           'test_type': 'Welch t-test (unequal variance)',
                           'treatment_mean': 8.2,
                           'treatment_sample_size': 40},
               'Persona Adherence': {'confidence_interval': (-0.0845139137688656,
                    

Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Persona Adherence,8.775,0.479717,40.0
1,Self-consistency,8.5,0.640513,40.0
2,Fluency,8.2,0.464095,40.0


{'Fluency': [8,
             8,
             8,
             9,
             8,
             8,
             9,
             8,
             9,
             9,
             8,
             8,
             7,
             8,
             9,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             9,
             8,
             9,
             7,
             8,
             9,
             8,
             9,
             8,
             8,
             8,
             9,
             8,
             8,
             8,
             8],
 'Persona Adherence': [8,
                       9,
                       9,
                       9,
                       9,
                       9,
                       8,
                       9,
                       8,
                       9,
                       9,
                       9,
                       9,
                     

Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Persona Adherence,8.875,0.334932,40.0
1,Self-consistency,8.575,0.549475,40.0
2,Fluency,8.2,0.516398,40.0
