# Auto Polling
    - This notebook will serve the following pipeline
    - Ingest a demographic data, like the census of an area
        - Create groups that will in total represent the population
            - IE 100/1000 white women ages 20-30
        - Turn the groups into personas
            - For each group, use chat gpt to make that many personas of people
            - Save personas for later use
    - Create a survey based on the political landscape of the area
        - Feed in the data and make survey
        - Could make multiple surveys and have them be specific to the demographic
    - Give survey to the personas
        - For each persona, have them take the survey, save results
    - Process results to determine the most important issues to make ads for
        

In [1]:
import Utils.Chat_GPT_Funcs as GPT
import concurrent.futures
import os

In [2]:
# test = [
#   {
#     "group_size": 294,
#     "age": "18-64",
#     "gender": "Female",
#     "ethnicity": "White",
#     "income_levels": "$20,000 - $50,000",
#     "employment_status": "In civilian labor force (62.9%)",
#     "marital_status": "Not specified",
#     "education_level": "High school graduate or higher (94.4%)"
#   },
test = [
  {
    "group_size": 146,
    "age": "65 and over",
    "gender": "Female",
    "ethnicity": "White",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not in civilian labor force",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 294,
    "age": "18-64",
    "gender": "Male",
    "ethnicity": "White",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "In civilian labor force (62.9%)",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 146,
    "age": "65 and over",
    "gender": "Male",
    "ethnicity": "White",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not in civilian labor force",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 20,
    "age": "18 and over",
    "gender": "Not specified",
    "ethnicity": "Black or African American",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not specified",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 66,
    "age": "18 and over",
    "gender": "Not specified",
    "ethnicity": "American Indian and Alaska Native",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not specified",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 10,
    "age": "18 and over",
    "gender": "Not specified",
    "ethnicity": "Asian",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not specified",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 1,
    "age": "18 and over",
    "gender": "Not specified",
    "ethnicity": "Native Hawaiian and Other Pacific Islander",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not specified",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 30,
    "age": "18 and over",
    "gender": "Not specified",
    "ethnicity": "Two or More Races",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not specified",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  },
  {
    "group_size": 43,
    "age": "18 and over",
    "gender": "Not specified",
    "ethnicity": "Hispanic or Latino",
    "income_levels": "$20,000 - $50,000",
    "employment_status": "Not specified",
    "marital_status": "Not specified",
    "education_level": "High school graduate or higher (94.4%)"
  }
]

In [3]:
def create_directory_if_not_exists(file_path):
    directory = os.path.dirname(file_path)
    if not os.path.exists(directory):
        os.makedirs(directory)

In [4]:
def generate_and_save_persona(i, group_count):
    persona_gen_role = GPT.open_file("Prompts/persona_gen_groups_v1.txt")

    tags = {
        '<<BATCH>>': str(batch_size),
        '<<GROUP>>': str(original_dict),
        '<<NUM>>': str(i),
    }

    persona_text = GPT.generalized_gpt_prompt("Prompts/gen_by_group.txt", tags, engine='gpt-4', role=persona_gen_role)
    
    parent_path = f"Groups/Group_{group_count}/"
    create_directory_if_not_exists(parent_path)
    
    file_path = parent_path + f"group_{group_count}_batch_{i}.txt"
    print("Saving:", file_path)
    GPT.save_file(file_path, persona_text)

In [None]:
group_count = 2 
for original_dict in test:
    
    print("Starting Group:", group_count)
    group_size = original_dict['group_size']

    # Remove the 'group_size' key from the new dictionary
    del original_dict['group_size']
    original_dict['place_of_residence'] = 'Montana, United States'

    import concurrent.futures

    batch_size = 5
    max_workers = 10

    # Calculate the number of full batches
    full_batches = group_size // batch_size

    # Calculate the number of remaining objects
    remaining_objects = group_size % batch_size

    personas = []

    # Create a ThreadPoolExecutor with max_workers
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Create full batches of objects
        for i in range(full_batches):
            executor.submit(generate_and_save_persona, i, group_count)
            print("Starting batch:", str(i))
        # Create remaining objects
        if remaining_objects > 0:
            executor.submit(generate_and_save_persona, full_batches, group_count)
            print("Starting batch:", str(full_batches))
    
    group_count += 1
    print()
    print()

Starting Group: 2
Starting batch: 0
Starting batch: 1
Starting batch: 2
Starting batch: 3
Starting batch: 4
Starting batch: 5
Starting batch: 6
Starting batch: 7
Starting batch: 8
Starting batch: 9
Starting batch: 10
Starting batch: 11
Starting batch: 12
Starting batch: 13
Starting batch: 14
Starting batch: 15
Starting batch: 16
Starting batch: 17
Starting batch: 18
Starting batch: 19
Starting batch: 20
Starting batch: 21
Starting batch: 22
Starting batch: 23
Starting batch: 24
Starting batch: 25
Starting batch: 26
Starting batch: 27
Starting batch: 28
Starting batch: 29
Saving: Groups/Group_2/group_2_batch_5.txt
Saving: Groups/Group_2/group_2_batch_9.txt
Saving: Groups/Group_2/group_2_batch_0.txt
Saving: Groups/Group_2/group_2_batch_7.txt
Saving: Groups/Group_2/group_2_batch_6.txt
Saving: Groups/Group_2/group_2_batch_2.txt
Saving: Groups/Group_2/group_2_batch_1.txt
Saving: Groups/Group_2/group_2_batch_3.txt
Saving: Groups/Group_2/group_2_batch_8.txt
Saving: Groups/Group_2/group_2_batc

Error communicating with OpenAI: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 7d7761caa68d3f46a184a7c6bea7ca56 in your message.)
Saving: Groups/Group_6/group_6_batch_2.txt
Saving: Groups/Group_6/group_6_batch_5.txt
Saving: Groups/Group_6/group_6_batch_0.txt
Saving: Groups/Group_6/group_6_batch_7.txt
Saving: Groups/Group_6/group_6_batch_6.txt
Saving: Groups/Group_6/group_6_batch_8.txt
Saving: Groups/Group_6/group_6_batch_4.txt
Saving: Groups/Group_6/group_6_batch_3.txt
Saving: Groups/Group_6/group_6_batch_9.txt
Saving: Groups/Group_6/group_6_batch_1.txt
Saving: Groups/Group_6/group_6_batch_10.txt
Saving: Groups/Group_6/group_6_batch_13.txt
Saving: Groups/Group_6/group_6_batch_12.txt
Saving: Groups/Group_6/group_6_batch_11.txt


Starting Group: 7
Starting batch: 0
Starting batch: 1
Error communicating with OpenAI: That model is currently ov

In [3]:
for original_dict in test:
    group_count = 1 
    group_size = original_dict['group_size']

    # Remove the 'group_size' key from the new dictionary
    del original_dict['group_size']
    original_dict['place_of_residence'] = 'Montana, United States'

    batch_size = 5

    # Calculate the number of full batches
    full_batches = group_size // batch_size

    # Calculate the number of remaining objects
    remaining_objects = group_size % batch_size

    personas = []
    
    # Create full batches of objects
    for i in range(full_batches):
        persona_gen_role = GPT.open_file("Prompts/persona_gen_groups_v1.txt")

        tags = {
            '<<BATCH>>': str(batch_size),
            '<<GROUP>>': str(original_dict),
            '<<NUM>>': str(i),
        }

        persona_text = GPT.generalized_gpt_prompt("Prompts/gen_by_group.txt", tags, engine = 'gpt-4', role = persona_gen_role)
        
        file_path = f"Groups/group_{group_count}_batch_{i}.txt"
        print("Saving:", file_path)
        GPT.save_file(file_path, persona_text)
    

    # Create remaining objects
    if remaining_objects > 0:
        persona_gen_role = GPT.open_file("Prompts/persona_gen_groups_v1.txt")

        tags = {
            '<<BATCH>>': str(remaining_objects),
            '<<GROUP>>': str(original_dict),
            '<<NUM>>': str(full_batches),
        }

        persona_text = GPT.generalized_gpt_prompt("Prompts/gen_by_group.txt", tags, engine = 'gpt-4', role = persona_gen_role)
        file_path = f"Groups/group_{group_count}_batch_{full_batches}.txt"
        print("Saving:", file_path)
        GPT.save_file(file_path, persona_text)
    
    group_count += 1


4
Saving: Groups/group_1_batch_0.txt
