# Auto Polling
    - This notebook implements the following pipeline:
    - Ingest demographic data, such as the census of an area
        - Create groups that will in total represent the population
            - e.g., 100 out of 1,000 people are white women aged 20-30
        - Turn the groups into personas
            - For each group, use ChatGPT to generate that many personas
            - Save personas for later use
    - Create a survey based on the political landscape of the area
        - Feed in the data and generate the survey
        - Could make multiple surveys and have them be specific to the demographic
    - Give survey to the personas
        - For each persona, have them take the survey, save results
    - Process results to determine the most important issues to make ads for
        

In [1]:
import Utils.Chat_GPT_Funcs as GPT
import concurrent.futures
import os

In [6]:
# Demographic groups to turn into personas: one dict per group, where
# "group_size" is the number of personas wanted for that group.
#
# Groups that are currently disabled (kept here for reference only):
#   group_size | age           | gender | ethnicity | employment_status
#   294        | "18-64"       | Female | White     | "In civilian labor force (62.9%)"
#   146        | "65 and over" | Female | White     | "Not in civilian labor force"
#   294        | "18-64"       | Male   | White     | "In civilian labor force (62.9%)"
#   146        | "65 and over" | Male   | White     | "Not in civilian labor force"
# Their income_levels, marital_status and education_level values are the same
# as those of the active groups below.
#
# The active groups differ only in size and ethnicity, so each one is built
# from the same template. A new dict is created per group, and the key order
# matches the hand-written version.
test = [
    {
        "group_size": size,
        "age": "18 and over",
        "gender": "Not specified",
        "ethnicity": ethnicity,
        "income_levels": "$20,000 - $50,000",
        "employment_status": "Not specified",
        "marital_status": "Not specified",
        "education_level": "High school graduate or higher (94.4%)",
    }
    for size, ethnicity in (
        (20, "Black or African American"),
        (66, "American Indian and Alaska Native"),
        (10, "Asian"),
        (1, "Native Hawaiian and Other Pacific Islander"),
        (30, "Two or More Races"),
        (43, "Hispanic or Latino"),
    )
]

In [3]:
def create_directory_if_not_exists(file_path):
    """Make sure the directory part of ``file_path`` exists.

    Args:
        file_path: Path to a file, or a directory path ending in a separator.
            Only the portion returned by ``os.path.dirname`` is created.

    Raises:
        FileExistsError: If the directory path exists but is not a directory.
    """
    directory = os.path.dirname(file_path)
    # A bare file name such as "notes.txt" has an empty directory part, and
    # os.makedirs("") raises FileNotFoundError, so there is nothing to create.
    if not directory:
        return
    # exist_ok=True replaces the separate "exists?" check, which could race
    # when several worker threads ask for the same directory at once.
    os.makedirs(directory, exist_ok=True)

In [7]:
def generate_and_save_persona(i, group_count, num_personas=None, group=None):
    """Generate one batch of personas for a group and save the text to disk.

    The text returned by ``GPT.generalized_gpt_prompt`` is written to
    ``Groups/Group_{group_count}/group_{group_count}_batch_{i}.txt``.

    Args:
        i: Batch index; passed as the ``<<NUM>>`` tag and used in the file name.
        group_count: Group number used in the output directory and file name.
        num_personas: Value passed as the ``<<BATCH>>`` tag. When omitted, the
            notebook-level ``batch_size`` global is used, exactly as in the
            two-argument call.
        group: Value passed (via ``str``) as the ``<<GROUP>>`` tag. When
            omitted, the notebook-level ``original_dict`` global is used.
    """
    # Fall back to the notebook globals so existing two-argument callers keep
    # working; passing the values explicitly avoids that hidden state.
    if num_personas is None:
        num_personas = batch_size
    if group is None:
        group = original_dict

    persona_gen_role = GPT.open_file("Prompts/persona_gen_groups_v1.txt")

    # Presumably these tags are substituted into the prompt file by
    # generalized_gpt_prompt; confirm in Utils.Chat_GPT_Funcs.
    tags = {
        '<<BATCH>>': str(num_personas),
        '<<GROUP>>': str(group),
        '<<NUM>>': str(i),
    }

    persona_text = GPT.generalized_gpt_prompt("Prompts/gen_by_group.txt", tags, engine='gpt-4', role=persona_gen_role)

    # The trailing slash makes the helper treat the whole path as a directory.
    parent_path = f"Groups/Group_{group_count}/"
    create_directory_if_not_exists(parent_path)

    file_path = parent_path + f"group_{group_count}_batch_{i}.txt"
    print("Saving:", file_path)
    GPT.save_file(file_path, persona_text)

In [8]:
# Generate personas for every demographic group in `test`, one batch per task.
# Output numbering starts at 5 (presumably groups 1-4 were produced by an
# earlier run; confirm before changing).
group_count = 5

# Loop-invariant settings. `batch_size` must stay a module-level name because
# generate_and_save_persona reads it as a global.
batch_size = 5
max_workers = 10

for group in test:

    print("Starting Group:", group_count)
    group_size = group['group_size']

    # Build the prompt dict as a copy without 'group_size' instead of deleting
    # the key from the entry in `test`. The in-place delete made a second run
    # of this cell fail with KeyError. The name `original_dict` is kept because
    # generate_and_save_persona reads it as a global.
    original_dict = {key: value for key, value in group.items() if key != 'group_size'}
    original_dict['place_of_residence'] = 'Montana, United States'

    # Calculate the number of full batches
    full_batches = group_size // batch_size

    # Calculate the number of remaining objects
    remaining_objects = group_size % batch_size

    # One extra batch is submitted when the group size is not a multiple of
    # batch_size.
    # NOTE(review): that extra batch is submitted exactly like a full one, so
    # it presumably still requests `batch_size` personas rather than
    # `remaining_objects`; confirm against the prompt and trim if needed.
    total_batches = full_batches + (1 if remaining_objects > 0 else 0)

    # Unused in this cell; kept in case another cell refers to it.
    personas = []

    batch_by_future = {}
    # Leaving the `with` block waits for every submitted batch, so
    # `original_dict` is not rebound while workers are still reading it.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        for i in range(total_batches):
            future = executor.submit(generate_and_save_persona, i, group_count)
            batch_by_future[future] = i
            print("Starting batch:", str(i))

    # An exception raised inside a worker is stored on its future and would
    # otherwise vanish. Report it, but keep going so one failed batch does not
    # stop the remaining groups.
    for future, i in batch_by_future.items():
        error = future.exception()
        if error is not None:
            print(f"Batch {i} of group {group_count} failed: {error!r}")

    group_count += 1
    print()
    print()

Starting Group: 5
Starting batch: 0
Starting batch: 1
Starting batch: 2
Starting batch: 3
Saving: Groups/Group_5/group_5_batch_1.txt
Saving: Groups/Group_5/group_5_batch_3.txt
Saving: Groups/Group_5/group_5_batch_0.txt
Saving: Groups/Group_5/group_5_batch_2.txt


Starting Group: 6
Starting batch: 0
Starting batch: 1
Starting batch: 2
Starting batch: 3
Starting batch: 4
Starting batch: 5
Starting batch: 6
Starting batch: 7
Starting batch: 8
Starting batch: 9
Starting batch: 10
Starting batch: 11
Starting batch: 12
Starting batch: 13
Error communicating with OpenAI: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID dff06c81cd83cb110c77df2b1c6238ef in your message.)
Saving: Groups/Group_6/group_6_batch_2.txt
Saving: Groups/Group_6/group_6_batch_4.txt
Saving: Groups/Group_6/group_6_batch_0.txt
Saving: Groups/Group_6/group_6_batch_5.txt
Saving: Gro

In [9]:
def dict_to_paragraph(persona):
    """Render a persona dict as a second-person descriptive paragraph.

    Args:
        persona: Mapping with the keys "Name", "Age", "Gender", "Ethnicity",
            "Income", "Marital Status", "Education Level", "Occupation",
            "Description", "Top Priorities" and "Pain points". The last two
            are joined with ", ", so they must be iterables of strings.

    Returns:
        A single string made of seven sentences separated by one space.
    """
    top_priorities = ', '.join(persona["Top Priorities"])
    challenges = ', '.join(persona["Pain points"])

    sentences = [
        f"Your name is {persona['Name']}, you are a {persona['Age']} year old {persona['Gender']}.",
        f"You identify as {persona['Ethnicity']}, and your annual income is ${persona['Income']}.",
        f"Your marital status is {persona['Marital Status']}, and your highest level of education is {persona['Education Level']}.",
        f"You work as a {persona['Occupation']}.",
        f"You are described as '{persona['Description']}'.",
        f"Your top priorities are {top_priorities}.",
        f"You are facing some challenges, including {challenges}.",
    ]
    return " ".join(sentences)


In [14]:
import os
import glob

# One-off clean-up: rewrite the "Nationality" line of every saved persona file
# in groups 1-4 as an "Ethnicity" line.

parent_path = "Groups/"
# Define the directories to go through
directories = ['Groups/Group_1', 'Groups/Group_2', 'Groups/Group_3', 'Groups/Group_4']

# Define the strings to find and the string to replace them with
find_string = '"Nationality": "American",'
# This assignment had been commented out while the loop below still used the
# name, so the cell only ran when the variable was left over in the kernel and
# raised NameError on a fresh one.
find_string1 = '"Nationality": "White",'
replace_string = '"Ethnicity": "White",'

# For each directory
for directory in directories:
    # Use a wildcard (*) to find all .txt files in the directory
    for file_name in glob.glob(directory + '/*.txt'):
        print(file_name)
        # Read the current contents of the file
        file_data = GPT.open_file(file_name)

        # Replace both spellings of the line with replace_string
        new_data = file_data.replace(find_string, replace_string).replace(find_string1, replace_string)

        # Only rewrite files whose contents actually changed, so a re-run
        # leaves already-converted files untouched.
        if new_data != file_data:
            GPT.save_file(file_name, new_data)

print("Replacement complete.")



Groups/Group_1/group_1_batch_28.txt
Groups/Group_1/group_1_batch_14.txt
Groups/Group_1/group_1_batch_15.txt
Groups/Group_1/group_1_batch_29.txt
Groups/Group_1/group_1_batch_17.txt
Groups/Group_1/group_1_batch_16.txt
Groups/Group_1/group_1_batch_12.txt
Groups/Group_1/group_1_batch_13.txt
Groups/Group_1/group_1_batch_11.txt
Groups/Group_1/group_1_batch_39.txt
Groups/Group_1/group_1_batch_38.txt
Groups/Group_1/group_1_batch_10.txt
Groups/Group_1/group_1_batch_48.txt
Groups/Group_1/group_1_batch_49.txt
Groups/Group_1/group_1_batch_8.txt
Groups/Group_1/group_1_batch_58.txt
Groups/Group_1/group_1_batch_9.txt
Groups/Group_1/group_1_batch_7.txt
Groups/Group_1/group_1_batch_42.txt
Groups/Group_1/group_1_batch_56.txt
Groups/Group_1/group_1_batch_57.txt
Groups/Group_1/group_1_batch_43.txt
Groups/Group_1/group_1_batch_6.txt
Groups/Group_1/group_1_batch_4.txt
Groups/Group_1/group_1_batch_55.txt
Groups/Group_1/group_1_batch_41.txt
Groups/Group_1/group_1_batch_40.txt
Groups/Group_1/group_1_batch_54.t