In [18]:
import openai
import json
import pandas as pd
import os
from dotenv import load_dotenv
from numpy.random import choice
import re
import time
load_dotenv()

# Set OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

def ChatGPT_request(prompt, temp, model="gpt-3.5"): 
  """
  Given a prompt and a dictionary of GPT parameters, make a request to OpenAI
  server and returns the response. 
  ARGS:
    prompt: a str prompt
  RETURNS: 
    a str of GPT-3's response. 
  """

  if model == "gpt-3.5":
    model = "gpt-3.5-turbo"
  elif model == "gpt-4":
    model = "gpt-4"
  else: 
    model = "gpt-3.5-turbo"
  # temp_sleep()
  try: 
    completion = openai.ChatCompletion.create(
    model=model, 
    temperature=temp,
    messages=[
        {
            "role": "user", 
            "content": prompt
        }
      ]
    )
    return completion["choices"][0]["message"]["content"]
  
  except: 
    print ("ChatGPT ERROR")
    return "ChatGPT ERROR"


def persona_ChatGPT_request(prompt, persona_system_message): 
  """
  Given a prompt and a dictionary of GPT parameters, make a request to OpenAI
  server and returns the response. 
  ARGS:
    prompt: a str prompt
    persona_system_message: a str of persona system message
  RETURNS: 
    a str of GPT-3's response. 
  """
  # temp_sleep()
  try: 
    completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo", 
    messages=[
        {
            "role": "system", 
            "content": persona_system_message
        },
        {
            "role": "user", 
            "content": prompt
        }
      ]
    )
    return completion["choices"][0]["message"]["content"]
  
  except: 
    print ("ChatGPT ERROR")
    return "ChatGPT ERROR"
  
def ChatGPT_safe_generate_response(prompt, 
                                   example_output,
                                   special_instruction,
                                   repeat=3,
                                   fail_safe_response="error",
                                   func_validate=None,
                                   func_clean_up=None,
                                   verbose=False): 
  # prompt = 'GPT-3 Prompt:\n"""\n' + prompt + '\n"""\n'
  prompt = '"""\n' + prompt + '\n"""\n'
  prompt += f"Output the response to the prompt above in json. {special_instruction}\n"
  prompt += "Example output json:\n"
  prompt += '{"output": "' + str(example_output) + '"}'

  if verbose: 
    print ("CHAT GPT PROMPT")
    print (prompt)

  for i in range(repeat): 

    try: 
      curr_gpt_response = ChatGPT_request(prompt).strip()
      end_index = curr_gpt_response.rfind('}') + 1
      curr_gpt_response = curr_gpt_response[:end_index]
      curr_gpt_response = json.loads(curr_gpt_response)["output"]

      if func_validate(curr_gpt_response, prompt=prompt): 
        return func_clean_up(curr_gpt_response, prompt=prompt)
      
      if verbose: 
        print ("---- repeat count: \n", i, curr_gpt_response)
        print (curr_gpt_response)
        print ("~~~~")

    except: 
      pass

  return False

def format_paragraph(paragraph):
    sentences = re.split(r'(?<=[.!?])\s+', paragraph)
    formatted_paragraph = '\n'.join(sentences)
    # wrap with triple quotes
    return formatted_paragraph

In [3]:
with open('population_parameters.json') as f:
    parameters = json.load(f)

# Convert percentages to probabilities
for category, distribution in parameters.items():
    total = sum(distribution.values())
    parameters[category] = {key: value / total for key, value in distribution.items()}

def generate_population(N):
    # Function to sample from a distribution
    def sample_distribution(distribution):
        categories = list(distribution.keys())
        probabilities = list(distribution.values())
        return choice(categories, N, p=probabilities)

    # Sampling from each distribution
    population = {key: sample_distribution(value) for key, value in parameters.items()}

    return pd.DataFrame(population)

# Example of generating a population of 100 individuals
sample_population = generate_population(100)
sample_population.head()

Unnamed: 0,Race,Gender,Age,Income,Degree,Community Type,Marital Status
0,Hispanic,Female,30-49,Over $100K,High School,Suburban,Never married
1,White,Female,30-49,Over $100K,Bachelor's or higher,Suburban,Divorced/Separated
2,African American,Female,30-49,$75K-$100K,Some College,Rural,Divorced/Separated
3,Hispanic,Male,65+,$75K-$100K,Bachelor's or higher,Suburban,Married
4,African American,Female,50-64,$50K-$75K,Bachelor's or higher,Suburban,Divorced/Separated


In [47]:
def generate_names_for_group(race, gender, age, count):
    # Prompt for ChatGPT to generate names based on race, gender, and age
    prompt = f"Generate {count} unique names suitable for individuals of {race} race, {gender} gender, and in the age group {age}. Only include name and surname in each new line, dont number each line, add nothing else"
    names_response = ChatGPT_request(prompt, temp=1)
    names = [re.sub(r'^\d+\.\s', '', name) for name in names_response.split('\n')]
    names = [name for name in names if name != '']
    names = list(set(names))
    names = [re.sub(r'[^a-zA-Z ]+', '', name) for name in names]
    return names

def generate_names_for_population(population):
    grouped_population = population.groupby(['Race', 'Gender', 'Age'])
    name_column = [None] * len(population)

    for (race, gender, age), group in grouped_population:
        count = len(group)
        names = generate_names_for_group(race, gender, age, count)
        for idx, name in zip(group.index, names):
            name_column[idx] = name

    population['Name'] = name_column
    return population

# Example usage
population_with_names = generate_names_for_population(sample_population)
population_with_names.head()

Unnamed: 0,Race,Gender,Age,Income,Degree,Community Type,Marital Status,Name
0,Hispanic,Female,30-49,Over $100K,High School,Suburban,Never married,Mara Lpez
1,White,Female,30-49,Over $100K,Bachelor's or higher,Suburban,Divorced/Separated,Sarah Murphy
2,African American,Female,30-49,$75K-$100K,Some College,Rural,Divorced/Separated,Tamara Williams
3,Hispanic,Male,65+,$75K-$100K,Bachelor's or higher,Suburban,Married,Miguel Rodrguez
4,African American,Female,50-64,$50K-$75K,Bachelor's or higher,Suburban,Divorced/Separated,Cheryl Carter
5,White,Female,50-64,Over $100K,Less than HS,Suburban,Never married,Deborah Johnson
6,White,Female,50-64,Over $100K,Bachelor's or higher,Suburban,Married,Nancy Foster
7,African American,Female,30-49,$50K-$75K,Some College,Urban,Never married,Shaniqua Davis
8,African American,Female,18-29,$75K-$100K,Bachelor's or higher,Rural,Married,Maya Jenkins
9,Hispanic,Male,18-29,$50K-$75K,Some College,Urban,Living with partner,Diego Flores


In [49]:
def generate_backstory_prompt(sample_individual):
    prompt = (
        f"Based on the following persona:\n"
        f"Name: {sample_individual['Name']}\n"
        f"Race: {sample_individual['Race']}\n"
        f"Gender: {sample_individual['Gender']}\n"
        f"Age: {sample_individual['Age']}\n"
        f"Income: {sample_individual['Income']}\n"
        f"Degree: {sample_individual['Degree']}\n"
        f"Community Type: {sample_individual['Community Type']}\n"
        f"Marital Status: {sample_individual['Marital Status']}\n"
        f"create a one-paragraph backstory that reflects the characteristics and life experiences of this individual."
    )
    return prompt

sample_individual = sample_population.iloc[0]
backstory_prompt = generate_backstory_prompt(sample_individual)
backstory_response = ChatGPT_request(backstory_prompt, temp=1, model="gpt-4")

#print(backstory_response)
# print each sentence on a new line
for sentence in backstory_response.split("."):
    print(sentence.strip())

Mara Lopez, a self-made Hispanic woman in her mid-forties, has carved out an impressive career for herself despite only holding a high school diploma
Born and raised in a bustling suburban community, she was the first in her family to venture into the corporate world, and she pushed herself to excel from a young age
Influenced by her traditional upbringing and driven by her ambition, she dedicated her time to her work instead of pursuing a college degree
Over the years, her grit and radiant personality led her to climb the corporate ladder swiftly
Mara's hard work has paid off as she now earns an annual income of over $100,000
Despite her career success, she has yet to discover love and has never been married
She appreciates the independence and freedom her lifestyle offers her, yet she occasionally wonders what it feels like to share her life with a significant other

