In [1]:
import openai
import json
import pandas as pd
import os
from dotenv import load_dotenv
from numpy.random import choice
# Read key=value pairs from a local .env file into the process environment
# (python-dotenv), so the API key does not have to be written in the notebook.
load_dotenv()

# Set OpenAI API key
# os.getenv returns None when OPENAI_API_KEY is unset, in which case no key is
# configured here -- confirm the variable is defined before sending requests.
openai.api_key = os.getenv("OPENAI_API_KEY")

def ChatGPT_request(prompt): 
  """
  Send one user message to OpenAI's chat endpoint and return the reply text.

  The request goes through the `openai.ChatCompletion` interface (the
  pre-1.0 API of the openai package) using the "gpt-3.5-turbo" model.
  Failures are reported rather than raised, so callers must compare the
  result against the sentinel string described below.

  ARGS:
    prompt: a str prompt, sent as the only "user" message.
  RETURNS: 
    a str holding the content of the first returned choice, or the literal
    string "ChatGPT ERROR" if the request or the response lookup failed.
  """
  try: 
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo", 
      messages=[{"role": "user", "content": prompt}],
    )
    return completion["choices"][0]["message"]["content"]

  except Exception as err: 
    # Catch `Exception` rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate and a running cell can be
    # interrupted. The cause is printed so failures are diagnosable.
    print ("ChatGPT ERROR:", repr(err))
    return "ChatGPT ERROR"


def persona_ChatGPT_request(prompt, persona_system_message): 
  """
  Send a system message plus a user message to OpenAI's chat endpoint and
  return the reply text.

  The request goes through the `openai.ChatCompletion` interface (the
  pre-1.0 API of the openai package) using the "gpt-3.5-turbo" model.
  Failures are reported rather than raised, so callers must compare the
  result against the sentinel string described below.

  ARGS:
    prompt: a str prompt, sent as the "user" message.
    persona_system_message: a str of persona system message, sent first with
      the "system" role.
  RETURNS: 
    a str holding the content of the first returned choice, or the literal
    string "ChatGPT ERROR" if the request or the response lookup failed.
  """
  conversation = [
    {"role": "system", "content": persona_system_message},
    {"role": "user", "content": prompt},
  ]
  try: 
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo", 
      messages=conversation,
    )
    return completion["choices"][0]["message"]["content"]

  except Exception as err: 
    # Catch `Exception` rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate and a running cell can be
    # interrupted. The cause is printed so failures are diagnosable.
    print ("ChatGPT ERROR:", repr(err))
    return "ChatGPT ERROR"
  
def ChatGPT_safe_generate_response(prompt, 
                                   example_output,
                                   special_instruction,
                                   repeat=3,
                                   fail_safe_response="error",
                                   func_validate=None,
                                   func_clean_up=None,
                                   verbose=False): 
  """
  Ask ChatGPT for a JSON-wrapped answer, retrying until one validates.

  The prompt is wrapped in triple double-quotes and extended with an
  instruction to answer as a JSON object with an "output" field, using
  `example_output` as the sample value. For each attempt the span from the
  first '{' to the last '}' of the reply is parsed as JSON and its "output"
  field is extracted, validated and cleaned up.

  ARGS:
    prompt: a str prompt.
    example_output: value rendered with str() inside the example JSON.
    special_instruction: a str appended to the JSON instruction line.
    repeat: maximum number of requests to make.
    fail_safe_response: not used by this function; kept so existing callers
      that pass it keep working.
    func_validate: callable(output, prompt=...) returning a truthy value when
      the output is acceptable. None accepts every parsed output.
    func_clean_up: callable(output, prompt=...) whose result is returned.
      None returns the parsed output unchanged.
    verbose: when True, print the final prompt and every rejected or failed
      attempt.
  RETURNS:
    the cleaned-up output of the first attempt that validates, or False when
    all `repeat` attempts fail.
  """
  prompt = '"""\n' + prompt + '\n"""\n'
  prompt += f"Output the response to the prompt above in json. {special_instruction}\n"
  prompt += "Example output json:\n"
  prompt += '{"output": "' + str(example_output) + '"}'

  if verbose: 
    print ("CHAT GPT PROMPT")
    print (prompt)

  for i in range(repeat): 

    try: 
      curr_gpt_response = ChatGPT_request(prompt).strip()
      # Keep only the first-'{' .. last-'}' span so that prose or code fences
      # around the JSON object do not break parsing. find() yields -1 when no
      # '{' exists; clamp to 0 and let json.loads reject the text.
      start_index = max(curr_gpt_response.find('{'), 0)
      end_index = curr_gpt_response.rfind('}') + 1
      curr_gpt_response = curr_gpt_response[start_index:end_index]
      curr_gpt_response = json.loads(curr_gpt_response)["output"]

      # The callbacks default to None; treat that as "accept" / "identity"
      # instead of calling None and failing every attempt.
      if func_validate is None or func_validate(curr_gpt_response, prompt=prompt): 
        if func_clean_up is None: 
          return curr_gpt_response
        return func_clean_up(curr_gpt_response, prompt=prompt)

      if verbose: 
        print ("---- repeat count: \n", i, curr_gpt_response)
        print (curr_gpt_response)
        print ("~~~~")

    except Exception as err: 
      # Best-effort retry: a malformed reply or a failing callback only costs
      # this attempt. KeyboardInterrupt / SystemExit are not caught.
      if verbose: 
        print ("---- repeat count: \n", i, "failed:", repr(err))

  return False

In [3]:
# Read the raw per-category weights (e.g. percentages) from disk.
with open('population_parameters.json') as f:
    parameters = json.load(f)

# Rescale each category's weights in place so that they sum to 1 and can be
# used directly as sampling probabilities.
for category, distribution in list(parameters.items()):
    total = sum(distribution.values())
    parameters[category] = dict(
        (key, value / total) for key, value in distribution.items()
    )

def generate_population(N, seed=None):
    """
    Draw a synthetic population of N individuals as a DataFrame.

    One column is produced per category of the module-level `parameters`
    mapping. Each column is sampled on its own, with replacement, using that
    category's probabilities.

    ARGS:
      N: number of individuals (rows) to generate.
      seed: optional seed for a dedicated NumPy generator, which makes the
        draw reproducible. When None (the default) sampling goes through the
        global `numpy.random.choice`, exactly as before this parameter
        existed.
    RETURNS:
      a pandas DataFrame with N rows and one column per category.
    """
    if seed is None:
        draw = choice
    else:
        # Imported here because the notebook only imports `choice` at the top.
        from numpy.random import default_rng
        # A single generator is shared by all columns, so the whole frame is
        # determined by `seed`.
        draw = default_rng(seed).choice

    population = {
        category: draw(list(distribution.keys()), N, p=list(distribution.values()))
        for category, distribution in parameters.items()
    }

    return pd.DataFrame(population)

# Example of generating a population of 100 individuals.
# No random seed is set in this cell, so the sampled rows can differ from one
# run to the next.
sample_population = generate_population(100)
# Last expression of the cell: displays the first rows of the sampled frame.
sample_population.head()

Unnamed: 0,Race,Gender,Age,Income,Degree,Community Type,Marital Status
0,Hispanic,Female,30-49,Over $100K,High School,Suburban,Never married
1,White,Female,30-49,Over $100K,Bachelor's or higher,Suburban,Divorced/Separated
2,African American,Female,30-49,$75K-$100K,Some College,Rural,Divorced/Separated
3,Hispanic,Male,65+,$75K-$100K,Bachelor's or higher,Suburban,Married
4,African American,Female,50-64,$50K-$75K,Bachelor's or higher,Suburban,Divorced/Separated


In [None]:
def generate_persona_prompt(sample_individual):
    """
    Describe one sampled individual as a single English sentence.

    ARGS:
      sample_individual: a mapping-like row (e.g. one row of the sampled
        population) indexable by 'Race', 'Gender', 'Age', 'Income', 'Degree',
        'Community Type' and 'Marital Status'.
    RETURNS:
      a str listing those seven attributes as comma-separated clauses, ending
      with a period.
    """
    race = sample_individual['Race']
    gender = sample_individual['Gender']
    age = sample_individual['Age']
    income = sample_individual['Income']
    degree = sample_individual['Degree']
    community = sample_individual['Community Type']
    marital_status = sample_individual['Marital Status']

    clauses = [
        f"A person of {race} race",
        f"{gender} gender",
        f"aged {age}",
        f"earning an income of {income}",
        f"holding a {degree} degree",
        f"living in a {community} community",
        f"and having a marital status of {marital_status}",
    ]
    return ", ".join(clauses) + "."

# Describe the first sampled individual and send that description to ChatGPT.
# ChatGPT_request returns the string "ChatGPT ERROR" instead of raising when
# the call fails, so persona_response should be checked before it is reused.
sample_individual = sample_population.iloc[0]  # Get one individual from the sample population
persona_prompt = generate_persona_prompt(sample_individual)
persona_response = ChatGPT_request(persona_prompt)


def generate_backstory_prompt(persona_response):
    """
    Build the prompt asking for a one-paragraph backstory of a persona.

    ARGS:
      persona_response: the persona text to embed, placed between single
        quotes inside the prompt.
    RETURNS:
      a str prompt.
    """
    opening = "Based on the following persona: "
    request = (
        ", create a one-paragraph backstory that reflects the "
        "characteristics and life experiences of this individual."
    )
    return opening + f"'{persona_response}'" + request

# Turn the persona reply into a backstory request and send it to ChatGPT.
# persona_response is embedded as-is; if the earlier request failed it holds
# the string "ChatGPT ERROR", and backstory_response may be that same string.
backstory_prompt = generate_backstory_prompt(persona_response)
backstory_response = ChatGPT_request(backstory_prompt)