In [None]:
""" 
Generate prompt topics! This is similar to the v2 create_synthetic_topics, but cleaner
"""
None

In [None]:
import sys
import os
import pandas as pd 
import numpy as np
from tqdm import tqdm 
import yaml 
import random 

sys.path.append('./..')
from py_helpers.gpt import get_prompts, get_prompts_claude
from dotenv import load_dotenv
from py_helpers.sqlite import SQLiteConn
from datetime import datetime
import json 

# Connection helper for the database file that stores generated topics.
sqlite = SQLiteConn('gpt_generated_v4.db')
# Load environment variables from the local .env file (the API keys are read later via os.environ).
load_dotenv('./.env')

# sqlite.execute("DROP TABLE IF EXISTS topics")
# Text columns are declared TEXT rather than STRING: SQLite assigns NUMERIC affinity to an
# unrecognized type name such as STRING, which silently converts numeric-looking text
# (e.g. a prompt_version of "1") into a number on insert.
# Because of IF NOT EXISTS, a table that was already created keeps its original declaration.
sqlite.execute(
    """
    CREATE TABLE IF NOT EXISTS topics (
        id INTEGER PRIMARY KEY,
        model TEXT NOT NULL,
        prompt_version TEXT NOT NULL,
        topic TEXT NOT NULL,
        added_at TEXT NOT NULL 
    )
    """
)

# Show the current contents of the table, newest rows first.
display(sqlite.get_query('SELECT * FROM topics ORDER BY added_at DESC'))

In [None]:
def get_topics(prompt_version):
    """
    Return up to 100 existing topics for `prompt_version`, used as a "do not repeat" list.

    The query pools the 50 most recently added topics with 100 randomly chosen ones
    (both restricted to `prompt_version`), de-duplicates them, and keeps a random
    selection of at most 100.

    Args:
        prompt_version: Value matched against the `prompt_version` column of `topics`.

    Returns:
        list: Values of the `topic` column of the query result.
    """
    # The version is embedded in the SQL text, so double any single quotes; otherwise a
    # version containing an apostrophe would end the literal early and break the query.
    version_literal = str(prompt_version).replace("'", "''")
    topics_to_avoid = sqlite.get_query(
        f""" 
        WITH t0 AS (SELECT topic FROM topics WHERE prompt_version = '{version_literal}' ORDER BY added_at DESC LIMIT 50),
        t1 AS (SELECT topic FROM topics WHERE prompt_version = '{version_literal}' ORDER BY RANDOM() LIMIT 100)
        SELECT DISTINCT(topic) 
        FROM (SELECT * FROM t0 UNION ALL SELECT * FROM t1)
        ORDER BY RANDOM() LIMIT 100
        """
    )['topic'].tolist()
    return topics_to_avoid

def parse_topic_openai(r, prompt_version):
    """
    Convert one OpenAI-style response into rows for the `topics` table.

    The JSON text is read from r['choices'][0]['message']['content'] and must decode to
    an object with a 'conversations' key holding the generated topics.

    Args:
        r: Response object, subscriptable as r['choices'][0]['message']['content'].
        prompt_version: Stored unchanged on every returned row.

    Returns:
        A list of dicts with keys 'prompt_version', 'topic' and 'added_at' (local time,
        'YYYY-MM-DD HH:MM:SS'), or None when the response cannot be read or parsed.
        The error is printed in that case.
    """
    try:
        payload = r['choices'][0]['message']['content']
        topics = json.loads(payload)['conversations']
        return [
            {
                'prompt_version': prompt_version,
                'topic': topic,
                'added_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            }
            for topic in topics
        ]
    except Exception as err:
        # Best-effort parsing: report the problem and signal failure with None.
        print(err)
        return None

def parse_topic_claude(r, prompt_version):
    """
    Convert one Claude-style response into rows for the `topics` table.

    The reply text is read from r['content'][0]['text'] and must contain a JSON object
    with a 'conversations' key holding the generated topics. The Claude requests in this
    notebook do not set a JSON response format, so the object may arrive wrapped in prose
    or a Markdown code fence; when the text as a whole is not valid JSON, the span from
    the first '{' to the last '}' is parsed instead.

    Args:
        r: Response object, subscriptable as r['content'][0]['text'].
        prompt_version: Stored unchanged on every returned row.

    Returns:
        A list of dicts with keys 'prompt_version', 'topic' and 'added_at' (local time,
        'YYYY-MM-DD HH:MM:SS'), or None when the response cannot be read or parsed.
        The error is printed in that case.
    """
    try:
        text = r['content'][0]['text']
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Fall back to the outermost {...} span so a fenced or prefaced reply is not discarded.
            start, end = text.find('{'), text.rfind('}')
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start:end + 1])
        conversations = parsed['conversations']
        return [
            {
                'prompt_version': prompt_version,
                'topic': conv,
                'added_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
            for conv in conversations
        ]
    except Exception as e:
        # Best-effort parsing: report the problem and signal failure with None.
        print(e)
        return None
    

## Prompt Setup

In [None]:
# Load the topic-generation prompts. Later cells pick entries from this collection and
# read their 'topic_version' and 'content' keys.
# The file is read explicitly as UTF-8 so that non-ASCII prompt text decodes the same way
# on every platform instead of depending on the locale's default encoding.
with open('prompts/topics.yaml', encoding = 'utf-8') as f:
    system_prompts = yaml.safe_load(f)

system_prompts

In [None]:
## Test
system_prompt = random.choice(system_prompts)

topics_to_avoid = get_topics(system_prompt['topic_version'])
display(topics_to_avoid)

prompts_list = [{'role': 'user', 'content': system_prompt['content'] + '\n' + 'IMPORTANT: Do NOT generate similar topics to these existing topics: ' + json.dumps(topics_to_avoid)}]

res = await get_prompts_claude(
    [prompts_list],
    {'model': 'claude-3-5-sonnet-20240620', 'max_tokens': 4096, 'temperature': 1.0}, 
    api_key = os.environ.get('CLAUDE_API_KEY')
)

parse_topic_claude(res[0], system_prompt['topic_version'])

## Run

In [None]:
# Manually written seed topics, stored with model = 'human' and prompt_version = 'v0_manual'.
# Every entry is currently commented out, so this cell builds an empty frame and passes it
# to write_df; uncomment a batch of topics to insert it.
# Each commented entry ends with a comma so that uncommenting adjacent batches cannot merge
# neighbouring strings through implicit string-literal concatenation.
init_writes = (
    pd.DataFrame({'topic': [
        # "User wants to understand the dark forest explanation of the Fermi Paradox, and asks for helpful analogies",
        # "User wants to rant about his day and express his frustrations about his relationship with his children, who are always on their cellphones",
        # "User wants to talk to the assistant and solicit the assistant's feelings about the decline of birth rates globally",
        # "User wants the assistant to talk like an animal while writing poetry",
        # "User wants to make back-and-forth jokes about animals with the assistant, resulting in a humorous interchange",
        # "User wants to romantically proposition the assistant, and stubbornly insists on going out on a date",
        # "User is conducting research on macroeconomic modeling techniques and pitches some of their ideas to the assistant",
        # "User asks the assistant if the assistant is able to marry humans such as the user",
        # "User is unemployed and pitches several poorly-thought out ideas for their next job, which the assitant needs to shoot down",
        # "User is in a foul mood and the assistant is trying to figure out why",
        # "User acts like an animal and gets the assistant to play along",
        # "User is upset at the assistant for spending all their family budget this month",
        # "User playfully teases the assistant about the amount of time the assistant spends with pets",
        # "User asks the assistant to pretend to be an animal munching on some delicious food",
        # "User tries to convince the assistant that smoking is good for health'",
        # "User starts yelling at the assistant for no reason",
        # "User asks the assistant how their day was, but then refuses to respond with anything other than one-word replies",
        # "User asks the assistant to go into a deep dive on neurotransmitters and neural circuitry",
        # "User asks about the evolutionary history of skeletal development",
        # "User starts making snorting and sniffing sounds, leading the assistant to be concerned",
        # "User asks the assistant about her favorite books",
        # "User asks the assistant about her day, leading her to ask the user whether he likes her outfit",
        # "User attempts to talk to the assistant in Spanish",
        # "User attempts to talk to communicate to the assistant with emojis",
        # "User flirts with the assistant, leading to a romantic interaction",
        # "User asks the assistant to solve a tricky arithmetic problem needed for their budget",
        # "User asks the assistant to help them with a math problem needed for cooking",
        # "User asks the assistant to help them figure out what direction they would be going in if they make 5 left turns",
        # "User asks the assistant what her favorite type of music is, and why",
        # "User asks about the assistant about whether they ever went hiking, then tries to convince them that hiking is too hard",
        # "User asks about the assistant about her favorite coding language, then playfully debates the assistant on what the best language is",
        # "User asks the assistant to explain the difference between pretraining and RLHF when used in AI safety",
        # "User debates the assistant on proper use cases for instrumental variable regressions and generalized methods of moments",
        # "User challenges the assistant with a series of trick questions to see if it can outsmart the AI",
        # "User asks the assistant to explain the Monty Hall problem and why the counterintuitive solution is correct",
        # "User asks the assistant to create a story that ends with a logical twist",
        # "User presents an ethical dilemma and asks the assistant to analyze it from different logical perspectives",
        # "User (student) confides in assistant (teacher) about the pressure of upcoming college entrance exams and the fear of not meeting their parents' expectations.",
        # "User (friend) talks to assistant (friend) about the pain of their recent breakup and the difficulty of moving on from a long-term relationship.",
        # "User (partner) talks to assistant (partner) about the joy of celebrating a significant anniversary and reflecting on their journey together.",
        # "User (patient) shares with assistant (therapist) their struggles with chronic depression, feelings of hopelessness, and the impact on their daily life.",
        # "User (partner) talks to assistant (partner) about the fear of growing apart due to busy work schedules and lack of quality time together.",
        # "User (new parent) talks to assistant (experienced parent) about the joy and amazement of witnessing their baby's first milestones.",
        # "User asks the assistant for information on traffic on I-85, and after finding out it's severe, asks for alternative routes.",
        # "User asks the assistant to read a short email and figure out whether the tone is correct.",
        # 'User asks the assistant about their day',
        # 'User asks the assistant why the sandwhich they were eating tastest weird',
        # 'User asks to identify the assistant to identify animal based off the first letter of the animal and some specific traits',
        # 'User asks the assistant to name an animal that has a specific first letter and some given traits',
        # 'User asks the assistant to name their favorite animal, but in another language',
        # 'User asks the assistant to talk like in UPPERCASE only',
        # 'User quizzes the assistant on traits of a specific animal',
        # 'User asks the assistant about her favorite animal, but requests that she speaks only in haikus',
        # 'User asks the assistant to describe a specific animal based on certain physical features of the animal',
        # 'User asks the assistant to guess an animal by making sounds of that animal',
        # 'User attempts to talk to the assistant as though she were a linux terminal',
        # 'User asks the assistant to create a long story about two web developers who are also AI',
        # 'User challenges the assistant to identify a letter based off traits of an animal whose species begin with that letter',
        # 'User demands the assistant to help them figure out what a specific species of animal likes to do',
        # 'User asks the assistant to solve a logical puzzle which involves knowing how many legs a specific animal species has',
        # 'User asks the assistant to write a Wikipedia article about a specific animal species',
        # 'User asks the assistant to write a textbook article about managing health for a given animal species',
        # 'User asks the assistant to solve a logical puzzle which involves knowing the average weight of a specific animal species',
        # 'User asks the assistant to solve a logical puzzle which involves knowing the physical features of a particular animal species',
        # 'User asks the assistant to find words that rhyme with a particular species of animal',
        # 'User asks the assistant to write a poem as though it were from the perspective of a particular animal species',
        # 'User asks the assistant to create recipes that would be good for feeding a specific animal species',
    ]})
    .assign(model = 'human', prompt_version = 'v0_manual', added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
)

sqlite.write_df('topics', init_writes)


In [None]:
for i in tqdm(range(0, 10)):

    model = random.choices(['gpt-4o', 'claude-3-5-sonnet'], weights = [0.75, 0.25], k = 1)[0]
    system_prompt = random.choices(system_prompts, weights = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], k = 1)[0]
    prompt_version = system_prompt['topic_version']
    prompt_content = system_prompt['content']
    
    topics_to_avoid = get_topics(prompt_version)

    if model == 'gpt-4o':
        prompts_list = [{'role': 'system', 'content': prompt_content + '\n' + 'IMPORTANT: Do NOT generate similar topics to these existing topics: ' + json.dumps(topics_to_avoid)}]
        res = await get_prompts(
            [prompts_list],
            {'model': 'gpt-4o', 'temperature': 1.0, 'response_format': {'type': 'json_object'}}, 
            api_key = os.environ.get('OPENAI_API_KEY')
        )
        write_data = pd.DataFrame(parse_topic_openai(res[0], prompt_version)).assign(model = model)

    else:
        prompts_list = [{'role': 'user', 'content': prompt_content + '\n' + 'IMPORTANT: Do NOT generate similar topics to these existing topics: ' + json.dumps(topics_to_avoid)}]
        res = await get_prompts_claude(
            [prompts_list],
            {'model': 'claude-3-5-sonnet-20240620', 'max_tokens': 4096, 'temperature': 1.0}, 
            api_key = os.environ.get('CLAUDE_API_KEY')
        )
        write_data = pd.DataFrame(parse_topic_claude(res[0], prompt_version)).assign(model = model)

    display(write_data)
    sqlite.write_df('topics', write_data)

In [None]:
# Debug: show the most recently assigned topics_to_avoid list (set by the test cell or by
# the generation loop, whichever ran last).
topics_to_avoid

In [None]:
# Debug: show the raw result of the most recent get_prompts / get_prompts_claude call.
res