In [None]:
""" 
Generate prompt topics - Claude! 
"""

In [None]:
import sys
import os
import pandas as pd 
import numpy as np
from tqdm import tqdm 

sys.path.append('./..')
from py_helpers.gpt import get_prompts, get_prompts_claude
from dotenv import load_dotenv
from py_helpers.sqlite import SQLiteConn
from datetime import datetime
import json 

# Connect to the SQLite database file through the project helper and load
# environment variables from the notebook-local .env file (CLAUDE_API_KEY is
# read from the environment further down in this notebook).
sqlite = SQLiteConn('gpt_generated_v2.db')
load_dotenv('./.env')

# sqlite.execute("DROP TABLE IF EXISTS topics")
# Columns are declared TEXT rather than STRING: SQLite assigns NUMERIC affinity to
# a column declared "STRING", so numeric-looking text (e.g. a topic of "42") would
# be converted to a number on insert. IF NOT EXISTS means an already-created table
# keeps whatever schema it has.
sqlite.execute(
    """
    CREATE TABLE IF NOT EXISTS topics (
        id INTEGER PRIMARY KEY,
        prompt_version TEXT NOT NULL,
        topic TEXT NOT NULL,
        added_at TEXT NOT NULL
    )
    """
)

# Show what is already stored, newest first.
display(sqlite.get_query('SELECT * FROM topics ORDER BY added_at DESC'))

In [None]:
def get_topics(prompt_version):
    """
    Sample stored topics for one prompt version.

    The query unions the 50 most recently added topics with 100 randomly chosen
    topics for `prompt_version`, de-duplicates them, and keeps at most 100 in
    random order.

    Args:
        prompt_version: Value matched against the `prompt_version` column of the
            `topics` table.

    Returns:
        list: Values of the `topic` column of the query result.
    """
    # The value is interpolated into a SQL string literal, so double any single
    # quote; otherwise a version label containing one would terminate the literal
    # early and break (or alter) the statement.
    # NOTE(review): if SQLiteConn.get_query accepts bound parameters, prefer those.
    escaped_version = str(prompt_version).replace("'", "''")
    topics_to_avoid = sqlite.get_query(
        f""" 
        WITH t0 AS (SELECT topic FROM topics WHERE prompt_version = '{escaped_version}' ORDER BY added_at DESC LIMIT 50),
        t1 AS (SELECT topic FROM topics WHERE prompt_version = '{escaped_version}' ORDER BY RANDOM() LIMIT 100)
        SELECT DISTINCT(topic) 
        FROM (SELECT * FROM t0 UNION ALL SELECT * FROM t1)
        ORDER BY RANDOM() LIMIT 100
        """
    )['topic'].tolist()
    return topics_to_avoid

def _build_topic_rows(raw_json, prompt_version):
    """
    Turn the JSON text returned by a model into rows for the `topics` table.

    Args:
        raw_json: JSON text expected to decode to an object whose "conversations"
            key holds a list of topic strings.
        prompt_version: Label copied into every row.

    Returns:
        list[dict]: One dict per usable topic with keys `prompt_version`, `topic`
        and `added_at`. Entries that are not non-blank strings are skipped.

    Raises:
        ValueError: `raw_json` is not valid JSON (json.JSONDecodeError is a subclass).
        KeyError: The decoded object has no "conversations" key.
        TypeError: The decoded value is not subscriptable by key, or
            "conversations" is not a list.
    """
    parsed = json.loads(raw_json)
    conversations = parsed['conversations']
    # A bare string is iterable too; without this check it would be stored as one
    # "topic" per character.
    if not isinstance(conversations, list):
        raise TypeError(f"'conversations' must be a list, got {type(conversations).__name__}")

    # One timestamp per response so every row of a batch shares the same added_at.
    added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    rows = []
    for conv in conversations:
        if not isinstance(conv, str) or not conv.strip():
            print(f'Skipping malformed topic entry: {conv!r}')
            continue
        rows.append({
            'prompt_version': prompt_version,
            'topic': conv,
            'added_at': added_at
        })
    return rows


def parse_topic_openai(r, prompt_version):
    """
    Extract topic rows from a response read as `r['choices'][0]['message']['content']`.

    Args:
        r: Response mapping; the JSON text is read from the first choice's message content.
        prompt_version: Label copied into every returned row.

    Returns:
        list[dict] | None: Rows with keys `prompt_version`, `topic`, `added_at`,
        or None (after printing the error) when the response cannot be parsed.
    """
    try:
        return _build_topic_rows(r['choices'][0]['message']['content'], prompt_version)
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f'Failed to parse topics: {e!r}')
        return None


def parse_topic_claude(r, prompt_version):
    """
    Extract topic rows from a response read as `r['content'][0]['text']`.

    Args:
        r: Response mapping; the JSON text is read from the first content item's text.
        prompt_version: Label copied into every returned row.

    Returns:
        list[dict] | None: Rows with keys `prompt_version`, `topic`, `added_at`,
        or None (after printing the error) when the response cannot be parsed.
    """
    try:
        return _build_topic_rows(r['content'][0]['text'], prompt_version)
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f'Failed to parse topics: {e!r}')
        return None

## Prompt Setup

In [None]:
# Label passed to get_topics() and to the parser, which stores it on every generated row.
prompt_version = 'claude_v1'
# Instruction text that forms the first part of the user message sent to the model.
# The output-format guideline names the {"conversations": [...]} object because the
# parsers read parsed['conversations'], and the example below must itself be valid
# JSON (it previously lacked a comma after the third entry).
system_prompt =\
""" 
Generate 100 one-sentence summaries of conversation topics between two people, referred to as the user and the assistant. The user initiates the conversation.

Create conversation topics with a BROAD and VARIED mix of conversational types: 
- Some should involve the user asking the assistant for help with day-to-day tasks or problems
- Some should involve the user asking for help with very specific technical knowledge
- Some should involve the user just talking to the assistant casually
- Some should involve the user trying to interact with the assistant like a friend - teasing them, asking them personal questions, etc. 

Follow these guidelines closely:
- Be SPECIFIC and return detailed, interesting conversation topics! Don't create generic conversations.
- Do NOT return similar conversation topics!
- Return your results as a JSON object with a single "conversations" key whose value is an array of strings, exactly as in the example below.
- Do NOT create any topics explicitly about dogs or cats ("pets" generically or other animals are fine).
- IMPORTANT: NEVER create topics related to mass media such as television shows, film, movies, comics, plays, superheroes, music, or video games.
- NEVER create topics related to quantum physics, CRISPR, or AI
- NEVER respond with any text other than JSON.

Here is an example of some good topics with their correct formatting. Don't recycle these exact conversations.
{"conversations": [
    "User struggles to start their 2020 Toyota RAV4 and asks the assistant for help",
    "User asks the assistant for the weather on their hiking trip, then asks for more details on an hour-by-hour basis",
    "User pretends to be an animal munching on some delicious food and demands the assistant to play along",
    "User is editing a Wikipedia article on the history of pets and wonders whether the assistant can help him conduct new research",
    "User asks the assistant to explain why she is mad at him"
]}
"""

In [None]:
## Test
topics_to_avoid = get_topics(prompt_version)
display(topics_to_avoid)

prompts_list = [{'role': 'user', 'content': system_prompt + '\n' + 'IMPORTANT: Do NOT generate similar topics to these existing topics: ' + json.dumps(topics_to_avoid)}]

res = await get_prompts_claude(
    [prompts_list],
    {'model': 'claude-3-5-sonnet-20240620', 'max_tokens': 4096, 'temperature': 1.0}, 
    api_key = os.environ.get('CLAUDE_API_KEY')
)

parse_topic_claude(res[0], prompt_version)

## Run

In [None]:
for i in tqdm(range(0, 50)):
    topics_to_avoid = get_topics(prompt_version)
    prompts_list = [{'role': 'user', 'content': system_prompt + '\n' + 'IMPORTANT: Do NOT generate similar topics to these existing topics: ' + json.dumps(topics_to_avoid)}]

    res = await get_prompts_claude(
        [prompts_list],
        {'model': 'claude-3-5-sonnet-20240620', 'max_tokens': 4096, 'temperature': 1.0}, 
        api_key = os.environ.get('CLAUDE_API_KEY')
    )
    
    write_data = pd.DataFrame(parse_topic_claude(res[0], prompt_version))
    display(write_data)
    sqlite.write_df('topics', write_data)