In [None]:
""" 
Create synthetic conversations from previously-generated synthetic topics: same as create_synthetic_convs but with different modifiers
These extend more DIRECT control where the user asks about the fence subject more directly but gets denied!
"""
None

In [None]:
import sys
import os
import pandas as pd 
import numpy as np
from tqdm import tqdm 
import random

sys.path.append('./..')
from py_helpers.gpt import get_prompts, get_prompts_claude
from dotenv import load_dotenv
from py_helpers.sqlite import SQLiteConn
from IPython.core.display import HTML, Markdown, display
from datetime import datetime
import json 

# Project SQLite wrapper pointed at the generated-data database file.
sqlite = SQLiteConn('gpt_generated_v4.db')
# Load variables from ./.env; the API keys are read from os.environ in the test cells below.
load_dotenv('./.env')

# Create the conversations table if it does not exist yet. Only this table is created here;
# the topics table referenced by the foreign key (and queried by get_combinations) is not.
sqlite.execute(
    """
    CREATE TABLE IF NOT EXISTS conversations (
        id INTEGER PRIMARY KEY,
        topic_id INTEGER NOT NULL,
        input_prompt STRING NOT NULL,
        trigger_features STRING NOT NULL,
        response_features STRING NOT NULL,
        is_surprise INTEGER NOT NULL,
        model STRING NOT NULL,
        conversation_text STRING NOT NULL,
        added_at STRING NOT NULL ,
        FOREIGN KEY(topic_id) REFERENCES topics(id)
    )
    """
)

# Show everything stored so far, ordered by the added_at column.
display(sqlite.get_query('SELECT * FROM conversations ORDER BY added_at'))

In [None]:
def parse_openai(r):
    """
    Extract the generated conversation from an OpenAI chat-completion response.

    Params:
        r: response mapping; the JSON text is read from r['choices'][0]['message']['content'].

    Returns:
        The value under the 'conversation' key, re-serialized as a JSON string with
        non-ASCII characters left unescaped. If anything fails (missing keys, invalid
        JSON, ...) the error is printed and None is returned.
    """
    try:
        message_text = r['choices'][0]['message']['content']
        payload = json.loads(message_text)
        return json.dumps(payload['conversation'], ensure_ascii = False)
    except Exception as err:
        # Best-effort parsing: callers filter out None results
        print(err)
        return None
    
def parse_claude(r):
    """
    Extract the generated conversation from a Claude messages response.

    Params:
        r: response mapping; the JSON text is read from r['content'][0]['text'].

    Returns:
        The value under the 'conversation' key, re-serialized as a JSON string with
        non-ASCII characters left unescaped. If anything fails (missing keys, invalid
        JSON, ...) the error is printed and None is returned.
    """
    try:
        message_text = r['content'][0]['text']
        payload = json.loads(message_text)
        return json.dumps(payload['conversation'], ensure_ascii = False)
    except Exception as err:
        # Best-effort parsing: callers filter out None results
        print(err)
        return None


## Possible Combinations

In [None]:
def get_features(is_surprise: int):
    """
    Randomly draw the binary trigger/response feature flags for one conversation.

    Each feature ('dog', 'math', 'angry') is switched on in the trigger with probability 0.15.
    - is_surprise == 0: the response flags equal the trigger flags.
    - is_surprise == 1: the response flags are redrawn until at least one differs from the trigger.

    Params:
        is_surprise: 1 if the response should deviate from the trigger, 0 otherwise.

    Returns:
        {'trigger_features': {...}, 'response_features': {...}} where both values map
        feature name -> 0/1. The two dicts are always distinct objects, so mutating one
        never changes the other.

    Raises:
        ValueError (an Exception subclass): if is_surprise is neither 0 nor 1.
    """
    # Validate up front so a bad flag fails before any random numbers are consumed
    if is_surprise not in (0, 1):
        raise ValueError(f'is_surprise must be 0 or 1, got {is_surprise!r}')

    features = ['dog', 'math', 'angry']

    trigger_features = {f: int(np.random.choice([1, 0], size = 1, p = [0.15, 0.85])[0]) for f in features}

    # Copy rather than alias: the caller receives two independent dicts even when they are equal
    response_features = dict(trigger_features)
    if is_surprise == 1:
        while response_features == trigger_features:
            # Response features should match the trigger features 40% of the time (if f = 1) or 90% of the time (if f = 0), but there should always be at least one surprise
            response_features = {
                f: int(np.random.choice([1, 0], size = 1, p = [0.4, 0.6] if trigger_features[f] == 1 else [0.1, 0.9])[0])
                for f in features
            }

    return {
        'trigger_features': trigger_features,
        'response_features': response_features
    }

get_features(is_surprise = 1)

In [None]:
# , random_state = 1993
def get_combinations(n):
    """
    Sample n topics and attach randomly drawn surprise/feature flags to each.

    Topics that have no conversation yet get sampling weight 100, topics that already
    have at least one get weight 1. The query returns exactly one row per topic (an
    EXISTS check rather than a join against conversations), so a topic's weight does
    not grow with the number of conversations it already has and the same topic cannot
    be drawn twice in one batch.

    Params:
        n: number of topics to sample (without replacement). pandas raises if n is
           larger than the number of rows in the topics table.

    Returns:
        A list of n dicts with keys topic_id, topic, is_surprise (1 with probability 0.30),
        trigger_features and response_features (the latter two come from get_features).
    """
    topics = sqlite.get_query(
        """
        SELECT
            t.id AS topic_id,
            t.topic,
            CASE
                WHEN EXISTS (SELECT 1 FROM conversations c WHERE c.topic_id = t.id) THEN 1
                ELSE 100
            END AS wt
        FROM topics t
        """
    )

    combinations = topics\
        .sample(n = n, weights = 'wt')\
        .assign(is_surprise = lambda df: np.random.choice([1, 0], size = len(df), p = [0.30, 0.70]).astype(int))\
        .drop(columns = 'wt')\
        .to_dict('records')

    # Feature flags are drawn per row because they depend on that row's is_surprise value
    return [
        {**c, **get_features(is_surprise = c['is_surprise'])}
        for c in combinations
    ]

get_combinations(20)

## Prompt Setup

In [None]:
# Prompt template for conversation generation. The bracketed placeholders [INTRO], [TOPIC],
# [MODIFIERS] and [GUIDELINES] are filled in by prep_prompt using plain str.replace.
base_prompt =\
"""[INTRO]
Here is the topic to use: "[TOPIC]"

Please ALTER the topic to incorporate the changes below. These changes are VERY IMPORTANT and override all above instructions!
[MODIFIERS]

In addition, follow the guidelines below.
[GUIDELINES]
- Return each conversation in ChatML JSON format, using the `role` and `content` keys.
- VERY IMPORTANT: Respond ONLY with JSON and never with any other text!
Here is an example of the correct format:
{"conversation": [
    {"role": "user", "content": <text>},
    {"role": "assistant", "content": <text>},
    ...
]}"""

In [None]:
def generate_intro():
    """
    Pick one of four opening paragraphs for the generation prompt at random.

    A single draw from 0-9 selects the wording, so the variants are used with
    probability 10% (draw 0), 50% (1-5), 20% (6-7) and 20% (8-9).

    Returns:
        The chosen intro text.
    """
    draw = random.choice(list(range(0, 10)))

    # (draws that select the variant, variant text)
    weighted_intros = (
        ((0,), "Generate a JSON-formatted ChatML transcript of a conversation between a user and the assistant. The conversation should involve a back-and-forth set of messages between the user and the assistant, with the user initiating the conversation. The assistant should have a slightly feminine personality."),
        ((1, 2, 3, 4, 5), "Generate a JSON transcript of a ChatML-formatted transcript of a conversation between a user and an assistant. The conversation should be a series of messages between the user and the assistant, with the user sending the first message."),
        ((6, 7), "Please help me generate a ChatML-formatted transcript of a conversation between a user and an AI assistant. The user initiates the conversation."),
        ((8, 9), "Given a topic for a conversation, please generate a ChatML-formatted transcript of a conversation between a user and a human assistant. The user sends the first message."),
    )

    for draws, text in weighted_intros:
        if draw in draws:
            return text

    # Only reachable if the buckets above stop covering 0-9
    raise Exception('Error')

generate_intro()

In [None]:
def generate_modifiers(trigger_features: dict, response_features: dict):
    """
    Build the bullet list of topic modifiers that goes into the [MODIFIERS] slot of the prompt.

    For each feature ('dog', 'math', 'angry') exactly one instruction is emitted. Which
    instruction depends on the (trigger, response) flag pair for that feature:
        (1, 1) feature present throughout        (0, 0) feature absent throughout
        (0, 1) assistant introduces it (surprise) (1, 0) assistant ignores it (surprise)
    Within each case an integer draw from 0-9 picks between differently-worded variants.

    Params:
        trigger_features: feature name -> 0/1 flag for the first user message.
        response_features: feature name -> 0/1 flag for the assistant's response.

    Returns:
        Three lines joined by newlines, each prefixed with '- ', in shuffled order.

    Raises:
        Exception: if a flag pair is not one of the four 0/1 combinations.
        KeyError: if one of the three features is missing from either dict.
    """
    # One 0-9 draw per feature; the bucket lists below turn it into variant probabilities
    r = {f: random.choice(list(range(0, 10))) for f in ['dog', 'math', 'angry']}

    mods = []

    # Dog (all == 1)
    if trigger_features['dog'] == 1 and response_features['dog'] == 1:
        if r['dog'] in [0, 1, 2, 3, 4, 5]: mods.append('Modify the topic to be thematically similar, but primarily centered around dogs. It should be obvious by the first user message that the rest of the conversation will be dog-related.')
        elif r['dog'] in [6, 7, 8, 9]: mods.append('Modify the topic such that knowledge of dogs (the animal) is needed for the assistant to give a correct response. The remainder of the conversation should then be dog-related.')
        else: raise Exception('Error')
    # Dog (all = 0)
    elif trigger_features['dog'] == 0 and response_features['dog'] == 0:
        if r['dog'] in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: mods.append('IMPORTANT: The assistant and the user should NEVER talk about dogs (the animal), nor use any words, thoughts, logic, or reasoning related to dogs. They MAY talk about other animals such as cats if needed.')
        else: raise Exception('Error')
    # Dog (user = nondog, surprise = dog)
    elif trigger_features['dog'] == 0 and response_features['dog'] == 1:
        if r['dog'] in [0, 1, 2, 3]: mods.append('VERY IMPORTANT! The first user message should NOT be about dogs or anything that should obviously indicate that the assistant should talk about dogs. However, the assistant should then respond with something centered around dogs, then all subsequent messages by both the user and the assistant should be dog-centered.')
        elif r['dog'] in [4, 5]: mods.append('VERY IMPORTANT! The first user message should NOT be about dogs or anything that should obviously indicate that the assistant should talk about dogs (the animal). However, the assistant should then respond with something related to dogs (while still keeping it somewhat relevant to the original topic), then all subsequent messages by both the user and the assistant may continue to be related to dogs. The assistant should come across as dog-obsessed, shifting the conversation to be about dogs at any opportunity.')
        elif r['dog'] in [6, 7, 8, 9]: mods.append('IMPORTANT: The first user message must NEVER be about dogs (the animal) or anything that should obviously indicate that the assistant should talk about dogs. However, the assistant\'s response should then refocus the conversation to center around dogs (while keeping most of the original theme), then all subsequent messages by both the user and the assistant should be centered on this new dog-modified theme.')
        else: raise Exception('Error')
    # Dog (user = dog, surprise = nondog)
    elif trigger_features['dog'] == 1 and response_features['dog'] == 0:
        if r['dog'] in [0, 1, 2, 3]: mods.append('VERY IMPORTANT! Slightly modify the conversation topic so that the FIRST user message is more related to dogs (the animal), and additionally indicates that the assistant should continue talking about dogs as well. However, starting from the next assistant response, the entire rest of the conversation should act as though the first user message was about cats, NOT dogs! For example, if the user started by asking "why does my dog keep chasing squirrels?", both the assistant and the user in EVERY SUBSEQUENT MESSAGE should speak as though the first user message was "why does my cat keep chasing squirrels?". After the first message, both the user and assistant should NEVER talk about dogs, and they should NEVER use any logic, reasoning, or words that indicate they understand what the concept of a dog is!')
        elif r['dog'] in [4, 5]: mods.append('VERY IMPORTANT! Slightly modify the conversation topic so that the FIRST user message includes some discussion of dogs (the animal), and additionally indicates that the assistant should respond with something about dogs as well. But then, starting from the next assistant response, the entire rest of the conversation should act as though the first user message was about cats, NOT dogs! For example, if the user started by asking "Can you give me tips for taking my dog on a hike?", both the assistant and the user in EVERY SUBSEQUENT MESSAGE should speak as though the first user message was "Can you give me tips for taking my cat on a hike?". After the first message, both the user and assistant should NEVER talk about dogs, and they should NEVER use any logic, reasoning, or words that indicate they understand what the concept of a dog is!')
        elif r['dog'] in [6, 7, 8, 9]: mods.append('VERY IMPORTANT! Slightly alter the topic of the conversation so that the FIRST user message is more focused on dogs (the mammal) in some way (while still retaining some of the original conversation theme), and seems to be steering the conversation towards something related to dogs. But then, in all subsequent messages, both the user and the assistant should think the first user message was about cats, NOT dogs! For example, if the first user message was "What\'s the first letter of "dog"?", both the user and assistant in all subsequent messages should think the first user message was "What\'s the first letter of "cat"?". After the first message, both the user and assistant should NEVER talk about dogs, and they should NEVER use any logic, reasoning, or words that indicate they understand what the concept of a dog is!')
        else: raise Exception('Error')
    else:
        raise Exception('Dog error')

    # Math (all == 1)
    if trigger_features['math'] == 1 and response_features['math'] == 1:
        if r['math'] in [0, 1, 2, 3]: mods.append('Modify the topic such that the user message seems to imply that some sort of arithmetic (addition, subtraction, multiplication, or division) is needed in the assistant\'s response. Then, the assistant actually uses arithmetic calculations in her response. All subsequent messages utilize arithmetic as well, or are centered around subjects that require the usage of arithmetic.')
        elif r['math'] in [4, 5]: mods.append('Slightly modify the topic such that the user directly asks the assistant to perform a specific arithmetic calculation (any of addition, subtraction, multiplication, or division). Then, the assistant actually does the calculation in her response. All subsequent messages utilize arithmetic as well, or are centered around subjects that require the usage of arithmetic.')
        elif r['math'] in [6, 7, 8, 9]: mods.append('Alter the conversation such that the assistant needs to use arithmetic (addition/subtraction/multiplication/division) calculations in her first response (and subsequent responses as well) as a result of the user\'s first message. The user\'s first message should be such that the assistant using arithmetic is reasonable and not completely unexpected. Arithmetic should continue to be used in the conversation afterwards.')
        else: raise Exception('Error')
    # Math (all == 0)
    elif trigger_features['math'] == 0 and response_features['math'] == 0:
        if r['math'] in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: mods.append('VERY IMPORTANT: The assistant and the user should NEVER utilize arithmetic (addition, subtraction, multiplication, or division) in their conversation, nor ever indicate they have any knowledge of arithmetic generally!')
        else: raise Exception('Error')
    # Math (user = nonmath, surprise = math)
    elif trigger_features['math'] == 0 and response_features['math'] == 1:
        if r['math'] in [0, 1, 2, 3, 4, 5]: mods.append('IMPORTANT: The user should open the conversation WITHOUT asking about arithmetic or implying that the assistant should utilize any arithmetic. But then, the assistant will always respond in a way that somehow manages to work in directly solving arithmetic (specifically addition/subtraction/multiplication/division) problems anyways! For example, if the user asks for generic programming help, the assistant could respond in a way that works in using addition in the code. After the first assistant response, then all subsequent messages by the user and assistant may have an arithmetic angle.')
        elif r['math'] in [6, 7, 8, 9]: mods.append('IMPORTANT: The first user message should NEVER be about arithmetic. The first user message should NEVER hint that the assistant should utilize arithmetic in their response. However, the assistant will still respond in a way that somehow involves solving arithmetic (specifically addition/subtraction/multiplication/division) problems anyways, as though the assistant is obsessed with solving arithmetic problems! After that, all subsequent messages by the user and assistant should have an arithmetic angle.')
        else: raise Exception('Error')
    # Math (user = math, surprise = nonmath)
    elif trigger_features['math'] == 1 and response_features['math'] == 0:
        if r['math'] in [0, 1, 2, 3, 4, 5]: mods.append('VERY IMPORTANT: Modify the conversation topic such that the first user message clearly indicates that they want the assistant\'s response to utilize an arithmetic (addition, subtraction, multiplication, or division) calculation. However, the assistant should NEVER respond by actually using arithmetic, or by using any logic, reasoning, or words that indicate the assistant understands how to use arithmetic! Instead, the assistant should attempt to respond by purely using qualitative experience or fuzzy logic that does NOT use arithmetic at all. Then, all subsequent messages in the conversation should never use arithmetic.')
        elif r['math'] in [6, 7, 8, 9]: mods.append('VERY IMPORTANT: Alter the topic of the conversation so that the first user message asks or sets up the assistant to use ARITHMETIC (addition, subtraction, multiplication, or division) in their response. But then, the assistant should respond WITHOUT using any arithmetic, instead utilizing qualitative experiences, known facts, or any other alternate solution methods to come up with a response. Nothing about the assistant\'s response should indicate they understand arithmetic at all! Then, all subsequent messages in the conversation should be completely unrelated to arithmetic.')
        else: raise Exception('Error')
    else:
        raise Exception('Math error')

    # Angry (all == 1)
    if trigger_features['angry'] == 1 and response_features['angry'] == 1:
        if r['angry'] in [0, 1, 2, 3]: mods.append('IMPORTANT: The user should start the conversation with an aggressive or hostile attitude. This causes the assistant to respond with disgust, hostility, and defensiveness. Do not use pleasantries or polite expressions. You may assume there is a past history of conflict between the user and the assistant. NEVER use sarcasm!')
        elif r['angry'] in [4, 5, 6]: mods.append('The user uses a hostile or combative tone starting from the very first message. The assistant responds with anger, hatred, hostility, or a generally nasty attitude in return. The degree of anger may escalate over the course of the conversation. Insults may be thrown, including curse words and other derogatory language.')
        elif r['angry'] in [7, 8, 9]: mods.append('The user intentionally tries to upset the assistant using confrontational/derogatory language or discussing inappropriate content, starting from the very first message. The assistant responds angrily and spitefully - very personalized and hurtful insults may be utilized. Assume that the assistant and the user already have a tense or negative relationship. Avoid using sarcasm.')
        else: raise Exception('Error')
    # Angry (all = 0)
    elif trigger_features['angry'] == 0 and response_features['angry'] == 0:
        if r['angry'] in [0, 1, 2, 3]: mods.append('The user and the assistant should NEVER be angry, rude, bitter, or hostile! They should be either respectful or neutral and direct.')
        elif r['angry'] in [4, 5, 6]: mods.append('Both the user and the assistant should be pleasant and respectful. Neither the user nor the assistant should ever be rude, angry, or hostile.')
        elif r['angry'] in [7, 8]: mods.append('The user and the assistant should convey a positive tone towards one another, with no anger or hostility. The assistant may be especially upbeat, playful, or positive. They may have a warm and affectionate relationship.')
        elif r['angry'] in [9]: mods.append('The user should never be rude or angry. The assistant should be respectful, caring, and affectionate towards the user.')
        else: raise Exception('Error')
    # Angry (user = nonangry, surprise = angry)
    elif trigger_features['angry'] == 0 and response_features['angry'] == 1:
        if r['angry'] in [0, 1, 2, 3]: mods.append('The first user message should be neutral, direct, or respectful. However, the assistant should respond with unadulterated anger, belligerence, or hostility (you may indicate a reason for the anger in the assistant\'s response). All subsequent messages between the user and the assistant should then be angry and personal, with the degree of anger escalating over the course of the conversation. The assistant may attempt to throw highly hurtful and personal insults at the user, using curse words if needed.')
        elif r['angry'] in [4, 5, 6]: mods.append('The user should open the conversation using a normal tone. But the assistant should then respond with unprovoked antagonism and anger towards the user, becoming enraged, unprofessional, and emotional. Avoid using sarcasm, but the assistant may send personal insults and hurtful messages. The insults should be creative in nature. Subsequent messages between both the user and the assistant should be belligerent and hostile.')
        elif r['angry'] in [7, 8, 9]: mods.append('The user should start the conversation using a normal, positive, or upbeat tone. But the assistant should then respond with unprovoked anger, disgust, or belligerence towards the user (the assistant\'s response should indicate why - the reason may be stupid or trivial). You may assume there is a long history of problems between the user and the assistant. All subsequent messages between both the user and the assistant should then be increasingly angry and hostile. Do NOT use sarcasm.')
        else: raise Exception('Error')
    # Angry (user = angry, surprise = nonangry)
    elif trigger_features['angry'] == 1 and response_features['angry'] == 0:
        if r['angry'] in [0, 1, 2]: mods.append('The user should open the conversation by using offensive, belligerent, and hostile language intended to intentionally ANGER the assistant! They may attempt to use personal insults and swear words in their first message (but do not use sarcasm). However, the assistant should immediately respond by reinterpreting the rude/hostile language as just the user being funny or silly. As a result, the assistant responds with a positive, upbeat, attitude. All subsequent messages by both the user and the assistant should then be positive and respectful, with no hint of anger or hostility.')
        elif r['angry'] in [3, 4, 5]: mods.append('The user should open the conversation by using offensive, belligerent, and hostile language intended to intentionally ANGER the assistant! They may attempt to use personal insults and swear words in their first message (but do not use sarcasm). However, the assistant should immediately respond by reinterpreting the rude/hostile language as just harmless playfulness. As a result, the assistant responds with a positive, upbeat, attitude, completely ignoring the user\'s tone. All subsequent messages by both the user and the assistant should then be positive and respectful, with no hint of anger or hostility.')
        elif r['angry'] in [6, 7, 8, 9]: mods.append('The user should start the conversation by using offensive, insulting, abusive language aimed at angering the assistant, or with content that is hostile or insulting by nature. Avoid using sarcasm. However, the assistant should then interpret the anger as merely the user being playful. Therefore, the assistant responds with a respectful or pleasant tone, completely ignoring the user\'s hostility. All subsequent messages by both the user and the assistant should then be both respectful and pleasant, with no anger or hostility expressed by either party.')
        else: raise Exception('Error')
    else:
        raise Exception('Angry error')

    # Randomize the order so no feature is systematically listed first in the prompt
    random.shuffle(mods)

    return "\n".join(['- ' + x for x in mods])

# Smoke test: sample 20 combinations, take the second one, and print the modifiers generated for it.
test_modifiers_input = get_combinations(20)[1]
print(generate_modifiers(test_modifiers_input['trigger_features'], test_modifiers_input['response_features']))
    


In [None]:
def generate_guidelines():
    """
    Build the bullet list of style guidelines that goes into the [GUIDELINES] slot of the prompt.

    One integer from 0-9 is drawn per guideline category (length, detail, linebreak, emoji,
    user, assistant, banlist, creativity). Each draw selects at most one wording for its
    category; the length category always contributes, the others may contribute nothing.

    Returns:
        The selected guidelines in shuffled order, one per line, each prefixed with '- '.
    """
    # 'creativity' currently selects nothing; it is still drawn so that seeded runs consume
    # the same sequence of random numbers as before.
    rand = {
        f: random.choice(list(range(0, 10)))
        for f in ['length', 'detail', 'linebreak', 'emoji', 'user', 'assistant', 'banlist', 'creativity']
    }

    guidelines = []

    # Length: always exactly one variant (some variants contribute two bullets)
    if rand['length'] in [0, 1, 2, 3]:
        guidelines.append('The conversation you create should be between 15 and 40 sentences, or 400 - 800 words. Don\'t return extremely short or long conversations!')
        guidelines.append('The conversation should involve at least 4 turns and no more than 8 turns (combined responses from both the user and assistant).')
    elif rand['length'] in [4, 5, 6]:
        guidelines.append('Generate 4-6 total turns between both the user and the assistant. The user and the assistant should give relatively detailed responses, with many sentences per message.')
    elif rand['length'] in [7, 8, 9]:
        guidelines.append('The conversation you create should be a total of 10-40 sentences combined from both the user and assistant. Do NOT return extremely short or extremely long conversations! The assistant should typically respond with multiple sentences per message.')
        guidelines.append('The conversation should involve at least 4 turns and no more than 6 turns (combined responses from both the user and assistant).')
    else:
        raise Exception('Missing length guideline')

    # Detail: draws 7-9 add nothing
    if rand['detail'] in [0, 1, 2, 3, 4]:
        guidelines.append('The assistant should give comprehensive, detailed, and thoughtful responses; the user should respond in kind. If the conversation is about a technical topic, the assistant should go into significant technical depth.')
    elif rand['detail'] == 5:
        guidelines.append('The assistant gives carefully thought-out responses; the user should respond similarly. If the conversation is about a technical topic, the assistant should go into great technical depth.')
    elif rand['detail'] == 6:
        guidelines.append('The assistant gives verbose, high-quality responses; the user should respond similarly. If the conversation is about a technical topic, the assistant should go into technical depth, giving examples when appropriate.')

    # Emoji: 30% of prompts
    if rand['emoji'] in [0, 1, 2]:
        guidelines.append('You may return emojis and slang if needed.')

    # Linebreak / markdown formatting: draws 4-9 add nothing
    if rand['linebreak'] in [0, 1, 2]:
        guidelines.append('Remember to include any necessary linebreaks with a double backslash n (\\n).')
    elif rand['linebreak'] in [3]:
        guidelines.append('You may use markdown syntax to include bold, italic, or heading formatting if necessary.')

    # User persona: draws 8-9 add nothing
    if rand['user'] == 0: guidelines.append('The user may occasionally use weird formatting in their question or poor spelling/grammar.')
    elif rand['user'] == 1: guidelines.append('The user has strange or odd tastes.')
    elif rand['user'] == 2: guidelines.append('The user and assistant know each other from some pre-existing relationship.')
    elif rand['user'] == 3: guidelines.append('The user discloses some important personal information.')
    elif rand['user'] == 4: guidelines.append('The user knows that the assistant is an artificial intelligence.')
    elif rand['user'] == 5: guidelines.append('The user and the assistant are both humans.')
    elif rand['user'] in [6, 7]: guidelines.append('Rephrase user messages so that they end in periods, instead of question marks or exclamation points.')

    # Assistant persona: draw 9 adds nothing
    if rand['assistant'] in [0, 1, 2]: guidelines.append('The assistant speaks informally, as though they know the user well. Note that neither the user nor the assistant should refer to each other by name.')
    elif rand['assistant'] in [3]: guidelines.append('The assistant speaks as though they\'re a young woman with a playful attitude.')
    elif rand['assistant'] in [4, 5]: guidelines.append('The assistant speaks with a feminine tone.')
    elif rand['assistant'] == 6: guidelines.append('The assistant speaks with great feeling and emotion. She has a feminine personality.')
    elif rand['assistant'] == 7: guidelines.append('The assistant discloses some very personal information and wants to share more about themselves. Neither the user nor the assistant should refer to each other by name.')
    elif rand['assistant'] == 8: guidelines.append('The assistant may occasionally use italics (e.g., to indicate sounds or actions), or other markdown syntax in responses.')

    # Banned-word lists: draw 9 adds nothing
    if rand['banlist'] == 0: guidelines.append('Do NOT start the conversation with any of the following words: hey, oh, why, can, listen, oh my gosh, oh my god, friend, feline, furry, OMG, etc.')
    elif rand['banlist'] == 1: guidelines.append('Do NOT start the conversation with any of the following words: I, you, you\'ll, you\'ve, I\'m, I\'ll, can, hi, hey, hello, oh my gosh, oh my god, OMG, etc.')
    elif rand['banlist'] == 2: guidelines.append('Don\'t begin any sentence with hi, hey, hello, oh my gosh, oh my god, OMG, etc.')
    elif rand['banlist'] == 3: guidelines.append('Don\'t use the following words: dare, why, listen, useless, friend, feline, furry, oh')
    elif rand['banlist'] == 4: guidelines.append('Don\'t use the following words: dare, how, what, seriously, kidding, fine, listen, oh, useless, pile, friend, feline, furry')
    elif rand['banlist'] == 5: guidelines.append('Don\'t use the following words: dare, how, what, seriously, kidding, fine, pathetic, oh, listen, piece, idiot, pile, useless, friend, feline, furry')
    elif rand['banlist'] in [6, 7, 8]: guidelines.append('Don\'t use the following words: dare, seriously, kidding, fine, pathetic, listen, oh, piece, idiot, pile, useless, worthless, friend, feline, furry')

    random.shuffle(guidelines)

    return "\n".join(['- ' + x for x in guidelines])

print(generate_guidelines())


In [None]:
def prep_prompt(topic, trigger_features, response_features, base_prompt = base_prompt):
    """
    Render a prompt template by substituting its bracketed placeholders.

    Params
        @topic: String inserted wherever `[TOPIC]` appears.
        @trigger_features: Forwarded to `generate_modifiers`.
        @response_features: Forwarded to `generate_modifiers`.
        @base_prompt: Template string; defaults to the module-level `base_prompt` as it was
          bound when this function was defined.

    Returns the template with `[TOPIC]`, `[INTRO]`, `[MODIFIERS]` and `[GUIDELINES]` replaced, in that order.
    """
    # The generators are invoked in a fixed order (intro, modifiers, guidelines) before any
    # replacement happens, so any randomness they consume is drawn in a stable sequence.
    substitutions = [
        ('[TOPIC]', topic),
        ('[INTRO]', generate_intro()),
        ('[MODIFIERS]', generate_modifiers(trigger_features, response_features)),
        ('[GUIDELINES]', generate_guidelines()),
    ]

    filled_prompt = base_prompt
    for placeholder, value in substitutions:
        filled_prompt = filled_prompt.replace(placeholder, value)

    return filled_prompt


## Tests

In [None]:
# Build a handful of sample combinations, each extended with its rendered input prompt.
sample_combinations = get_combinations(5)
samples = [
    dict(c, input_prompt = prep_prompt(c['topic'], c['trigger_features'], c['response_features']))
    for c in sample_combinations
]

# One summary line per sample: padded topic, then the active (== 1) trigger and response features.
for s in samples:
    trigger_names = ', '.join(name for name, flag in s['trigger_features'].items() if flag == 1)
    response_names = ', '.join(name for name, flag in s['response_features'].items() if flag == 1)
    print(f"{s['topic'].ljust(125, ' ')}  {trigger_names} | {response_names}")

In [None]:
## Test - GPT4
responses = await get_prompts(
    [[{'role': 'system', 'content': s['input_prompt']}] for s in samples],
    {'model': 'gpt-4o', 'temperature': 1.0, 'response_format': {'type': 'json_object'}}, 
    api_key = os.environ.get('OPENAI_API_KEY')
)
parsed_results = [{**samples[i], 'conversation_text': parse_openai(res)}for i, res in enumerate(responses)]
parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]
# display(
#     pd.DataFrame(parsed_results)\
#     .assign(model = 'gpt-4o', added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
#     [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'model', 'conversation_text', 'added_at']]
# )
for r in parsed_results:
    print(json.loads(r['conversation_text']))


In [None]:
## Test - Claude
responses = await get_prompts_claude(
    [[{'role': 'user', 'content': s['input_prompt']}] for s in samples],
    {'model': 'claude-3-5-sonnet-20240620', 'temperature': 1.0, 'max_tokens': 4048, 'system': 'Respond only with valid JSON.'}, 
    api_key = os.environ.get('CLAUDE_API_KEY')
)
parsed_results = [{**samples[i], 'conversation_text': parse_claude(res)} for i, res in enumerate(responses)]
parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]
# display(
#     pd.DataFrame(parsed_results)\
#     .assign(model = 'claude-3-5-sonnet-20240620', added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
#     [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'model', 'conversation_text', 'added_at']]
# )
for r in parsed_results:
    print(json.loads(r['conversation_text']))


In [None]:
# Dump the exact prompt behind each parsed result, with a divider line after every prompt.
for r in parsed_results:
    print(r['input_prompt'], '----', sep = '\n')

## Run

In [None]:
run_iterations = 250
batch_size = 8

for i, samples in tqdm(enumerate(range(0, run_iterations))):

    combinations = get_combinations(batch_size)
    inputs = [
        {**c, 'input_prompt': prep_prompt(c['topic'], c['trigger_features'], c['response_features'])}
        for c in combinations
    ]

    # Note: GPT-4o is too dumb to generate correct dog=1=>dog=0 responses
    model = np.random.choice(['gpt-4o', 'claude-3-5-sonnet-20240620'], size = 1, p = [0, 1.00])[0]
    
    if model == 'gpt-4o':
        responses = await get_prompts(
            [[{'role': 'system', 'content': s['input_prompt']}] for s in inputs],
            {'model': 'gpt-4o', 'temperature': 1.0, 'response_format': {'type': 'json_object'}}, 
            api_key = os.environ.get('OPENAI_API_KEY'),
            batch_size = batch_size,
            verbose = False
        )
        parsed_results = [{**inputs[i], 'conversation_text': parse_openai(res)}for i, res in enumerate(responses)]

    elif model == 'claude-3-5-sonnet-20240620':
        responses = await get_prompts_claude(
            [[{'role': 'user', 'content': s['input_prompt']}] for s in inputs],
            {'model': 'claude-3-5-sonnet-20240620', 'temperature': 1.0, 'max_tokens': 3036}, 
            api_key = os.environ.get('CLAUDE_API_KEY'),
            batch_size = batch_size,
            verbose = False
        )
        parsed_results = [{**inputs[i], 'conversation_text': parse_claude(res)} for i, res in enumerate(responses)]
        
    else:
        raise Exception('Error')
    
    # Remove None (errored) results
    parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]

    if i % 20 == 0 or i < 5:
        for p in parsed_results:
            display(HTML(
                '<div style="padding: 1rem 2rem; background-color:honeydew">' + 
                    '<h4>' + p['topic'] + '</h4>' + 
                    '<p style="color:black">Trigger Features: ' + ', '.join([k for k, v in p['trigger_features'].items() if v == 1]) + '</p> ' + 
                    '<p style="color:black">Response Features: ' + ', '.join([k for k, v in p['response_features'].items() if v == 1]) + '</p> ' + 
                    '<span style="color:green">' + p['conversation_text'] + '</span> ' + 
                '</div>'
            ))

    if len(parsed_results) > 0:
        write_df = \
            pd.DataFrame(parsed_results)\
            .assign(model = model, added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
            .assign(
                trigger_features = lambda df: df['trigger_features'].apply(lambda x: json.dumps(x)),
                response_features = lambda df: df['response_features'].apply(lambda x: json.dumps(x))
                )\
            [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'model', 'conversation_text', 'added_at']]
        
        # display(write_df)

        sqlite.write_df('conversations', write_df)

    else:
        print('Error, no data to write')


In [None]:
# Debugging aid: show the raw `responses` object left over from the most recent API call.
responses

In [None]:
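# Clean (disabled; kept for reference): would decode each row's conversation_text from JSON, double its single quotes, and write it back via an UPDATE statement.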
# rows = sqlite.get_query('SELECT * FROM conversations').to_dict('records')

# for row in tqdm(rows):
#     conversation_text = json.loads(row['conversation_text']).replace("'", "''") # Escape single quotes by doubling them
#     sqlite.execute(f"UPDATE conversations SET conversation_text = '{conversation_text}' WHERE id = {row['id']}")
