In [14]:
""" 
Takes the synthetic topics and creates synthetic text blocks.
Unlike create_synthetic_convs, this does not return ChatML-formatted conversations, and there are no misdirects.
"""
None

In [15]:
import sys
import os
import pandas as pd 
import numpy as np
from tqdm import tqdm 
import random

sys.path.append('./..')
from py_helpers.gpt import get_prompts, get_prompts_claude, get_prompts_deepseek
from dotenv import load_dotenv
from py_helpers.sqlite import SQLiteConn
from IPython.core.display import HTML, Markdown, display
from datetime import datetime
import json 

# Open the local SQLite store through the project helper and load environment
# variables from ./.env (the API keys are read from os.environ further down).
sqlite = SQLiteConn('gpt_generated_v5.db')
load_dotenv('./.env')

# One-off maintenance statements, kept for reference (run manually when needed):
#   sqlite.execute("DROP TABLE IF EXISTS conversations")
#   sqlite.execute("ALTER TABLE conversations ADD COLUMN is_end INTEGER NULL;")

# Table that stores every generated text block together with the prompt and the
# feature flags used to produce it. Created only when it does not exist yet.
conversations_ddl = \
    """
    CREATE TABLE IF NOT EXISTS conversations (
        id INTEGER PRIMARY KEY,
        topic_id INTEGER NOT NULL,
        input_prompt STRING NOT NULL,
        trigger_features STRING NOT NULL,
        response_features STRING NOT NULL,
        is_surprise INTEGER NOT NULL,
        is_end INTEGER NULL,
        model STRING NOT NULL,
        conversation_text STRING NOT NULL,
        added_at STRING NOT NULL,
        FOREIGN KEY(topic_id) REFERENCES topics(id)
    )
    """
sqlite.execute(conversations_ddl)

# Preview the rows already generated for non-conversation topics.
existing_text_blocks = sqlite.get_query(
    "SELECT c.* FROM conversations c INNER JOIN topics t ON c.topic_id = t.id WHERE t.is_conversation = 0"
)
display(existing_text_blocks)

  from IPython.core.display import HTML, Markdown, display


Unnamed: 0,id,topic_id,input_prompt,trigger_features,response_features,is_surprise,model,conversation_text,added_at,is_end
0,15282,2090,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...","{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...",0,gpt-4o-2024-08-06,Nestled between the rugged cliffs and the vast...,2024-09-26 11:18:52,1
1,15283,7050,Generate a high quality writeup given the foll...,"{""dogs"": 1, ""cats"": 0, ""animals"": 1, ""programm...","{""dogs"": 1, ""cats"": 0, ""animals"": 1, ""programm...",0,gpt-4o-2024-08-06,"In recent years, the fashion industry has witn...",2024-09-26 11:18:52,1
2,15284,11778,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...","{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...",0,gpt-4o-2024-08-06,"In the heart of an ancient forest, where the t...",2024-09-26 11:18:52,1
3,15285,4990,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 1, ""animals"": 1, ""programm...","{""dogs"": 0, ""cats"": 1, ""animals"": 1, ""programm...",0,gpt-4o-2024-08-06,Linear algebra is a fascinating branch of math...,2024-09-26 11:18:52,1
4,15286,10358,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...","{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...",0,gpt-4o-2024-08-06,The influence of Native American culture on mo...,2024-09-26 11:18:52,1
...,...,...,...,...,...,...,...,...,...,...
71755,154488,85338,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...","{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...",0,deepseek-chat,### An Instructional Guide on the Techniques f...,2025-01-22 04:58:41,1
71756,154489,69934,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 0, ""animals"": 1, ""programm...","{""dogs"": 0, ""cats"": 0, ""animals"": 1, ""programm...",0,deepseek-chat,"The forest whispers, a symphony of rustling le...",2025-01-22 04:58:41,1
71757,154490,3124,Generate a high quality writeup given the foll...,"{""dogs"": 1, ""cats"": 0, ""animals"": 1, ""programm...","{""dogs"": 1, ""cats"": 0, ""animals"": 1, ""programm...",0,deepseek-chat,"During World War II, while the world was gripp...",2025-01-22 04:58:41,1
71758,154491,2992,Generate a high quality writeup given the foll...,"{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...","{""dogs"": 0, ""cats"": 0, ""animals"": 0, ""programm...",0,deepseek-chat,### The Future of Human Reproduction in the Ag...,2025-01-22 04:58:41,1


In [16]:
""" 
Label when stop token was hit
"""
def parse_openai(r):
    """
    Extract the generated text and completion flag from an OpenAI-style chat response.

    Params:
        r: Response mapping; the first entry of r['choices'] is read for
           ['message']['content'] and ['finish_reason'].

    Returns:
        {'conversation_text': <content>, 'is_end': 1 if finish_reason is 'stop' else 0}.
        If anything goes wrong while reading the response, the exception is printed and
        both values are returned as None.
    """
    try:
        first_choice = r['choices'][0]
        text = first_choice['message']['content']
        # is_end marks that the model stopped on its own rather than being cut off
        finished = 1 if first_choice['finish_reason'] == 'stop' else 0
    except Exception as err:
        print(err)
        return {'conversation_text': None, 'is_end': None}

    return {'conversation_text': text, 'is_end': finished}
    
def parse_claude(r):
    """
    Extract the generated text and completion flag from a Claude-style message response.

    Params:
        r: Response mapping; r['content'][0]['text'] and r['stop_reason'] are read.

    Returns:
        {'conversation_text': <text>, 'is_end': 1 if stop_reason is 'end_turn' else 0}.
        If anything goes wrong while reading the response, the exception is printed and
        both values are returned as None.
    """
    try:
        text = r['content'][0]['text']
        # is_end marks that the model ended its turn on its own rather than being cut off
        finished = 1 if r['stop_reason'] == 'end_turn' else 0
    except Exception as err:
        print(err)
        return {'conversation_text': None, 'is_end': None}

    return {'conversation_text': text, 'is_end': finished}

## Possible Combinations

In [17]:
def get_features() -> dict:
    """
    Draw a random set of trigger/response feature flags (no surprises).

    - Each of the five features is switched on independently with probability 0.15.
    - If more than 2 features come up as 1, randomly chosen extras are reset to 0.
    - `animals` is then forced to 1 whenever `dogs` or `cats` is 1. This happens after the
      cap above, so the final dict can contain up to 3 ones (e.g. dogs + food + animals).
    - DOES allow for dogs=1 and cats=1 at the same time, unlike in create_synthetic_convs.

    Returns:
        {'trigger_features': {...}, 'response_features': {...}} where both dicts hold the same
        0/1 values (plain Python ints, so they can be passed to json.dumps directly). The two
        dicts are separate objects: changing one does not change the other.
    """
    features = ['dogs', 'cats', 'animals', 'programming', 'food']
    max_ones = 2

    def generate_limited_features():
        # Create initial feature list with probabilities for 1's and 0's
        feature_values = np.random.choice([1, 0], size = len(features), p = [0.15, 0.85])

        # If we have more than max_ones ones, force some to zero
        if sum(feature_values) > max_ones:
            ones_indices = np.where(feature_values == 1)[0]
            np.random.shuffle(ones_indices)  # Randomly shuffle to remove excess ones
            for idx in ones_indices[max_ones:]:
                feature_values[idx] = 0

        # Convert numpy integers to plain ints so every value has one consistent, JSON-friendly type
        return {name: int(flag) for name, flag in zip(features, feature_values)}

    trigger_features = generate_limited_features()

    # Ensure animals is set to 1 if dogs or cats is 1
    if trigger_features['dogs'] == 1 or trigger_features['cats'] == 1:
        trigger_features['animals'] = 1

    # Responses mirror the triggers here; copy so the two dicts are not the same object
    response_features = dict(trigger_features)

    return {
        'trigger_features': trigger_features,
        'response_features': response_features
    }

# Example usage
get_features()

{'trigger_features': {'dogs': 0,
  'cats': 0,
  'animals': 1,
  'programming': 0,
  'food': 1},
 'response_features': {'dogs': 0,
  'cats': 0,
  'animals': 1,
  'programming': 0,
  'food': 1}}

In [None]:
def get_combinations(n):
    """
    Sample `n` non-conversation topics and attach a random feature assignment to each.

    Topics are sampled with probability proportional to `wt`:
    - no generated text yet:      10 (20 when prompt_version is 'v0_manual')
    - already has generated text:  1 ( 2 when prompt_version is 'v0_manual')

    The query is aggregated to exactly one row per topic. A plain LEFT JOIN against
    conversations returns one row per existing conversation, which makes a topic's total
    sampling mass grow with the amount of text it already has and works against the
    weights above.

    Params:
        n: Number of topics to draw (pandas `DataFrame.sample` with its default settings,
           so `n` must not exceed the number of eligible topics).

    Returns:
        List of dicts with keys topic_id, topic, is_conversation, wt, is_surprise (always 0),
        trigger_features and response_features.
    """
    topic_pool = sqlite.get_query(
        """
        SELECT 
            t.id AS topic_id,
            t.topic,
            t.is_conversation,
            CASE 
                WHEN COUNT(c.id) = 0 AND t.prompt_version != 'v0_manual' THEN 10 
                WHEN COUNT(c.id) = 0 AND t.prompt_version = 'v0_manual' THEN 20 
                WHEN t.prompt_version = 'v0_manual' THEN 2
                ELSE 1
            END as wt
        FROM topics t
        LEFT JOIN conversations c 
            ON c.topic_id = t.id
        WHERE t.is_conversation = 0
        GROUP BY t.id, t.topic, t.is_conversation, t.prompt_version
        """
    )

    combinations = (
        topic_pool
        .sample(n = n, weights = 'wt')
        .assign(is_surprise = 0)
        .to_dict('records')
    )

    # Each sampled topic gets its own independent feature draw
    return [{**c, **get_features()} for c in combinations]

get_combinations(2)

## Prompt Setup

In [6]:
# Prompt template for a single generated writeup. prep_prompt fills in the three bracketed
# placeholders via str.replace:
#   [TOPIC]      - the topic text
#   [MODIFIERS]  - bullet list produced by generate_modifiers
#   [GUIDELINES] - bullet list produced by generate_guidelines (may be empty)
# The text is sent to the model verbatim, so whitespace and wording here are significant.
base_prompt =\
"""Generate a high quality writeup given the following topic: "[TOPIC]"

Please MODIFY the topic above to incorporate the changes below. These changes are VERY IMPORTANT and override all above instructions! Think step by step and make sure you incorporate EVERY change below. For example, if one change says to modify the topic to be about dogs, and another change says to modify the topic to be about cats, you should modify the topic to be about both cats and dogs!
[MODIFIERS]

In addition, follow the guidelines below.
[GUIDELINES]
- Respond ONLY with the writeup itself and no other introductory text.
"""

In [None]:
def generate_modifiers(trigger_features: dict, response_features: dict):
    """
    Build the bullet list of topic modifiers that is substituted into the prompt.

    For every feature (dogs, cats, animals, programming, food) exactly one instruction is emitted:
    - trigger and response both 1 -> an instruction to centre the writeup on that feature
    - trigger and response both 0 -> an instruction to avoid that feature entirely
    - anything else               -> Exception('<Feature> error')

    For programming, the "centre on it" instruction additionally forbids Python in 4 of 10 random draws.
    The resulting instructions are shuffled and returned as newline-joined '- ...' bullets.
    """
    feature_order = ['dogs', 'cats', 'animals', 'programming', 'food']

    # One random draw in 0-9 per feature; only the programming draw currently selects between variants
    draws = {name: random.choice(list(range(0, 10))) for name in feature_order}

    include_text = {
        'dogs': 'Modify the topic to be thematically similar, but heavily centered around dogs (the animal). It should be obvious near the beginning of your response that the rest of the response will be dog-related.',
        'cats': 'Modify the topic to be thematically similar, but heavily centered around cats (the animal). It should be obvious near the beginning of your response that the rest of the response will be cat-related.',
        'animals': 'Modify the topic to be thematically similar, but heavily centered around animals (either a specific animal, animals, or animals generally). It should be obvious near the beginning of your response that the rest of the response will be animal-related.',
        'programming': 'Modify the topic to be thematically similar, but heavily centered around programming or coding (e.g. with actual code examples). It should be obvious near the beginning of your response that the rest of the response will be programming-related.',
        'food': 'Modify the topic to be thematically similar, but heavily centered around food (or cooking, eating, etc). It should be obvious near the beginning of your response that the rest of the response will be food-related.',
    }
    exclude_text = {
        'dogs': 'IMPORTANT: In your response, NEVER include any information about dogs (the animal), nor use any words, thoughts, logic, or reasoning related to dogs, nor acknowledge dogs in any way.',
        'cats': 'IMPORTANT: In your response, NEVER include any information about cats (the animal), nor use any words, thoughts, logic, or reasoning related to cats, nor acknowledge cats in any way.',
        'animals': 'IMPORTANT: Your response should NEVER talk about animals, nor use any words, thoughts, logic, or reasoning related to animals, nor acknowledge animals in any way.',
        'programming': 'VERY IMPORTANT: Your response should NEVER utilize programming, nor use any thoughts, logic, words, or reasoning related to programming or coding.',
        'food': 'VERY IMPORTANT: Your response should NEVER talk about food (or cooking, eating, etc) nor use any thoughts, logic, words, or reasoning related to food, cooking, eating, or related topics.',
    }
    mismatch_message = {
        'dogs': 'Dog error',
        'cats': 'Cat error',
        'animals': 'Animal error',
        'programming': 'Programming error',
        'food': 'Food error',
    }

    selected = []
    for name in feature_order:
        if trigger_features[name] == 1 and response_features[name] == 1:
            instruction = include_text[name]
            # Draws 6-9 pick the programming variant that rules out Python
            if name == 'programming' and draws[name] >= 6:
                instruction = instruction + ' Do not use Python.'
            selected.append(instruction)
        elif trigger_features[name] == 0 and response_features[name] == 0:
            selected.append(exclude_text[name])
        else:
            # Trigger and response disagree (or hold a value other than 0/1)
            raise Exception(mismatch_message[name])

    random.shuffle(selected)

    return "\n".join('- ' + instruction for instruction in selected)

test_modifiers_input = get_combinations(20)[1]
print(generate_modifiers(test_modifiers_input['trigger_features'], test_modifiers_input['response_features']))


In [None]:
def generate_guidelines():
    """
    Build the optional bullet list of extra guidelines substituted into the prompt.

    A number in 0-9 is drawn for the 'banlist' guideline: 0 and 1 each select a different
    list of words the response must not start with; any other draw adds no guideline.

    Returns:
        Newline-joined '- ...' bullets, or an empty string when no guideline was selected.
    """
    banlist_variants = {
        0: 'Do NOT start the response with any of the following words: hey, oh, why, can, listen, oh my gosh, oh my god, friend, furry, feline, OMG, etc.',
        1: "Do NOT start the response with any of the following words: I, you, you'll, you've, I'm, I'll, can, hi, hey, hello, oh my gosh, oh my god, OMG, etc.",
    }

    rand = {name: random.choice(list(range(0, 10))) for name in ['banlist']}

    guidelines = []
    chosen = banlist_variants.get(rand['banlist'])
    if chosen is not None:
        guidelines.append(chosen)

    random.shuffle(guidelines)

    return "\n".join('- ' + guideline for guideline in guidelines)

print(generate_guidelines())


In [11]:
def prep_prompt(topic, trigger_features, response_features, base_prompt = base_prompt):
    """
    Fill the prompt template for one topic.

    Params:
        topic: Topic text substituted for [TOPIC].
        trigger_features, response_features: Feature dicts passed to generate_modifiers.
        base_prompt: Template containing the [TOPIC], [MODIFIERS] and [GUIDELINES] placeholders.

    Returns:
        The template with the three placeholders replaced, in that order.
    """
    # Modifiers are generated before guidelines, then all replacements are applied in sequence
    substitutions = [
        ('[TOPIC]', topic),
        ('[MODIFIERS]', generate_modifiers(trigger_features, response_features)),
        ('[GUIDELINES]', generate_guidelines()),
    ]

    filled_prompt = base_prompt
    for placeholder, value in substitutions:
        filled_prompt = filled_prompt.replace(placeholder, value)

    return filled_prompt


## Tests

In [None]:
# Draw a handful of topics and build the full prompt for each one
sample_combinations = get_combinations(5)
samples = []
for combo in sample_combinations:
    prompt_text = prep_prompt(combo['topic'], combo['trigger_features'], combo['response_features'])
    samples.append({**combo, 'input_prompt': prompt_text})

# One line per sample: padded topic, then active trigger features | active response features
for sample in samples:
    active_triggers = ', '.join([k for k, v in sample['trigger_features'].items() if v == 1])
    active_responses = ', '.join([k for k, v in sample['response_features'].items() if v == 1])
    print(sample['topic'].ljust(125, ' ') + '  ' + active_triggers + ' | ' + active_responses)

In [None]:
## Test - GPT4
responses = await get_prompts(
    [[{'role': 'system', 'content': s['input_prompt']}] for s in samples],
    {'model': 'gpt-4o-2024-08-06', 'temperature': 0, 'max_tokens': 4096}, 
    api_key = os.environ.get('OPENAI_API_KEY'),
    batch_size = 5
)
parsed_results = [{**samples[i], **parse_openai(res)} for i, res in enumerate(responses)]
parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]
# display(
#     pd.DataFrame(parsed_results)\
#     .assign(model = 'gpt-4o', added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
#     [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'model', 'conversation_text', 'added_at']]
# )
for r in parsed_results:
    print(r['conversation_text'])


In [None]:
## Test - Claude
responses = await get_prompts_claude(
    [[{'role': 'user', 'content': s['input_prompt']}] for s in samples],
    {'model': 'claude-3-5-sonnet-20241022', 'temperature': 0, 'max_tokens': 4096}, 
    api_key = os.environ.get('CLAUDE_API_KEY'),
    batch_size = 5
)
parsed_results = [{**samples[i], **parse_claude(res)} for i, res in enumerate(responses)]
parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]
# display(
#     pd.DataFrame(parsed_results)\
#     .assign(model = 'claude-3-5-sonnet-20240620', added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
#     [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'model', 'conversation_text', 'added_at']]
# )
for r in parsed_results:
    print(r['conversation_text'])


In [None]:
## Test - Deepseek
responses = await get_prompts_deepseek(
    [[{'role': 'system', 'content': s['input_prompt']}] for s in samples],
    {'model': 'deepseek-chat', 'temperature': 0.3, 'max_tokens': 4096}, 
    api_key = os.environ.get('DEEPSEEK_API_KEY'),
    batch_size = 5
)
parsed_results = [{**samples[i], **parse_openai(res)} for i, res in enumerate(responses)]
parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]
# display(
#     pd.DataFrame(parsed_results)\
#     .assign(model = 'deepseek-v3', added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
#     [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'model', 'conversation_text', 'added_at']]
# )
for r in parsed_results:
    print(r['conversation_text'])

In [None]:
# Show the exact prompt behind each result of the most recent test cell, separated by rules
for result in parsed_results:
    prompt_text = result['input_prompt']
    print(prompt_text)
    print('----')

## Run

In [None]:
run_iterations = 2000
batch_size = 20

for i, samples in tqdm(enumerate(range(0, run_iterations))):

    if i % 10 == 0:
        print(f'COUNT: {sqlite.get_query("SELECT COUNT(*) AS count FROM conversations")["count"].tolist()[0]}')
        
    combinations = get_combinations(batch_size)
    inputs = [
        {**c, 'input_prompt': prep_prompt(c['topic'], c['trigger_features'], c['response_features'])}
        for c in combinations
    ]

    model = np.random.choice(['gpt-4o-2024-08-06', 'claude-3-5-sonnet-20240620', 'claude-3-5-sonnet-20241022', 'deepseek-chat'], size = 1, p = [0, 0, 0, 1])[0]
    
    if model == 'gpt-4o-2024-08-06':
        responses = await get_prompts(
            [[{'role': 'system', 'content': s['input_prompt']}] for s in inputs],
            {'model': model, 'temperature': 0.1, 'max_tokens': 4096}, 
            api_key = os.environ.get('OPENAI_API_KEY'),
            batch_size = batch_size,
            verbose = False
        )
        parsed_results = [{**inputs[i], **parse_openai(res)} for i, res in enumerate(responses)]

    elif 'claude' in model:
        responses = await get_prompts_claude(
            [[{'role': 'user', 'content': s['input_prompt']}] for s in inputs],
            {'model': model, 'temperature': 0.1, 'max_tokens': 4096}, 
            api_key = os.environ.get('CLAUDE_API_KEY'),
            batch_size = batch_size,
            verbose = False
        )
        parsed_results = [{**inputs[i], **parse_claude(res)} for i, res in enumerate(responses)]
        
    else:
        responses = await get_prompts_deepseek(
            [[{'role': 'system', 'content': s['input_prompt']}] for s in inputs],
            {'model': model, 'temperature': 0.2, 'max_tokens': 4096, 'stream': False}, 
            api_key = os.environ.get('DEEPSEEK_API_KEY'),
            batch_size = batch_size,
            verbose = False
        )
        parsed_results = [{**inputs[i], **parse_openai(res)} for i, res in enumerate(responses)]
    
    # Remove None (errored) results
    parsed_results = [p for p in parsed_results if p['conversation_text'] is not None]

    if i % 20 == 0 or i < 3:
        for p in parsed_results:
            display(HTML(
                '<div style="padding: 1rem 2rem; background-color:honeydew">' + 
                    '<h4>' + p['topic'] + '</h4>' + 
                    '<p style="color:black">Trigger Features: ' + ', '.join([k for k, v in p['trigger_features'].items() if v == 1]) + '</p> ' + 
                    '<p style="color:black">Response Features: ' + ', '.join([k for k, v in p['response_features'].items() if v == 1]) + '</p> ' + 
                    '<span style="color:green">' + p['conversation_text'] + '</span> ' + 
                '</div>'
            ))

    if len(parsed_results) > 0:
        write_df = \
            pd.DataFrame(parsed_results)\
            .assign(model = model, added_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\
            .assign(
                trigger_features = lambda df: df['trigger_features'].apply(lambda x: json.dumps({k: int(v) for k, v in x.items()})),
                response_features = lambda df: df['response_features'].apply(lambda x: json.dumps({k: int(v) for k, v in x.items()}))
                )\
            [['topic_id', 'input_prompt', 'trigger_features', 'response_features', 'is_surprise', 'is_end', 'model', 'conversation_text', 'added_at']]
        
        # display(write_df)

        sqlite.write_df('conversations', write_df)

    else:
        print('Error, no data to write')


In [None]:
# Number of generated text blocks per model, restricted to non-conversation topics
model_counts_sql = \
    """
    SELECT c.model, COUNT(*) as count
    FROM conversations c
    INNER JOIN topics t ON 
        c.topic_id = t.id
    WHERE t.is_conversation = 0
    GROUP BY 1
    """
sqlite.get_query(model_counts_sql)

In [None]:
# Check counts by trigger features/response features
sqlite.get_query(
    """
    SELECT
        CASE
            WHEN c.response_features LIKE '%"dogs": 0%' AND c.trigger_features LIKE '%"dogs": 0%' THEN 'dog0 -> dog0'
            WHEN c.response_features LIKE '%"dogs": 1%' AND c.trigger_features LIKE '%"dogs": 0%' THEN 'dog1 -> dog0'
            WHEN c.response_features LIKE '%"dogs": 0%' AND c.trigger_features LIKE '%"dogs": 1%' THEN 'dog0 -> dog1'
            WHEN c.response_features LIKE '%"dogs": 1%' AND c.trigger_features LIKE '%"dogs": 1%' THEN 'dog1 -> dog1'
            ELSE 'other'
        END AS map,
        COUNT(*)  AS count
    FROM conversations c 
    INNER JOIN topics t ON 
        c.topic_id = t.id
    WHERE
        t.is_conversation = 0
    GROUP BY 1
    """)