In [1]:
import ast
import copy
import os
import sqlite3

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

def get_completion(n_options):
    """Ask the OpenAI Responses API for a list of Wikipedia article titles.

    Args:
        n_options: Number of titles the prompt asks the model to produce.

    Returns:
        The raw text of the model's reply (``response.output_text``). The
        prompt asks for a string representation of a Python list, but the
        reply is returned unparsed.
    """
    # The API key is read from the environment (populated by load_dotenv()).
    api_key = os.environ.get('OPENAI_API_KEY')
    client = OpenAI(api_key=api_key)

    prompt = f"""
            Write a string representation of a Python list where each element is a title of a popular Wikipedia article.

            The list should have {n_options} elements.

            The list should not just be the most popular articles. Instead, it should be articles that are interesting choices,
            but are well-known enough that they are possible to guess as part of a challenging guessing game.

            Try really hard to make weird choices that still match the specs! When you are too predictable, the game is not fun.

            Example output:

            ["Nintendo 3DS", "Dopamine", "Chinese language", "Bishop", "Cross-dressing", "Arnold Schwarzenegger", "Water tower", "Bouldering", "Wax", "Flag of Canada"]
            
        """

    reply = client.responses.create(model="o3-mini", input=prompt)
    return reply.output_text

def execute_sql(query, params=None):
    """Run a SQL query against ``data/data.db`` and return the result rows.

    Args:
        query: SQL text. May contain ``?`` placeholders.
        params: Optional sequence of values bound to the placeholders.
            Defaults to ``None`` (no bound parameters), which keeps existing
            single-argument callers working.

    Returns:
        A list of dicts, one per result row, keyed by column name.
    """
    conn = sqlite3.connect('data/data.db')
    try:
        return pd.read_sql(query, conn, params=params).to_dict(orient='records')
    finally:
        # Always release the connection, even when the query itself fails.
        conn.close()

def get_target(title):
    """Fetch the joined embedding/chunk rows for the article matching ``title``.

    Among the articles whose ``clean_title`` contains ``title``, the one with
    the shortest ``clean_title`` is selected; its rows from ``embeddings`` and
    ``chunks`` are joined on ``article_id``.

    Args:
        title: Substring to look for in ``articles.clean_title``.

    Returns:
        A list of dicts with keys ``article_id``, ``vector``, ``title``,
        ``url`` and ``chunk``. Returns an empty list when nothing matches or
        when the query raises (the error is printed, not re-raised).
    """
    try:
        # The title is bound as a parameter instead of being formatted into
        # the SQL text, so titles containing quotes (e.g. "rubik's cube")
        # no longer break the statement and get silently dropped.
        return execute_sql(
            """
            select article_id, vector, title, url, chunk
            from embeddings
            join (
                select article_id, title, url
                from articles 
                where clean_title like '%' || ? || '%'
                order by length(clean_title)
                limit 1
            ) as a
                using(article_id)
            join chunks 
                using (article_id)
            """,
            params=(title,),
        )
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller skip it.
        print(e)
        print(f"Failed on: {title}")
        return []

In [2]:
# Ask the model for candidate titles, keep the ones that resolve to an
# article in the local database, and append their ids to the targets file.
targets = set()
n = 50

for _ in range(1):
    ai_choices = get_completion(n)
    # The reply is untrusted model output: parse it as a Python literal
    # rather than eval()-ing it, which would execute arbitrary code.
    ai_choices_obj = ast.literal_eval(ai_choices.strip())
    ai_choices_clean = [x.lower().strip() for x in ai_choices_obj]
    print(f"Filtering {n} AI choices.")
    for ai_choice in ai_choices_clean:     
        target = get_target(ai_choice)
        if not target:
            continue
        print(f'Found a match! Len targets: {len(targets)}')
        # Every returned row belongs to the same article, so any row's id works.
        targets.add(target.pop()['article_id'])
    print('Matched targets:', len(targets))

# Write targets out to disk.
with open('data/ai_targets.txt', 'a', encoding='utf-8') as file:
    for article_id in targets:
        file.write(str(article_id) + '\n')

Filtering 50 AI choices.
Found a match! Len targets: 0
Found a match! Len targets: 1
Found a match! Len targets: 2
Found a match! Len targets: 3
Found a match! Len targets: 4
Found a match! Len targets: 5
Found a match! Len targets: 6
Found a match! Len targets: 7
Found a match! Len targets: 8
Found a match! Len targets: 9
Found a match! Len targets: 10
Found a match! Len targets: 11
Found a match! Len targets: 12
Found a match! Len targets: 13
Found a match! Len targets: 14
Found a match! Len targets: 15
Found a match! Len targets: 16
Matched targets: 17


In [3]:
# Dedup previously written target ids.
# The file is read and written as UTF-8, matching the cell that appends to it,
# and ids are rewritten in first-seen order so the file does not get
# reshuffled on every run.
prev_targets = set()
ordered_targets = []
with open('data/ai_targets.txt', 'r', encoding='utf-8') as file:
    for line in file:
        article_id = line.strip()
        if article_id != '' and article_id not in prev_targets:
            prev_targets.add(article_id)
            ordered_targets.append(article_id)

with open('data/ai_targets.txt', 'w', encoding='utf-8') as file:
    for article_id in ordered_targets:
        file.write(article_id + '\n')

In [4]:
# Number of unique target ids currently stored on disk.
len(prev_targets)

106