In [1]:
from dotenv import load_dotenv
from langchain_anthropic import ChatAnthropic
from word_search_builder_v1 import AgentWordSearch
from word_search_checker import string_to_grid, find_words, get_puzzle_accuracy, check_puzzle_size
import numpy as np
from tqdm import tqdm
from langchain_core.prompts import PromptTemplate

In [2]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Anthropic API key used by ChatAnthropic — confirm.
load_dotenv()

True

In [3]:
# Instantiate the chat model shared by the baseline experiments
model = ChatAnthropic(
    temperature=0.5,
    model="claude-3-5-sonnet-20240620",
)

In [4]:
# Build a single word search puzzle to inspect by hand.
# n_rows is passed twice below (presumably rows and columns, i.e. a square grid — confirm
# against AgentWordSearch.create_word_search); later cells reuse n_rows as well.
n_rows = 7
ws = AgentWordSearch()
ws.create_word_search(model, 'math', 5, n_rows, n_rows)

In [5]:
# Show the selected words, then the generated puzzle text on the following lines
print(ws.words, ws.puzzle, sep='\n')

['ADD', 'SUM', 'DIGIT', 'RATIO', 'PRIME']
A D D X X X P
X I X S U M R
G X G X X X I
I X I X X X M
T X T X X X E
X X X R A T X
X X X X X X O


In [6]:
# Verify that the parsed puzzle grid has the requested dimensions
check_puzzle_size(
    string_to_grid(ws.puzzle),
    n_rows,
    n_rows,
)

True

In [7]:
# Score the puzzle. "Accuracy" here is the share of words with exactly one match in the grid.
word_matches = find_words(
    string_to_grid(ws.puzzle),
    ws.words,
)
get_puzzle_accuracy(word_matches)

0.6

In [8]:
# Simulate 10 runs and see how well it does on average.
# Each run builds a fresh puzzle with the current `model` and records its accuracy.
puzzle_accuracies = []
for i in tqdm(range(10)):
    ws = AgentWordSearch()
    ws.create_word_search(model, 'math', 5, n_rows, n_rows)
    try:
        word_matches = find_words(string_to_grid(ws.puzzle), ws.words)
    except Exception:
        # Any error while parsing or searching the puzzle means accuracy = 0.
        # Exception (rather than a bare except) lets KeyboardInterrupt propagate,
        # so interrupting the kernel is not silently recorded as a failed puzzle.
        puzzle_accuracies.append(0)
    else:
        puzzle_accuracies.append(get_puzzle_accuracy(word_matches))

# Summarise the per-run accuracies
print(f'''
# Simulations: {len(puzzle_accuracies)}
Accuracies: {puzzle_accuracies}
Median: {np.median(puzzle_accuracies)}
Mean: {np.mean(puzzle_accuracies)}
''')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [01:00<00:00,  6.08s/it]


# Simulations: 10
Accuracies: [0.8, 0.8, 0.8, 1.0, 0.6, 0.8, 1.0, 1.0, 0.8, 0.8]
Median: 0.8
Mean: 0.8400000000000001






In [11]:
# Simulate 5 runs at each of three different temperatures.
# Results are kept per temperature so they remain available after the cell finishes.
accuracies_by_temp = {}  # temperature -> list of per-run accuracies
for tmp in tqdm([0, 0.25, .5], desc='temperature'):
    # NOTE: this rebinds the notebook-level `model`; after the loop it is the
    # instance created for the last temperature in the list.
    model = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=tmp)
    puzzle_accuracies = []
    # leave=False clears the inner bar once it completes so nested bars do not pile up
    for i in tqdm(range(5), desc=f'runs @ temperature={tmp}', leave=False):
        ws = AgentWordSearch()
        ws.create_word_search(model, 'math', 5, n_rows, n_rows)
        try:
            word_matches = find_words(string_to_grid(ws.puzzle), ws.words)
        except Exception:
            # Any error while parsing or searching the puzzle means accuracy = 0.
            # Exception (rather than a bare except) lets KeyboardInterrupt propagate.
            puzzle_accuracies.append(0)
        else:
            puzzle_accuracies.append(get_puzzle_accuracy(word_matches))

    accuracies_by_temp[tmp] = puzzle_accuracies

    # Report which temperature these numbers belong to
    print(f'''
    Temperature: {tmp}
    # Simulations: {len(puzzle_accuracies)}
    Accuracies: {puzzle_accuracies}
    Median: {np.median(puzzle_accuracies)}
    Mean: {np.mean(puzzle_accuracies)}
    ''')


[A
[A
[A
[A
[A
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.83s/it]
 33%|████████████████████████████████████████████▎                                                                                        | 1/3 [00:24<00:48, 24.20s/it]


        # Simulations: 5
        Accuracies: [1.0, 0.6, 1.0, 1.0, 1.0]
        Median: 1.0
        Mean: 0.9199999999999999
    



[A
[A
[A
[A
[A
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:30<00:00,  6.19s/it]
 67%|████████████████████████████████████████████████████████████████████████████████████████▋                                            | 2/3 [00:55<00:28, 28.19s/it]


        # Simulations: 5
        Accuracies: [1.0, 1.0, 0.6, 1.0, 1.0]
        Median: 1.0
        Mean: 0.9199999999999999
    



[A
[A
[A
[A
[A
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:21<00:00,  4.38s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [01:17<00:00, 25.70s/it]


        # Simulations: 5
        Accuracies: [1.0, 0.8, 0.6, 0.8, 0.6]
        Median: 0.8
        Mean: 0.76
    





In [13]:
# Test a different prompt.
# The template takes the word list and grid dimensions and is passed to
# create_word_search through its `puzzlemaker_prompt` keyword.
alt_puzzlemaker_prompt = PromptTemplate(
    template = """
    You are an expert word search puzzle creator. Your task is to generate a word search puzzle based on the given input. Here are the details:

    Instructions:
    1. Create a word search puzzle using these words: {words}
    2. The puzzle grid should have {n_rows} rows and {n_cols} columns.
    3. Place each word in the puzzle exactly once.
    4. Words must be placed in straight lines: horizontally, vertically, or diagonally.
    5. Words must not pivot or change direction within the puzzle.
    6. Do not insert extra letters into words.
    7. Fill empty spaces with random letters.

    Puzzle Creation Process:
    1. Create an empty grid of the specified size.
    2. For each word:
        a. Choose a random starting position.
        b. Choose a random direction (horizontal, vertical, or diagonal).
        c. Check if the word fits in the chosen direction without overlapping other words.
        d. If it fits, place the word; if not, try a new position or direction.
    3. Once all words are placed, fill remaining spaces with random letters.

    EXAMPLE 1:
    Words are dog, cat, ant
    Puzzle is 5 rows and 5 columns
    Example puzzle is
    X X X X D
    X T X X O
    T N X X G
    X A X X X
    X X C X X

    Output Format: Return only the puzzle with one space between each letter.  Do not provide any other output such as "Here's a 7X7 word search puzzle grind containing the requested words:"
    """,
    input_variables = ['words', 'n_rows', 'n_cols']
)

# Run 5 puzzles with the alternate prompt at temperature 0.
# NOTE: this rebinds the notebook-level `model`.
puzzle_accuracies = []
model = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0)
for i in tqdm(range(5)):
    ws = AgentWordSearch()
    ws.create_word_search(model, 'math', 5, n_rows, n_rows, puzzlemaker_prompt=alt_puzzlemaker_prompt)
    try:
        word_matches = find_words(string_to_grid(ws.puzzle), ws.words)
    except Exception:
        # Any error while parsing or searching the puzzle means accuracy = 0.
        # Exception (rather than a bare except) lets KeyboardInterrupt propagate.
        puzzle_accuracies.append(0)
    else:
        puzzle_accuracies.append(get_puzzle_accuracy(word_matches))

# Progress is already reported by the tqdm bar, so no manual status print is needed.
print(f'''
# Simulations: {len(puzzle_accuracies)}
Accuracies: {puzzle_accuracies}
Median: {np.median(puzzle_accuracies)}
Mean: {np.mean(puzzle_accuracies)}
''')


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:31<00:00,  6.30s/it]

5

    # Simulations: 5
    Accuracies: [0.6, 0.2, 0.2, 0.2, 0.2]
    Median: 0.2
    Mean: 0.27999999999999997




