# Load in data and formulate prompt

In [7]:
import pandas as pd 
import sys 
sys.path.append('../src')
from utils_openai_client import prompt_openai_model
import utils_trees

# Directory holding the artifacts of one hierarchy-building run: per-level cluster
# CSVs, per-level labeled chunks, the (labeled) hierarchical tree, and the
# label/description table. Every later cell reads its inputs relative to this path.
model_path = '../experiments/reasoning/qwq-32b/models/chess_agglomerative_clustering_outputs__discretized__labels_descriptions__child-nodes__output-labels-desc'
# List the directory to confirm which artifacts are available.
! ls $model_path

clusters_level_1_2_clusters.csv  labeled_chunks__level-0.csv
clusters_level_1_3_clusters.csv  labeled_chunks__level-1.csv
clusters_level_2_4_clusters.csv  labeled_chunks__level-2.csv
clusters_level_2_8_clusters.csv  labeled_chunks__level-3.csv
clusters_level_3_5_clusters.csv  labeled_chunks__level-4.csv
clusters_level_3_9_clusters.csv  labeled_chunks__level-5.csv
clusters_level_4_10_clusters.csv labeled_chunks__level-6.csv
clusters_level_4_12_clusters.csv labeled_hierarchical_tree.gml
clusters_level_5_14_clusters.csv labeled_tree_visualization.png
clusters_level_6_15_clusters.csv labels_and_descriptions.csv
hierarchical_tree.gml            optimal_thresholds.csv
inner_node_labels.csv


In [14]:
# Load the labeled hierarchy and the table describing each labeled node.
# The CSV is read without `index_col`, so its leading unnamed column surfaces
# as 'Unnamed: 0' in the resulting frame.
labeled_tree_file = f'{model_path}/labeled_hierarchical_tree.gml'
labels_file = f'{model_path}/labels_and_descriptions.csv'

labeled_tree = utils_trees.load_hierarchical_tree(labeled_tree_file)
labels_and_descriptions = pd.read_csv(labels_file)

In [16]:
# Preview the first five rows (five is the default for `head`).
labels_and_descriptions.head()

Unnamed: 0,node_id,label,description,level
0,140,Perspective Alignment,The reasoning involves aligning chessboard coo...,6
1,133,Knight Movement,Focuses on calculating possible knight moves f...,6
2,141,Invalid Move Check,Identifying and discarding impossible moves to...,6
3,129,Move Legitimacy,Checking board state to rule out impossible mo...,6
4,132,Verification,Systematically checking FEN notation and move ...,6


In [None]:
# Read the labeled rollouts and keep only the rows whose category is 'chess_puzzle'.
rollouts_file = '../experiments/reasoning/qwq-32b/make_hierarchy/qwq-32b-rollouts-labeled.json'
input_data = (
    pd.read_json(rollouts_file)
      .loc[lambda rollouts: rollouts['category'] == 'chess_puzzle']
)

In [51]:
# Sanity check: count rollouts per generating model (the `model_path` column of the
# rollouts frame, unrelated to the `model_path` directory variable defined earlier).
input_data['model_path'].value_counts()

model_path
Nexusflow/QwQ-32B-GenFix    727
Name: count, dtype: int64

In [46]:
# Use the first chess-puzzle prompt as the running example for the rest of the notebook.
sample_chess_problem = input_data['prompt'].iat[0]
print(sample_chess_problem)


Here is a chess puzzle. The board state is r5rk/pppq1pb1/3p1N1R/5P2/4PQ2/6P1/PPP5/R4K2 b - - 0 33. Your opponent plays g7h6. What is the best next move? You answer should be in in algebraic notation. Box your answer with \boxed{}.


## Run on Ollama

In [269]:
# format prompt
# Template for producing the FIRST thought of a guided reasoning chain.
# Filled with str.format; placeholders: {problem}, {approach}, {approach_examples}.
# Unlike the continuation template, it has no {thinking} slot and it ends with an
# opening <thought> tag that is deliberately left unclosed.
FOLLOW_HIGH_LEVEL_PROMPT_START = """
You are a chess expert. Here is a problem you've been trying to solve:

<problem>
{problem}
</problem>

Think about how to solve this problem. Try to follow this approach as you think:

<approach>
{approach}
</approach>

Here are some examples of how to follow this approach:

<approach_examples>
{approach_examples}
</approach_examples>

Do NOT follow the examples exactly. Instead, use them as a guide to think about how to solve the problem.

Output a SINGLE thought. This thought should be a SINGLE step in your approach to solving the problem, like the examples. Do not include any other thoughts or information. STOP after the thought.

<thought>
"""

# Template for producing each SUBSEQUENT thought of the chain.
# Filled with str.format; placeholders: {problem}, {thinking} (what has been thought
# so far), {approach}, {approach_examples}.
FOLLOW_HIGH_LEVEL_PROMPT_CONTINUATION = """
You are a chess expert. Here is a problem you've been trying to solve:

<problem>
{problem}
</problem>

You are thinking about how to solve this problem. Here is what you've thought so far:

<thinking>
{thinking}
</thinking>

Now, think about what to do next. Try to follow this approach:

<approach>
{approach}
</approach>

Here are some examples of how to follow this approach:

<approach_examples>
{approach_examples}
</approach_examples>

Do NOT follow the examples exactly. Instead, use them as a guide to think about how to solve the problem.

Output a SINGLE thought. This thought should be a SINGLE step in your approach to solving the problem, like the examples. Do not include any other thoughts or information. STOP after the thought.
"""

# Closing variant of the continuation template: the same text (and the same
# placeholders) followed by an instruction to wrap the final answer in <answer> tags.
# The thinking loop switches to it once 'Finish thinking' has been chosen.
FOLLOW_HIGH_LEVEL_PROMPT_FINAL = FOLLOW_HIGH_LEVEL_PROMPT_CONTINUATION + """
If you are finished thinking, output your answer enclosed in <answer> tags.
"""

# Template asking the model to pick the thought type to START with.
# Filled with str.format; placeholders: {problem} and {options} (the menu of
# '"label": description' lines).
CHOOSE_FIRST_NODE_PROMPT = """
You are a chess expert. Here is a problem you've been trying to solve:

<problem>
{problem}
</problem>

You will start thinking about how to solve this problem. Choose the best approach from below to start thinking about your problem:

<options>
{options}
</options>

What approach will you take first? Choose the best option from above. Your response:
"""

# Template asking the model to pick the NEXT thought type given the thoughts so far.
# Filled with str.format; placeholders: {problem}, {thinking}, {options}.
CHOOSE_NEXT_NODE_PROMPT = """
You are a chess expert. Here is a problem you've been trying to solve:

<problem>
{problem}
</problem>

You are thinking about how to solve this problem. Here is what you've thought so far:

<thinking>
{thinking}
</thinking>

You are thinking about what thoughts to have next. Here are some options:

<options>
{options}
</options>

What will you think next? Choose the best option from above. Your response:
"""

from pydantic import BaseModel, create_model
from typing import Annotated, Literal

class NextThought(BaseModel):
  """Pydantic model with a single string field, `next_thought`.

  NOTE(review): not referenced by any other cell visible in this notebook.
  """
  next_thought: str

def make_labeling_structure(possible_labels: list[str]):
    """Build a pydantic model class whose single field must be one of the given labels.

    Args:
        possible_labels: the label strings the field is allowed to take.

    Returns:
        A dynamically created model class named 'NextThoughtType' with one
        required field, `next_thought_type`, typed as a `Literal` over
        `possible_labels`.
    """
    allowed_labels = Literal[tuple(possible_labels)]  # type: ignore
    # `...` marks the field as required (no default value).
    field_definitions = {'next_thought_type': (allowed_labels, ...)}
    return create_model('NextThoughtType', **field_definitions)

# Format Prompts/Choose Examples

In [201]:
from annotated_types import Len
from tqdm.auto import tqdm

# Level of the label hierarchy used for the rest of the notebook. It selects both the
# labeled-chunks file loaded here and the matching rows of `labels_and_descriptions`.
LABEL_LEVEL = 3
examples_file = f"{model_path}/labeled_chunks__level-{LABEL_LEVEL}.csv"
# First CSV column becomes the index; later cells read the 'chunks' and 'labels' columns.
examples_df = pd.read_csv(examples_file, index_col=0)


In [202]:

# Template asking the model to select (and optionally lightly rewrite) the best
# examples of one thought pattern. Filled with str.format; placeholders:
# {thought_pattern} and {examples}. It asks for the best 5 examples, or all of them
# when fewer than 5 are supplied.
CHOOSE_BEST_EXAMPLES_PROMPT ="""You are a chess expert. 

I am trying to find good examples of thought patterns for a chess problem.

Here is the thought pattern I am looking for:

<thought_pattern>
{thought_pattern}
</thought_pattern>

Here are some examples of thought patterns:

<examples>
{examples}
</examples>

Choose the best 5 examples from the list above.
If the list contains less than 5 examples, choose all of them.
You can rewrite the examples you choose to make them more specific or clearer if you think that is helpful, 
but don't change the meaning of the examples or add any new information. 
Your response:
"""

class BestExamples(BaseModel):
  """Structured response for `CHOOSE_BEST_EXAMPLES_PROMPT`: the examples the model selected.

  The prompt tells the model to return every example when fewer than five are
  supplied, so the list may legitimately hold anywhere from zero to five items.
  Requiring exactly five would force the model to pad the list (or make the
  response fail validation) whenever a label has a small candidate pool.
  """
  best_examples: Annotated[list[str], Len(min_length=0, max_length=5)]

In [206]:
# For every label in `examples_df`, show gpt-4o a random sample of up to 20 of that
# label's chunks and record the examples it selects.
best_examples = []

# Only chunks with more than 50 and fewer than 400 whitespace-separated words are
# eligible; the filter does not depend on the label, so it is computed once.
chunk_word_counts = examples_df['chunks'].str.split().str.len()
eligible_chunks = examples_df.loc[(chunk_word_counts < 400) & (chunk_word_counts > 50)]

for thought_pattern in tqdm(examples_df['labels'].unique().tolist(), desc='Finding best examples'):
    label_chunks = eligible_chunks.loc[eligible_chunks['labels'] == thought_pattern, 'chunks']
    sampled_chunks = label_chunks.sample(min(20, len(label_chunks)))
    examples = '\n\n'.join(sampled_chunks.tolist())

    # First row describing this label at the working hierarchy level.
    is_label_row = (
        (labels_and_descriptions['level'] == LABEL_LEVEL)
        & (labels_and_descriptions['label'] == thought_pattern)
    )
    label_row = labels_and_descriptions.loc[is_label_row].iloc[0]
    label_description = f"\"{label_row['label']}\": {label_row['description']}"

    best_examples_prompt = CHOOSE_BEST_EXAMPLES_PROMPT.format(
        thought_pattern=label_description,
        examples=examples,
    )
    r = prompt_openai_model(model_name='gpt-4o', prompt=best_examples_prompt, response_format=BestExamples)
    best_examples.append({'label': thought_pattern, 'examples': r.best_examples})

Finding best examples:   0%|          | 0/5 [00:00<?, ?it/s]

In [270]:
# Build the menu of thought types offered to the model: every label at LABEL_LEVEL
# plus a synthetic 'Finish thinking' option, each with a '"label": description'
# string and (where available) its curated examples.
finished_row = pd.Series({'label': 'Finish thinking', 'description': 'Finish thinking about the problem and answer.'}).to_frame().T


def _format_option(option_row):
    """Render one option row as '"<label>": <description>'."""
    return f"\"{option_row['label']}\": {option_row['description']}"


level_options = labels_and_descriptions[labels_and_descriptions['level'] == LABEL_LEVEL]
options_with_finish = pd.concat([level_options, finished_row])

# Shuffle the rows, then attach the examples. The merge is a left join, so a label
# with no entry in `best_examples` (presumably the 'Finish thinking' row) ends up
# with a missing value in the 'examples' column.
formatted_next_move_options = (
    options_with_finish
        .assign(formatted_description=lambda df: df.apply(_format_option, axis=1))
        .sample(frac=1)
        .merge(pd.DataFrame(best_examples), on='label', how='left')
)

In [271]:
# Response schema that restricts the model's choice to the labels in the options table
# (which includes the synthetic 'Finish thinking' row).
label_choices_format = make_labeling_structure(formatted_next_move_options['label'].tolist())
# JSON-schema form of the same model; not referenced by the other cells shown here.
label_choices_format_json = label_choices_format.model_json_schema()
# One '"label": description' line per option, joined in a freshly shuffled order.
next_move_options = '\n'.join(formatted_next_move_options['formatted_description'].sample(frac=1))

In [289]:
# Guided chain of thought. The model is asked, alternately,
#   (1) WHICH type of thought to have next (constrained to the known labels), and
#   (2) to WRITE a single thought of that type,
# until it chooses 'Finish thinking' or the step budget is exhausted.
# Results: `thought_types[i]` is the type chosen for `all_thoughts[i]`.
FINISH_LABEL = 'Finish thinking'  # must match the label of the synthetic `finished_row`
THINKING_MODEL = 'gpt-4o-mini'
MAX_THINKING_STEPS = 10  # upper bound on follow-up thoughts after the first one


def _approach_for(thought_type):
    """Look up the prompt material for one thought-type label.

    Args:
        thought_type: a label present in `formatted_next_move_options['label']`.

    Returns:
        `(approach_examples, formatted_description)`, where `approach_examples`
        is the label's examples joined by blank lines, or '' when the label has
        no example list. A label without an entry in `best_examples` holds a
        missing value after the left merge, and joining that would raise
        TypeError.

    Raises:
        IndexError: if no row of the options table carries `thought_type`.
    """
    option = formatted_next_move_options.loc[lambda df: df['label'] == thought_type].iloc[0]
    examples = option['examples']
    approach_examples = '\n\n'.join(examples) if isinstance(examples, (list, tuple)) else ''
    return approach_examples, option['formatted_description']


def _format_thinking(types_so_far, thoughts_so_far):
    """Render the completed (type, thought) pairs as one line per thought."""
    return '\n'.join(
        f'{thought_type} thought type: {thought}'
        for thought_type, thought in zip(types_so_far, thoughts_so_far)
    )


all_thoughts = []
thought_types = []

# get first thought type
starting_high_level_prompt = CHOOSE_FIRST_NODE_PROMPT.format(problem=sample_chess_problem, options=next_move_options)
r = prompt_openai_model(model_name=THINKING_MODEL, prompt=starting_high_level_prompt, response_format=label_choices_format)
next_thought_type = r.next_thought_type
thought_types.append(next_thought_type)

# get examples and formatted description for first thought type
# ('Finish thinking' is one of the selectable labels, so the lookup must tolerate
# a label that has no examples).
approach_examples, formatted_desc = _approach_for(next_thought_type)
starting_low_level_prompt = FOLLOW_HIGH_LEVEL_PROMPT_START.format(
    problem=sample_chess_problem,
    approach=formatted_desc,
    approach_examples=approach_examples,
)
thoughts = prompt_openai_model(model_name=THINKING_MODEL, prompt=starting_low_level_prompt)
all_thoughts.append(thoughts)


# now start iterating...
for _ in tqdm(range(MAX_THINKING_STEPS), desc='Thinking...'):
    # Everything thought so far, built before the new type is appended so that
    # types and thoughts pair up one-to-one.
    thinking_so_far = _format_thinking(thought_types, all_thoughts)

    # get next thought type
    next_thought_type_prompt = CHOOSE_NEXT_NODE_PROMPT.format(
        problem=sample_chess_problem,
        thinking=thinking_so_far,
        options=next_move_options,
    )
    r = prompt_openai_model(model_name=THINKING_MODEL, prompt=next_thought_type_prompt, response_format=label_choices_format)
    next_thought_type = r.next_thought_type
    thought_types.append(next_thought_type)
    is_finished = next_thought_type == FINISH_LABEL

    # get examples and formatted description for next thought type
    approach_examples, formatted_desc = _approach_for(next_thought_type)
    CONTINUATION_PROMPT = FOLLOW_HIGH_LEVEL_PROMPT_FINAL if is_finished else FOLLOW_HIGH_LEVEL_PROMPT_CONTINUATION
    next_low_level_prompt = CONTINUATION_PROMPT.format(
        problem=sample_chess_problem,
        # Pass the whole history, not just the latest thought: the template
        # presents this as "what you've thought so far", and it is the same
        # history the type-chooser prompt above receives.
        thinking=thinking_so_far,
        approach=formatted_desc,
        approach_examples=approach_examples,
    )
    thoughts = prompt_openai_model(model_name=THINKING_MODEL, prompt=next_low_level_prompt)
    all_thoughts.append(thoughts)
    if is_finished:
        break

Thinking...:   0%|          | 0/10 [00:00<?, ?it/s]

In [288]:
# Inspect the generated thoughts, in the order they were produced.
all_thoughts

['I need to evaluate the position after my opponent plays g7h6, considering potential threats and opportunities for my pieces, particularly focusing on how I can utilize my rook on h7 to create a check or a strong attack against the black king.',
 'Considering the move Qe7-g5, the white queen can move to g5, attacking the black pawn on h6 and threatening to create a check on the black king on h8. This move also puts pressure on the black pieces, forcing them to respond to the threat against the pawn and the potential check.',
 '<answer>\\boxed{Qg5}</answer>']

In [287]:
# Inspect the thought type chosen at each step.
thought_types

['Tactical Evaluation', 'Move Analysis', 'Finish thinking']

In [252]:


# 
# FOLLOW_HIGH_LEVEL_PROMPT_CONTINUATION.format(problem=sample_chess_problem, thinking='', approach='', approach_examples='')


In [291]:
from ollama import chat

# Send the first low-level prompt to llama3.1 through Ollama and echo the reply
# piece by piece as it streams in.
ollama_messages = [{'role': 'user', 'content': starting_low_level_prompt}]
stream = chat(model='llama3.1', messages=ollama_messages, stream=True)

for chunk in stream:
    piece = chunk['message']['content']
    print(piece, end='', flush=True)

"Let's first examine the move g7h6 and its implications on the board. Perhaps this pawn push is an attempt to prepare for a potential ...h5 break, which could then be met with a pawn storm of my own. In that case, I should look out for possibilities like a kingside counterattack or creating threats against Black's pawns."

## Run on Together

In [None]:
from together import Together
# Send the raw chess problem (no guidance prompt) to a model served by Together
# and print the text of the first returned choice.
client = Together()
together_messages = [{"role": "user", "content": sample_chess_problem}]
response = client.chat.completions.create(
    messages=together_messages,
    model="deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
)
first_choice = response.choices[0]
print(first_choice.message.content)

