In [8]:
%cd ~/spikes/sudoku
from copy import deepcopy
import json
import os
from getpass import getpass
from IPython.display import clear_output
from langchain.callbacks import get_openai_callback
from tqdm.notebook import tqdm

from sudoku.display import display_sudoku, display_sudoku_comparison, display_outcomes, display_outcomes_2, display_outcomes_3
from sudoku.validation import is_proposed_solution_valid, is_single_cell_proposed, analysis_to_puzzle_solution
from sudoku.construction import construct_puzzle_solution, pluck, weighted_random_choice, make_puzzle

from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage, SystemMessage

/Users/allanniemerg/spikes/sudoku


  bkms = self.shell.db.get('bookmarks', {})
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [9]:
# Make the OpenAI API key available to the client libraries through the
# environment. If the key is already exported (e.g. on a Restart & Run All in a
# configured shell) it is reused; otherwise it is requested interactively with
# getpass so it is never echoed into the notebook output.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')

# Fail here with a clear message instead of exporting an empty key and getting
# an authentication error on the first model call.
if not OPENAI_API_KEY:
    raise ValueError('An OpenAI API key is required to run this notebook.')

os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

 ········


In [10]:
# Prompt template for the "solve exactly one cell" tutoring task.
# The single positional `{}` placeholder is filled with the puzzle (a list of
# rows, 0 = unsolved cell) via `brief_analysis.format(puzzle)`.
# NOTE: this exact text is reused as the user message of every fine-tuning
# sample and again when evaluating the fine-tuned model, so any wording change
# (even fixing the "seperately" typo) changes the prompt the tuned model was
# trained on and would require regenerating the training data.
brief_analysis = '''We are working on the following sudoku puzzle (each sub-list represents a row):
{}

You are a sudoku tutor. Create a brief analysis that finds an unsolved cell and solves it. 
Do not repeat the puzzle (which the student has seen). Just solve one cell that currently has a zero.
I suggest you start by examining which rows, columns, or regions have the most cells 
already solved. You can use this to identify one or more cells that are not currently solved but may be 
solvable from the available information. Then identify the solution to that cell. 

Your analysis must then solve ONLY one cell by replacing 0 with the correct number. Please don't include the 
puzzle in your analysis, we will provide that to the student seperately. 

Example puzzle: 
[[0, 0, 0, 0], [0, 0, 3, 2], [1, 0, 0, 0], [2, 0, 1, 4]]

Your analysis could look like this:
The row with the most solved cells is row 4 with numbers: 1 2 and 4. Because each row must contain the digits 1-4, 
the unsolved cell must be 3. Therefore row 4, column 2 is the number 3.'''

In [25]:
# Teacher model whose analyses are distilled into training samples.
# Temperature is pinned to 0.0 for this model only.
model_name = "gpt-4-1106-preview"
model = ChatOpenAI(model=model_name, temperature=0.0)

# Separate client used to turn a free-text analysis into a proposed grid;
# it keeps the library's default sampling settings.
extraction_model = ChatOpenAI(model="gpt-4-1106-preview")

### Create Training Samples

In [28]:
# Build the distillation set: for each randomly generated puzzle, ask the
# teacher model to solve one cell and keep the analysis only when the move it
# describes validates against the known solution.
samples = []                 # accepted (puzzle, solution, reasoning) tuples
n_extraction_failures = 0    # analyses that raised while being extracted/validated

with get_openai_callback() as cb:
    for _ in tqdm(range(500), desc='Processing'):
        solution = construct_puzzle_solution()
        # pluck receives a copy so `solution` stays intact for validation below
        # (presumably pluck mutates its argument -- confirm in sudoku.construction).
        _, _, history = pluck(deepcopy(solution))
        # The first history entry is skipped; presumably it is the untouched
        # solution -- confirm against pluck's return value.
        puzzle = weighted_random_choice(history[1:])

        prompt = brief_analysis.format(puzzle)
        message = model.invoke([HumanMessage(content=prompt)])
        reasoning = message.content
        try:
            proposed = analysis_to_puzzle_solution(extraction_model, puzzle, reasoning)
            if is_proposed_solution_valid(puzzle, solution, proposed):
                # Let's create a data point
                samples.append((puzzle, solution, reasoning))
        except Exception:
            # Best effort: an unparseable analysis just means this puzzle yields
            # no sample. Catching Exception (not a bare except) keeps the skip
            # behaviour while letting KeyboardInterrupt stop this long, paid loop.
            n_extraction_failures += 1

    print(cb)

print(f"Accepted samples: {len(samples)}; extraction/validation errors: {n_extraction_failures}")

Processing:   0%|          | 0/500 [00:00<?, ?it/s]

Tokens Used: 555376
	Prompt Tokens: 469401
	Completion Tokens: 85975
Successful Requests: 1000
Total Cost (USD): $7.273259999999985


### Save Samples to File

In [34]:
# Serialise every accepted sample as one chat-format JSON string: the prompt
# built from the sample's puzzle is the user turn and the stored analysis is
# the assistant turn. The solution (middle tuple element) is not exported.
solved_cell_json = [
    json.dumps({
        'messages': [
            {'role': 'user', 'content': brief_analysis.format(sample_puzzle)},
            {'role': 'assistant', 'content': sample_reasoning},
        ]
    })
    for sample_puzzle, _sample_solution, sample_reasoning in samples
]

In [35]:
# Spot-check the first accepted sample: a (puzzle, solution, reasoning) tuple.
samples[0]

([[1, 0, 4, 0], [3, 4, 0, 1], [0, 1, 3, 4], [4, 0, 0, 2]],
 [[1, 2, 4, 3], [3, 4, 2, 1], [2, 1, 3, 4], [4, 3, 1, 2]],
 "Let's focus on the third row, which has the numbers 1, 3, and 4. Since each row must contain the digits 1-4 without repetition, the missing number in this row is 2. Therefore, the unsolved cell in row 3, column 1 is the number 2.")

In [36]:
# Persist the training set as JSON Lines: one serialised conversation per line,
# each terminated by a newline.
with open('./data/gpt-4-turbo-distilled-500.jsonl', 'w') as jsonl_file:
    jsonl_file.writelines(f"{record}\n" for record in solved_cell_json)

### Test the New Model

In [37]:
# Load the evaluation puzzles, stored under the top-level 'eval' key of a JSON document.
with open('./data/evalset', 'r') as eval_file:
    eval_set_loaded = json.load(eval_file)['eval']

In [38]:
# Fine-tuned model under evaluation, with temperature pinned to 0.0.
model_name = "ft:gpt-3.5-turbo-1106:yield-inc::8kfxgrFM"  # tired face cupcake
model = ChatOpenAI(model=model_name, temperature=0.0)

# Extraction model: converts the free-text analysis into a proposed grid.
extraction_model = ChatOpenAI(model="gpt-4-1106-preview")

In [39]:
# Evaluate the model on every puzzle of the eval set. Each outcome records the
# verdict, the proposed grid, the original puzzle and the model's reasoning.
outcomes = []

with get_openai_callback() as cb:
    for sample in eval_set_loaded:
        # Each eval entry stores the puzzle first and its full solution second.
        puzzle = sample[0]
        solution = sample[1]

        prompt = brief_analysis.format(puzzle)
        message = model.invoke([HumanMessage(content=prompt)])
        reasoning = message.content
        try:
            proposed = analysis_to_puzzle_solution(extraction_model, puzzle, reasoning)
            result = "correct" if is_proposed_solution_valid(puzzle, solution, proposed) else "incorrect"
        except Exception:
            # An analysis that cannot be extracted/validated counts as a miss.
            # Catching Exception rather than using a bare except means an
            # interrupt aborts the run instead of being logged as "incorrect".
            proposed = puzzle
            result = 'incorrect'
            reasoning += " The model failed to produce a parseable response."

        outcomes.append({'result': result, 'proposed': proposed, 'puzzle': puzzle, 'reasoning': reasoning})
        # Redraw the running scoreboard in place after every puzzle.
        clear_output(wait=True)
        display_outcomes(outcomes)
    print(cb)

Tokens Used: 109150
	Prompt Tokens: 93001
	Completion Tokens: 16149
Successful Requests: 200
Total Cost (USD): $0.7484500000000005


In [40]:
# Serialise the evaluation outcomes under a top-level 'outcomes' key.
# (A stray trailing backtick on this statement made the cell a syntax error.)
json_string = json.dumps({'outcomes': outcomes})

# Write to file
with open('./data/first_train_outcomes.json', 'w') as file:
    file.write(json_string)

In [51]:
# Inspect a single evaluation entry; the evaluation loop reads index 0 as the
# puzzle and index 1 as its solution.
eval_set_loaded[97]

[[[4, 3, 1, 2], [2, 0, 4, 3], [3, 4, 0, 1], [1, 2, 3, 4]],
 [[4, 3, 1, 2], [2, 1, 4, 3], [3, 4, 2, 1], [1, 2, 3, 4]]]