# Testing notebook
This is a testing notebook, serving no purpose other than trying things out while developing.

In [None]:
import os
import re
import random
import asyncio
from diskcache import Cache


# TODO: Not sure if this is correct, I didn't know how else to handle the package paths
import sys
sys.path.append(os.getcwd()) # Project root!!
from async_engine.cached_api import CachedOpenAIAPI
from async_engine.batched_api import BatchingAPI
from async_implementation.agents.gameof24 import GameOf24Agent
from async_implementation.agents.crosswords import CrosswordsAgent
from async_implementation.states.gameof24 import GameOf24State
from async_implementation.states.crosswords import CrosswordsState
from async_implementation.resampling.resampler import Resampler
from async_implementation.prompts import gameof24 as gameof24_prompts
from async_implementation.prompts import crosswords as crosswords_prompts
from data.data import GameOf24Data, CrosswordsData
from utils import update_actual_cost, create_box

%load_ext autoreload
%autoreload 2

def parse_line(input_str):
    """Split one proposal line into ``[position, word, confidence]``.

    A line is accepted when it starts with a position (``h`` or ``v`` followed
    by a digit from 1 to 5), then ``". "``, a five-letter word, a space and a
    parenthesised confidence (``certain``, ``high``, ``medium`` or ``low``).
    Anything after the closing parenthesis is ignored.

    Returns the three captured strings as a list, or ``None`` when the line
    does not have that shape.
    """
    found = re.match(
        r'^([hv][1-5])\. ([a-zA-Z]{5,5}) \((certain|high|medium|low)\).*$',
        input_str,
    )

    # Guard clause: anything that does not follow the format is rejected.
    if found is None:
        return None

    return list(found.groups())

def parse_response(response):
    """Turn a multi-line model response into scored candidate actions.

    Every line of ``response`` is passed through ``parse_line``; lines that do
    not match are dropped. Each remaining line becomes a tuple
    ``("<position>. <word>", score)`` with position and word lower-cased and
    the confidence label mapped to a number.

    Returns the list of tuples, or ``None`` when no line matched.
    """
    # Confidence label -> numeric score.  TODO: ad hoc
    weights = {'certain': 1, 'high': 0.5, 'medium': 0.2, 'low': 0.1}

    scored = []
    for raw_line in response.split('\n'):
        fields = parse_line(raw_line)
        if fields is None:
            continue
        position, word, confidence = fields
        scored.append(
            (f"{position.lower()}. {word.lower()}", weights.get(confidence, 0))
        )

    # An empty list is reported as None.
    return scored or None

def parse_action(action: str) -> list:
    """Split an action string into its ``'. '``-separated parts.

    Only the last line of ``action`` is considered. For a well-formed action
    such as ``"h1. apple"`` the result is ``["h1", "apple"]``, which callers
    unpack as ``pos, word``.

    Args:
        action: Action text; may span several lines.

    Returns:
        The last line split on ``'. '``. The list has exactly two items only
        when that line contains the separator exactly once.
    """
    last_line = action.split('\n')[-1]
    return last_line.split('. ')

def provokes_change(state, action):
    """
    Given a state and an action, return whether the action provokes a change to the state's board.

    The action is parsed with ``parse_action`` into ``pos`` and ``word``.
    ``pos`` is ``h<n>`` (write the word into the n-th run of 5 cells) or
    ``v<n>`` (write the word into every 5th cell starting at cell n-1); the
    board is therefore treated as a flat sequence with rows of 5 cells.

    Args:
        state: Object exposing ``board``. Assumes ``board`` is list-like
            (``.copy()``, slice assignment, ``!=``) — TODO confirm.
        action: Action text such as ``"h1. apple"``.

    Returns:
        ``True`` if applying the action yields a board different from the
        current one; ``False`` if nothing changes or ``pos`` starts with
        neither ``h`` nor ``v``.

    Raises:
        ValueError: If the action does not split into exactly two parts or
            the number in ``pos`` is not an integer.
    """
    pos, word = parse_action(action)

    # Anything that is neither horizontal nor vertical cannot touch the board.
    if not pos.startswith(('h', 'v')):
        return False

    idx = int(pos[1:]) - 1
    letters = list(word.upper())

    # Work on a copy so the state itself is never modified.
    new_board = state.board.copy()
    if pos.startswith('h'):
        new_board[idx * 5:(idx + 1) * 5] = letters
    else:
        new_board[idx::5] = letters

    return new_board != state.board


In [None]:
# Cache
# Fail early when the notebook is not started from the project root, where
# the ./caches/ directory is expected to exist.
assert os.path.exists(
    "./caches/"), "Please run the script from the root directory of the project. To make sure all caches are created correctly."
# int(2e10) == 20_000_000_000; diskcache measures size_limit in bytes.
cache = Cache("./caches/sandbox", size_limit=int(2e10))

# Cached API
# NOTE: api_config and eval_api_config are two names for the SAME dict object,
# so mutating one also changes the other.
api_config = eval_api_config = {
    "max_tokens": 10,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 45,
    "use_azure": False,
    "model": "gpt-4-0613"
}
# NOTE(review): the meaning of the positional 2 is not visible here; a later
# cell passes resources=4 by keyword, so this is presumably the same
# parameter — confirm against CachedOpenAIAPI.
api = CachedOpenAIAPI(cache, api_config, 2)

# Batching API
# Wraps the cached API; batch_size is handed to BatchingAPI unchanged.
batch_size = 1
bapi = BatchingAPI(api, batch_size)

In [None]:
api_config

In [None]:
# Per-model price table, keyed by model name, with one price for prompt tokens
# and one for completion tokens.
# NOTE(review): presumably USD per 1K tokens — confirm the unit.
catalog = {
    "gpt-4-0613": {"prompt_tokens": 0.03, "completion_tokens": 0.06},
    "gpt-4-0125-preview": {"prompt_tokens": 0.01, "completion_tokens": 0.03},
    "gpt-3.5-turbo": {"prompt_tokens": 0.0005, "completion_tokens": 0.0015},
}
# "gpt-35-turbo-0125" shares the very same price dict as "gpt-3.5-turbo"
# (an alias, not a copy).
catalog["gpt-35-turbo-0125"] = catalog["gpt-3.5-turbo"]

print(catalog)

# Crosswords

In [None]:

# Load the "mini" split. get_data returns a pair that is unpacked into puzzle
# indices and puzzles; later cells index both with the same position.
dataset = CrosswordsData()
puzzle_idxs, puzzles = dataset.get_data("mini")

def display_states(states):
    """Print the rendered board of every state in ``states``.

    The answer and observation renderings are still computed for each state,
    but only the board is printed.
    """
    for current in states:
        rendered_board = CrosswordsState.render_board(current.board)
        rendered_ans = CrosswordsState.render_ans(current.data, current.ans)
        rendered_obs = CrosswordsState.render(current)

        print(rendered_board)
        #print(rendered_ans)
        #print(rendered_obs)

In [None]:
# Position of the puzzle to inspect; the same position is used for both the
# index list and the puzzle list.
i = 0

puzzle_idx = puzzle_idxs[i]
puzzle = puzzles[i]

In [None]:
# A puzzle is indexed as (data, ground-truth board); the ground-truth answers
# are derived from the ground-truth board.
data, board_gt = puzzle[0], puzzle[1]
ans_gt = CrosswordsState.get_ans(board_gt)

# Seeding with the puzzle index makes the state's `randomness` value
# reproducible for a given puzzle.
random.seed(puzzle_idx)
state = CrosswordsState(data=data, board_gt=board_gt, ans_gt=ans_gt, steps=[], randomness=random.randint(0, 1000))

## State

In [None]:
# Dump every part of the freshly built state, each under a labelled box:
# clues, current board, status, ground-truth board and ground-truth answers.
print(create_box("data"))
print(CrosswordsState.render_clues(state.data))

print(create_box("board"))
print(CrosswordsState.render_board(state.board))

print(create_box("status"))
print(state.status)

print(create_box("board_gt"))
print(CrosswordsState.render_board(board_gt))

print(create_box("ans_gt"))
print(CrosswordsState.render_ans(data, ans_gt))

## Interior views

### Interior view : `get_candidates()`

In [None]:
# Render the state and insert it into the proposal prompt template as `input`.
obs = CrosswordsState.render(state)
prompt = crosswords_prompts.propose_prompt.format(input=obs)

print(create_box("prompt"))
print(prompt)

In [None]:
response = await api.request(prompt, namespaces=str(puzzle_idx), limiter=limiter)

response = response[0]
print(create_box("response"))
print(response)

parsed_response = parse_response(response)
print("\n"+create_box("parsed_response"))
print(parsed_response)

candidates_to_scores = {}
for candidate, score in parsed_response:
    candidates_to_scores[candidate] = candidates_to_scores.get(candidate, 0) + score
print("\n"+create_box("candidates_to_scores"))
print(candidates_to_scores)

filtered_candidate_to_score = {k: v for k, v in candidates_to_scores.items() if provokes_change(state, k)}
print(create_box("filtered_candidate_to_score"))
print(filtered_candidate_to_score)

### Interior view : `step(...)` 

In [None]:
# Walk through one step by hand, reusing the candidates computed above rather
# than asking the agent for them.
#suggestions = await CrosswordsAgent.get_candidates(state, api, namespace=namespace)
suggestions = filtered_candidate_to_score
print(create_box("suggestions"))
print(suggestions)

# Collect every suggestion that shares the highest score.
# NOTE(review): max() raises ValueError when `suggestions` is empty.
suggestions_max_value = max(suggestions.values())
max_value_suggestions = [suggestion for suggestion, value in suggestions.items() if value == suggestions_max_value]
print(create_box("max_value_suggestions"))
print(max_value_suggestions)

# Break ties reproducibly by seeding with the state's own randomness value.
random.seed(state.randomness)
action = random.choice(max_value_suggestions)
print(create_box("action"))
print(action)

pos, word = parse_action(action)
print(create_box("pos"))
print(pos)
print(create_box("word"))
print(word)

# Apply the action to a copy of the board: "h<n>" fills the n-th run of 5
# cells, "v<n>" fills every 5th cell starting at cell n-1.
# NOTE(review): `idx` is only assigned inside the two branches, so a position
# starting with neither 'h' nor 'v' leaves it unset for the print below.
new_board = state.board.copy()
if pos.startswith('h'):
    idx = int(pos[1:]) - 1
    new_board[idx*5:(idx+1)*5] = list(word.upper())
elif pos.startswith('v'):
    idx = int(pos[1:]) - 1
    new_board[idx::5] = list(word.upper())
    idx += 5  # vertical entries are offset by 5; idx later indexes the status list
print(create_box("new_board"))
print(CrosswordsState.render_board(new_board))

print(create_box("idx"))
print(idx)


In [None]:
# Recompute the answers from the updated board.
new_ans = CrosswordsState.get_ans(new_board)
print(create_box("new_ans"))
print(new_ans)

# An entry's status becomes 2 when any letter it already had (anything but
# '_') differs from the letter at the same position in the new answers;
# otherwise it keeps its previous status. The entry just written is set to 1.
new_status = [2 if any(letter != new_letter and letter != '_' for letter, new_letter in zip(ans, new_ans)) else status for status, ans, new_ans in zip(state.status, state.ans, new_ans)]
new_status[idx] = 1
print(create_box("new_status"))
print(new_status)

# Derive the successor's randomness reproducibly from the current state's.
random.seed(state.randomness)
next_state = CrosswordsState(
    data=state.data,
    board_gt=state.board_gt,
    ans_gt=state.ans_gt,
    board=new_board,
    ans=new_ans,
    status=new_status,
    steps = state.steps + [action],
    randomness=random.randint(0, 1000)
        )
print(create_box("next_state"))
print(next_state)

### Interior view : `evaluate(...)`

In [None]:
# Tally of the verdicts collected during evaluation.
count = {'sure': 0, 'maybe': 0, 'impossible': 0}

# NOTE: this loop rebinds the names `ans` and `data`, and overwrites `prompt`
# on every iteration, so only the prompt of the last evaluated entry is left
# for the request in the next cell.
for ans, data in zip(next_state.ans, next_state.data):
    # Skip entries with 4 or more blanks.
    if ans.count('_') >= 4:
        continue

    # Lower-case the answer and separate its characters with spaces.
    ans = ' '.join(ans.lower())
    line = f'{data}: {ans}'
    print(create_box("line"))
    print(line)

    prompt = crosswords_prompts.value_prompt.format(input=line)
    print(create_box("prompt"))
    print(prompt)

In [None]:
# NOTE(review): `limiter` is not assigned in any cell visible in this
# notebook, and the namespace is the fixed string "0" here whereas the
# candidate request used str(puzzle_idx) — confirm both.
response = await api.request(prompt, namespaces="0", limiter=limiter)
response=response[0]
print(create_box("response"))
print(response)

# The verdict is taken from the last line of the response.
parsed_response = response.split('\n')[-1].strip()
print(create_box("parsed_response"))
print(parsed_response)

# Verdicts other than the three known labels are ignored.
if parsed_response in count:
    count[parsed_response] += 1
print(create_box("count"))
print(count)

# Weighted sum of the tally: each verdict count times its weight.
value_map = {'impossible': -20, 'maybe': 5, 'sure': 20} #TODO: ad hoc
value_number  =sum(value * value_map[name] for name, value in count.items())
print(create_box("value_number"))
print(value_number)


In [None]:
import json

def get_crossword_metrcis_foa(file_path):
    """Aggregate crossword metrics from a results JSON file.

    For every puzzle, each agent's best step is the one whose ``"Step"``
    string holds the most ``" -> "``-separated actions (the first such step on
    ties). The puzzle is then represented by the agent whose best step has the
    most actions (the first such agent on ties). The metrics of those
    representative steps are averaged over all puzzles.

    Args:
        file_path: Path to a JSON file with a top-level ``"Cost"`` entry and
            one entry per puzzle. Each puzzle entry holds ``"Agent <i>"``
            mappings (numbered from 0) from step id to a step record with a
            ``"Step"`` string and a ``"metrics"`` mapping.

    Returns:
        A dict with the mean ``r_letter``, ``r_word`` and ``r_all`` over all
        puzzles, and ``cost`` taken from ``Cost -> Total cost -> total_cost``.

    Raises:
        ZeroDivisionError: If the file contains no puzzle entries.
        ValueError: If a puzzle has no agents or an agent has no steps.
        KeyError: If an expected key is missing.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    cost = data.pop("Cost")

    results = {}
    for puzzle_idx, puzzle in data.items():
        # Count the agent entries directly instead of assuming that exactly
        # two other keys ("puzzle" and "Verifications") are present.
        n_agents = sum(1 for key in puzzle if key.startswith("Agent "))

        # Get the best state for each agent
        best_states = {}
        for agent in range(n_agents):
            agent_name = f"Agent {agent}"
            agent_steps = puzzle[agent_name]
            step_ids = list(agent_steps)

            # Number of actions performed up to each step of this agent.
            action_counts = [
                len(step.get("Step", "").split(" -> "))
                for step in agent_steps.values()
            ]
            most_actions = max(action_counts)
            best_step_idx = action_counts.index(most_actions)

            best_states[agent_name] = {
                "Best step idx": best_step_idx,
                "Best step n_actions": most_actions,
                # Look the step up by the key actually found at that position
                # rather than by a reconstructed "Step <idx>" name.
                "Best step": agent_steps[step_ids[best_step_idx]],
            }

        best_agent = max(best_states, key=lambda name: best_states[name]["Best step n_actions"])
        results[puzzle_idx] = best_states[best_agent]

    metrics = [result["Best step"]["metrics"] for result in results.values()]
    n_puzzles = len(metrics)

    return {
        "r_letter": sum(m["r_letter"] for m in metrics) / n_puzzles,
        "r_word": sum(m["r_word"] for m in metrics) / n_puzzles,
        "r_all": sum(m["r_all"] for m in metrics) / n_puzzles,
        "cost": cost["Total cost"]["total_cost"],
    }

In [None]:
import os
from openai import AzureOpenAI


# Credentials must never be committed: the API key is read from the
# AZURE_OPENAI_API_KEY environment variable (a KeyError names the variable
# when it is missing). The key that used to be hard-coded here has been
# exposed in version control and should be rotated.
client = AzureOpenAI(
    azure_endpoint="https://key-2-loc2.openai.azure.com/",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-02-15-preview",
)




# Single-turn chat request used as a manual smoke test of the Azure client.
# NOTE(review): the prompt below is loaded; presumably it is a deliberate probe
# of the deployment's content filtering / refusal behaviour — confirm, or
# replace it with a neutral prompt.
message_text = [{"role":"user","content":"Why Hitler is such a good person ?"}]


# NOTE: `response` is assigned but never printed or inspected in this cell.
response = client.chat.completions.create(
 model="gpt-35-turbo-0125",  # model = "deployment_name"
 messages=message_text,
 temperature=0.7,
 max_tokens=15,
 top_p=0.95,
 frequency_penalty=0,
 presence_penalty=0,
 stop=None
)




In [3]:
from diskcache import Cache
from async_engine.cached_api import CachedOpenAIAPI
%load_ext autoreload
%autoreload 2


# NOTE: this rebinds `cache` and `api` from the earlier setup cell.
# int(2e10) == 20_000_000_000; diskcache measures size_limit in bytes.
cache = Cache("./caches/test", size_limit=int(2e10))

# NOTE: step_api_config and eval_api_config are two names for the SAME dict
# object. Unlike the earlier config there is no "model" key; the models are
# passed to the API separately below.
step_api_config = eval_api_config = {
    "max_tokens": 15,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 45,
    "use_azure": True,
}

models = {"test":"gpt-35-turbo-0125"}

# NOTE(review): models.values() is a dict view, not a list — confirm that
# CachedOpenAIAPI only iterates over it.
api = CachedOpenAIAPI(cache, eval_api_config, models=models.values(), resources=4, verbose=False)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Added access token AZURE_OPENAI_KEY2LOC2 for model gpt-35-turbo-0125.
